| /* Copyright (C) 1991, 1993, 1995, 1997, 1998, 2003, 2006 Free Software |
| Foundation, Inc. |
| |
| Contributed by Torbjorn Granlund (tege@sics.se). |
| |
| NOTE: The canonical source of this file is maintained with the GNU C Library. |
| Bugs can be reported to bug-glibc@prep.ai.mit.edu. |
| |
| This program is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by the |
| Free Software Foundation; either version 2, or (at your option) any |
| later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, |
| USA. */ |
| |
| #ifndef _LIBC |
| # include <config.h> |
| #endif |
| |
| #include <string.h> |
| |
| #undef memcmp |
| |
| #ifdef _LIBC |
| |
| # include <memcopy.h> |
| # include <endian.h> |
| |
| # if __BYTE_ORDER == __BIG_ENDIAN |
| # define WORDS_BIGENDIAN |
| # endif |
| |
| #else /* Not in the GNU C library. */ |
| |
| # include <sys/types.h> |
| |
/* Word type used for the aligned, word-at-a-time comparisons.  Pick the
   widest type the machine can load and store in one operation; it has
   to be unsigned.  OPSIZ is its size in bytes.  */
# define op_t unsigned long int
# define OPSIZ (sizeof (op_t))

/* Blocks shorter than this many bytes are compared bytewise only; the
   unrolled word loops are entered at or above this length.  */
# define OP_T_THRES 16

/* Type used for single-byte (unaligned) accesses.  */
typedef unsigned char byte;

/* MERGE (W0, SH_1, W1, SH_2) -- Combine two adjacent words: W0 is shifted
   by SH_1 bits and W1 by SH_2 bits in opposite directions and the results
   are OR-ed.  The shift directions depend on the byte order.  */
# ifdef WORDS_BIGENDIAN
#  define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
# else
#  define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
# endif
| |
| #endif /* In the GNU C library. */ |
| |
/* CMP_LT_OR_GT (A, B) -- Ordering result for two words A and B that are
   already known to differ.  On big-endian machines the numeric order of
   the words is used directly and the result is 1 or -1.  Otherwise the
   words are handed to memcmp_bytes, which compares them byte by byte.  */
#ifndef WORDS_BIGENDIAN
# define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b))
#else
# define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1)
#endif
| |
| /* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */ |
| |
| /* The strategy of this memcmp is: |
| |
| 1. Compare bytes until one of the block pointers is aligned. |
| |
| 2. Compare using memcmp_common_alignment or |
| memcmp_not_common_alignment, regarding the alignment of the other |
| block after the initial byte operations. The maximum number of |
| full words (of type op_t) are compared in this way. |
| |
| 3. Compare the few remaining bytes. */ |
| |
| #ifndef WORDS_BIGENDIAN |
/* memcmp_bytes -- Compare the words A and B byte by byte, in the order
   the bytes are laid out in memory on this machine.
   Callers pass words that are known to differ; the result is then the
   difference (as int) between the first pair of bytes that differ.
   Should A and B be equal after all, 0 is returned instead of scanning
   past the end of the words.
   This is needed only on little-endian machines.  */

#ifdef __GNUC__
__inline
#endif
static int
memcmp_bytes (long unsigned int a, long unsigned int b)
{
  /* Inspect the object representation through unsigned char pointers.
     Keeping these as pointers (rather than converting the addresses to
     long int and back) is safe even where long int is narrower than a
     pointer.  */
  const unsigned char *pa = (const unsigned char *) &a;
  const unsigned char *pb = (const unsigned char *) &b;
  size_t i;

  for (i = 0; i < sizeof a; i++)
    {
      if (pa[i] != pb[i])
        return pa[i] - pb[i];
    }

  return 0;
}
| #endif |
| |
| /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' |
| objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for |
| memory operations on `op_t's. */ |
| #ifdef __GNUC__ |
| __inline |
| #endif |
| static int |
| memcmp_common_alignment (long int srcp1, long int srcp2, size_t len) |
| { |
| op_t a0, a1; |
| op_t b0, b1; |
| |
| switch (len % 4) |
| { |
| default: /* Avoid warning about uninitialized local variables. */ |
| case 2: |
| a0 = ((op_t *) srcp1)[0]; |
| b0 = ((op_t *) srcp2)[0]; |
| srcp1 -= 2 * OPSIZ; |
| srcp2 -= 2 * OPSIZ; |
| len += 2; |
| goto do1; |
| case 3: |
| a1 = ((op_t *) srcp1)[0]; |
| b1 = ((op_t *) srcp2)[0]; |
| srcp1 -= OPSIZ; |
| srcp2 -= OPSIZ; |
| len += 1; |
| goto do2; |
| case 0: |
| if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
| return 0; |
| a0 = ((op_t *) srcp1)[0]; |
| b0 = ((op_t *) srcp2)[0]; |
| goto do3; |
| case 1: |
| a1 = ((op_t *) srcp1)[0]; |
| b1 = ((op_t *) srcp2)[0]; |
| srcp1 += OPSIZ; |
| srcp2 += OPSIZ; |
| len -= 1; |
| if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
| goto do0; |
| /* Fall through. */ |
| } |
| |
| do |
| { |
| a0 = ((op_t *) srcp1)[0]; |
| b0 = ((op_t *) srcp2)[0]; |
| if (a1 != b1) |
| return CMP_LT_OR_GT (a1, b1); |
| |
| do3: |
| a1 = ((op_t *) srcp1)[1]; |
| b1 = ((op_t *) srcp2)[1]; |
| if (a0 != b0) |
| return CMP_LT_OR_GT (a0, b0); |
| |
| do2: |
| a0 = ((op_t *) srcp1)[2]; |
| b0 = ((op_t *) srcp2)[2]; |
| if (a1 != b1) |
| return CMP_LT_OR_GT (a1, b1); |
| |
| do1: |
| a1 = ((op_t *) srcp1)[3]; |
| b1 = ((op_t *) srcp2)[3]; |
| if (a0 != b0) |
| return CMP_LT_OR_GT (a0, b0); |
| |
| srcp1 += 4 * OPSIZ; |
| srcp2 += 4 * OPSIZ; |
| len -= 4; |
| } |
| while (len != 0); |
| |
| /* This is the right position for do0. Please don't move |
| it into the loop. */ |
| do0: |
| if (a1 != b1) |
| return CMP_LT_OR_GT (a1, b1); |
| return 0; |
| } |
| |
| /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN |
| `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory |
| operations on `op_t', but SRCP1 *should be unaligned*. */ |
| #ifdef __GNUC__ |
| __inline |
| #endif |
| static int |
| memcmp_not_common_alignment (long int srcp1, long int srcp2, size_t len) |
| { |
| op_t a0, a1, a2, a3; |
| op_t b0, b1, b2, b3; |
| op_t x; |
| int shl, shr; |
| |
| /* Calculate how to shift a word read at the memory operation |
| aligned srcp1 to make it aligned for comparison. */ |
| |
| shl = 8 * (srcp1 % OPSIZ); |
| shr = 8 * OPSIZ - shl; |
| |
| /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t' |
| it points in the middle of. */ |
| srcp1 &= -OPSIZ; |
| |
| switch (len % 4) |
| { |
| default: /* Avoid warning about uninitialized local variables. */ |
| case 2: |
| a1 = ((op_t *) srcp1)[0]; |
| a2 = ((op_t *) srcp1)[1]; |
| b2 = ((op_t *) srcp2)[0]; |
| srcp1 -= 1 * OPSIZ; |
| srcp2 -= 2 * OPSIZ; |
| len += 2; |
| goto do1; |
| case 3: |
| a0 = ((op_t *) srcp1)[0]; |
| a1 = ((op_t *) srcp1)[1]; |
| b1 = ((op_t *) srcp2)[0]; |
| srcp2 -= 1 * OPSIZ; |
| len += 1; |
| goto do2; |
| case 0: |
| if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
| return 0; |
| a3 = ((op_t *) srcp1)[0]; |
| a0 = ((op_t *) srcp1)[1]; |
| b0 = ((op_t *) srcp2)[0]; |
| srcp1 += 1 * OPSIZ; |
| goto do3; |
| case 1: |
| a2 = ((op_t *) srcp1)[0]; |
| a3 = ((op_t *) srcp1)[1]; |
| b3 = ((op_t *) srcp2)[0]; |
| srcp1 += 2 * OPSIZ; |
| srcp2 += 1 * OPSIZ; |
| len -= 1; |
| if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
| goto do0; |
| /* Fall through. */ |
| } |
| |
| do |
| { |
| a0 = ((op_t *) srcp1)[0]; |
| b0 = ((op_t *) srcp2)[0]; |
| x = MERGE(a2, shl, a3, shr); |
| if (x != b3) |
| return CMP_LT_OR_GT (x, b3); |
| |
| do3: |
| a1 = ((op_t *) srcp1)[1]; |
| b1 = ((op_t *) srcp2)[1]; |
| x = MERGE(a3, shl, a0, shr); |
| if (x != b0) |
| return CMP_LT_OR_GT (x, b0); |
| |
| do2: |
| a2 = ((op_t *) srcp1)[2]; |
| b2 = ((op_t *) srcp2)[2]; |
| x = MERGE(a0, shl, a1, shr); |
| if (x != b1) |
| return CMP_LT_OR_GT (x, b1); |
| |
| do1: |
| a3 = ((op_t *) srcp1)[3]; |
| b3 = ((op_t *) srcp2)[3]; |
| x = MERGE(a1, shl, a2, shr); |
| if (x != b2) |
| return CMP_LT_OR_GT (x, b2); |
| |
| srcp1 += 4 * OPSIZ; |
| srcp2 += 4 * OPSIZ; |
| len -= 4; |
| } |
| while (len != 0); |
| |
| /* This is the right position for do0. Please don't move |
| it into the loop. */ |
| do0: |
| x = MERGE(a2, shl, a3, shr); |
| if (x != b3) |
| return CMP_LT_OR_GT (x, b3); |
| return 0; |
| } |
| |
| int |
| rpl_memcmp (const void *s1, const void *s2, size_t len) |
| { |
| op_t a0; |
| op_t b0; |
| long int srcp1 = (long int) s1; |
| long int srcp2 = (long int) s2; |
| op_t res; |
| |
| if (len >= OP_T_THRES) |
| { |
| /* There are at least some bytes to compare. No need to test |
| for LEN == 0 in this alignment loop. */ |
| while (srcp2 % OPSIZ != 0) |
| { |
| a0 = ((byte *) srcp1)[0]; |
| b0 = ((byte *) srcp2)[0]; |
| srcp1 += 1; |
| srcp2 += 1; |
| res = a0 - b0; |
| if (res != 0) |
| return res; |
| len -= 1; |
| } |
| |
| /* SRCP2 is now aligned for memory operations on `op_t'. |
| SRCP1 alignment determines if we can do a simple, |
| aligned compare or need to shuffle bits. */ |
| |
| if (srcp1 % OPSIZ == 0) |
| res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ); |
| else |
| res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ); |
| if (res != 0) |
| return res; |
| |
| /* Number of bytes remaining in the interval [0..OPSIZ-1]. */ |
| srcp1 += len & -OPSIZ; |
| srcp2 += len & -OPSIZ; |
| len %= OPSIZ; |
| } |
| |
| /* There are just a few bytes to compare. Use byte memory operations. */ |
| while (len != 0) |
| { |
| a0 = ((byte *) srcp1)[0]; |
| b0 = ((byte *) srcp2)[0]; |
| srcp1 += 1; |
| srcp2 += 1; |
| res = a0 - b0; |
| if (res != 0) |
| return res; |
| len -= 1; |
| } |
| |
| return 0; |
| } |
| |
#ifdef weak_alias
# undef bcmp
/* Export bcmp as a weak alias of the comparison function defined in this
   file.  The target must be a symbol this translation unit defines, and
   the function here is named rpl_memcmp.  */
weak_alias (rpl_memcmp, bcmp)
#endif