| /* memcmp with SSE4.2 |
| Copyright (C) 2010 Free Software Foundation, Inc. |
| Contributed by Intel Corporation. |
| This file is part of the GNU C Library. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| modify it under the terms of the GNU Lesser General Public |
| License as published by the Free Software Foundation; either |
| version 2.1 of the License, or (at your option) any later version. |
| |
| The GNU C Library is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| Lesser General Public License for more details. |
| |
| You should have received a copy of the GNU Lesser General Public |
| License along with the GNU C Library; if not, write to the Free |
| Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 02111-1307 USA. */ |
| |
| #ifndef NOT_IN_libc |
| |
| #include <sysdep.h> |
| #include "asm-syntax.h" |
| |
| #ifndef MEMCMP |
| # define MEMCMP __memcmp_sse4_2 /* symbol name used unless MEMCMP is already defined */ |
| #endif |
| /* CFI bookkeeping that goes with a 4-byte push of REG: the CFA offset |
| grows by 4 and REG is recorded as saved at offset 0 from the stack |
| pointer. */ |
| #define CFI_PUSH(REG) \ |
| cfi_adjust_cfa_offset (4); \ |
| cfi_rel_offset (REG, 0) |
| /* CFI bookkeeping that goes with a 4-byte pop of REG: the CFA offset |
| shrinks by 4 and REG is marked as restored. */ |
| #define CFI_POP(REG) \ |
| cfi_adjust_cfa_offset (-4); \ |
| cfi_restore (REG) |
| /* Push or pop REG and emit the matching CFI bookkeeping. */ |
| #define PUSH(REG) pushl REG; CFI_PUSH (REG) |
| #define POP(REG) popl REG; CFI_POP (REG) |
| /* BLK1, BLK2 and LEN are the %esp-relative offsets of the three stack |
| arguments as read at function entry, before anything is pushed; they |
| start at PARMS and are 4 bytes apart. RETURN pops %ebx and returns; |
| its trailing CFI_PUSH only changes the unwind description, so that the |
| code which textually follows the ret is again described as running |
| with %ebx pushed. */ |
| #define PARMS 4 |
| #define BLK1 PARMS |
| #define BLK2 BLK1+4 |
| #define LEN BLK2+4 |
| #define RETURN POP (%ebx); ret; CFI_PUSH (%ebx) |
| |
| |
| #ifdef SHARED |
| # define JMPTBL(I, B) I - B /* table entry = target I relative to table base B */ |
| |
| /* Load an entry in a jump table into EBX and branch to it. TABLE is a |
| jump table with relative offsets. INDEX is a register containing the |
| index into the jump table. SCALE is the scale of INDEX. EBX is |
| overwritten. */ |
| # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ |
| /* We first load PC into EBX. */ \ |
| call __i686.get_pc_thunk.bx; \ |
| /* Get the address of the jump table. */ \ |
| addl $(TABLE - .), %ebx; \ |
| /* Get the entry and convert the relative offset to the \ |
| absolute address. */ \ |
| addl (%ebx,INDEX,SCALE), %ebx; \ |
| /* EBX now holds the absolute address of the target. Go. */ \ |
| jmp *%ebx |
| /* Helper for the macro above: copies its own return address, i.e. the |
| address of the instruction following the call, into EBX. */ |
| .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits |
| .globl __i686.get_pc_thunk.bx |
| .hidden __i686.get_pc_thunk.bx |
| ALIGN (4) |
| .type __i686.get_pc_thunk.bx,@function |
| __i686.get_pc_thunk.bx: |
| movl (%esp), %ebx |
| ret |
| #else |
| # define JMPTBL(I, B) I /* table entry = address of target I; B is unused */ |
| |
| /* Branch through an entry in a jump table. TABLE is a jump table of |
| absolute addresses. INDEX is a register containing the index into |
| the jump table. SCALE is the scale of INDEX. No register is written. */ |
| # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ |
| jmp *TABLE(,INDEX,SCALE) |
| #endif |
| |
| .section .text.sse4.2,"ax",@progbits |
| ENTRY (MEMCMP) /* Stack arguments: BLK1 and BLK2 (the two blocks) and LEN (byte count). |
| Result in %eax: 0 when the blocks match; for LEN >= 2, 1 or -1 according |
| to the unsigned order of the first differing bytes; for LEN == 1, the |
| difference of the two bytes. */ |
| movl BLK1(%esp), %eax /* eax = first block */ |
| movl BLK2(%esp), %edx /* edx = second block */ |
| movl LEN(%esp), %ecx /* ecx = byte count */ |
| cmp $1, %ecx |
| jbe L(less1bytes) /* LEN is 0 or 1: handled without %ebx or SSE */ |
| pxor %xmm0, %xmm0 /* xmm0 = 0; it is the second operand of every ptest below */ |
| cmp $64, %ecx |
| ja L(64bytesormore) /* LEN > 64: 64-byte loop */ |
| cmp $8, %ecx |
| PUSH (%ebx) /* pushl does not change the flags of the cmp above */ |
| jb L(less8bytes) /* LEN is 2..7: byte-by-byte compare */ |
| add %ecx, %edx /* LEN is 8..64: point both registers just past the blocks ... */ |
| add %ecx, %eax /* ... because the L(NNbytes) routines use negative offsets */ |
| BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4) /* go to the table entry for LEN */ |
| |
| ALIGN (4) |
| L(less8bytes): /* LEN is 2..7 and %ebx is pushed: compare one byte at a time */ |
| mov (%eax), %bl |
| cmpb (%edx), %bl /* flags = block 1 byte minus block 2 byte */ |
| jne L(nonzero) |
| |
| mov 1(%eax), %bl |
| cmpb 1(%edx), %bl |
| jne L(nonzero) |
| |
| cmp $2, %ecx |
| jz L(0bytes) /* LEN == 2 and both bytes matched */ |
| |
| mov 2(%eax), %bl |
| cmpb 2(%edx), %bl |
| jne L(nonzero) |
| |
| cmp $3, %ecx |
| jz L(0bytes) |
| |
| mov 3(%eax), %bl |
| cmpb 3(%edx), %bl |
| jne L(nonzero) |
| |
| cmp $4, %ecx |
| jz L(0bytes) |
| |
| mov 4(%eax), %bl |
| cmpb 4(%edx), %bl |
| jne L(nonzero) |
| |
| cmp $5, %ecx |
| jz L(0bytes) |
| |
| mov 5(%eax), %bl |
| cmpb 5(%edx), %bl |
| jne L(nonzero) |
| |
| cmp $6, %ecx |
| jz L(0bytes) |
| |
| mov 6(%eax), %bl |
| cmpb 6(%edx), %bl |
| je L(0bytes) /* LEN == 7 and the last byte matched; otherwise fall through */ |
| L(nonzero): /* flags still describe the byte compare that found the difference */ |
| POP (%ebx) |
| mov $1, %eax /* mov and popl leave those flags untouched */ |
| ja L(above) /* block 1 byte is above (unsigned): return 1 */ |
| neg %eax /* otherwise return -1 */ |
| L(above): |
| ret |
| CFI_PUSH (%ebx) /* unwind info only: L(0bytes) is entered with %ebx still pushed */ |
| |
| ALIGN (4) |
| L(0bytes): /* blocks are equal; also the jump table target for 0 remaining bytes */ |
| POP (%ebx) |
| xor %eax, %eax |
| ret |
| |
| ALIGN (4) |
| L(less1bytes): /* LEN is 0 or 1; flags come from the cmp $1, %ecx at entry; %ebx was never pushed */ |
| jb L(0bytesend) /* LEN == 0 */ |
| movzbl (%eax), %eax |
| movzbl (%edx), %edx |
| sub %edx, %eax /* LEN == 1: return block 1 byte minus block 2 byte */ |
| ret |
| |
| ALIGN (4) |
| L(0bytesend): /* return 0 without popping, for the path that did not push %ebx */ |
| xor %eax, %eax |
| ret |
| |
| ALIGN (4) |
| L(64bytesormore): /* reached only when LEN > 64; compares 64 bytes per iteration */ |
| PUSH (%ebx) |
| mov %ecx, %ebx |
| mov $64, %ecx /* ecx = bytes handled per iteration */ |
| sub $64, %ebx /* ebx = bytes that remain after the 64 about to be compared */ |
| L(64bytesormore_loop): |
| movdqu (%eax), %xmm1 |
| movdqu (%edx), %xmm2 |
| pxor %xmm1, %xmm2 /* xmm2 = XOR of the two 16-byte chunks, zero iff they match */ |
| ptest %xmm2, %xmm0 /* xmm0 is zero, so CF is set exactly when xmm2 is all zero */ |
| jnc L(find_16diff) /* difference within bytes 0..15 */ |
| |
| movdqu 16(%eax), %xmm1 |
| movdqu 16(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(find_32diff) /* difference within bytes 16..31 */ |
| |
| movdqu 32(%eax), %xmm1 |
| movdqu 32(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(find_48diff) /* difference within bytes 32..47 */ |
| |
| movdqu 48(%eax), %xmm1 |
| movdqu 48(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(find_64diff) /* difference within bytes 48..63 */ |
| add %ecx, %eax /* all 64 bytes matched: step both pointers forward */ |
| add %ecx, %edx |
| sub %ecx, %ebx |
| jae L(64bytesormore_loop) /* no borrow: at least 64 more bytes were left */ |
| add %ebx, %ecx /* ecx = 64 + (negative) ebx = leftover byte count, 0..63 */ |
| add %ecx, %edx /* point both registers just past the blocks */ |
| add %ecx, %eax |
| BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %ecx, 4) /* finish with the leftover bytes */ |
| |
| ALIGN (4) |
| L(find_16diff): /* ecx is 64 on entry; this fall-through chain turns it into 16, 32, 48 or 64 */ |
| sub $16, %ecx |
| L(find_32diff): |
| sub $16, %ecx |
| L(find_48diff): |
| sub $16, %ecx |
| L(find_64diff): |
| add %ecx, %edx /* point both registers just past the differing 16-byte chunk */ |
| add %ecx, %eax |
| jmp L(16bytes) /* re-compare those 16 bytes dword by dword */ |
| |
| ALIGN (4) |
| L(16bytes): /* compare the 16 bytes that precede %eax and %edx, one dword at a time */ |
| mov -16(%eax), %ecx /* ecx = dword of block 1 */ |
| mov -16(%edx), %ebx /* ebx = dword of block 2 */ |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(12bytes): |
| mov -12(%eax), %ecx |
| mov -12(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(8bytes): |
| mov -8(%eax), %ecx |
| mov -8(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(4bytes): |
| mov -4(%eax), %ecx |
| mov -4(%edx), %ebx |
| cmp %ebx, %ecx |
| mov $0, %eax /* preset the equal result; mov keeps the flags for the jne */ |
| jne L(find_diff) |
| RETURN |
| |
| ALIGN (4) |
| L(49bytes): /* 16-byte chunk at offset -49, then falls through into L(33bytes) */ |
| movdqu -49(%eax), %xmm1 |
| movdqu -49(%edx), %xmm2 |
| mov $-49, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) /* chunk differs: locate the byte */ |
| L(33bytes): |
| movdqu -33(%eax), %xmm1 |
| movdqu -33(%edx), %xmm2 |
| mov $-33, %ebx |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(17bytes): /* last 17 bytes: four dwords, then one byte */ |
| mov -17(%eax), %ecx |
| mov -17(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(13bytes): |
| mov -13(%eax), %ecx |
| mov -13(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(9bytes): |
| mov -9(%eax), %ecx |
| mov -9(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(5bytes): |
| mov -5(%eax), %ecx |
| mov -5(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzbl -1(%eax), %ecx /* final single byte */ |
| cmp -1(%edx), %cl |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(50bytes): /* 16-byte chunk at offset -50, then falls through into L(34bytes) */ |
| mov $-50, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| movdqu -50(%eax), %xmm1 |
| movdqu -50(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(34bytes): |
| mov $-34, %ebx |
| movdqu -34(%eax), %xmm1 |
| movdqu -34(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(18bytes): /* last 18 bytes: four dwords, then two bytes */ |
| mov -18(%eax), %ecx |
| mov -18(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(14bytes): |
| mov -14(%eax), %ecx |
| mov -14(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(10bytes): |
| mov -10(%eax), %ecx |
| mov -10(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(6bytes): |
| mov -6(%eax), %ecx |
| mov -6(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(2bytes): |
| movzwl -2(%eax), %ecx /* last two bytes of block 1 */ |
| movzwl -2(%edx), %ebx /* last two bytes of block 2 */ |
| cmp %bl, %cl /* byte at offset -2 first */ |
| jne L(end) |
| cmp %bh, %ch /* then the byte at offset -1 */ |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(51bytes): /* 16-byte chunk at offset -51, then falls through into L(35bytes) */ |
| mov $-51, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| movdqu -51(%eax), %xmm1 |
| movdqu -51(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(35bytes): |
| mov $-35, %ebx |
| movdqu -35(%eax), %xmm1 |
| movdqu -35(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(19bytes): /* last 19 bytes: four dwords, two bytes, one byte */ |
| movl -19(%eax), %ecx |
| movl -19(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(15bytes): |
| movl -15(%eax), %ecx |
| movl -15(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(11bytes): |
| movl -11(%eax), %ecx |
| movl -11(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(7bytes): |
| movl -7(%eax), %ecx |
| movl -7(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| L(3bytes): |
| movzwl -3(%eax), %ecx /* bytes at offsets -3 and -2 */ |
| movzwl -3(%edx), %ebx |
| cmpb %bl, %cl /* byte at offset -3 first */ |
| jne L(end) |
| cmp %bx, %cx /* low bytes are equal here, so this is decided by the byte at -2 */ |
| jne L(end) |
| L(1bytes): |
| movzbl -1(%eax), %eax /* eax is overwritten; it is set to the result next */ |
| cmpb -1(%edx), %al |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(52bytes): /* 16-byte chunks at offsets -52, -36 and -20, then one dword */ |
| movdqu -52(%eax), %xmm1 |
| movdqu -52(%edx), %xmm2 |
| mov $-52, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(36bytes): |
| movdqu -36(%eax), %xmm1 |
| movdqu -36(%edx), %xmm2 |
| mov $-36, %ebx |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(20bytes): |
| movdqu -20(%eax), %xmm1 |
| movdqu -20(%edx), %xmm2 |
| mov $-20, %ebx |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| mov -4(%eax), %ecx /* final dword */ |
| mov -4(%edx), %ebx |
| cmp %ebx, %ecx |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(find_diff) |
| RETURN |
| |
| ALIGN (4) |
| L(53bytes): /* 16-byte chunks at offsets -53, -37 and -21, then a dword and a byte */ |
| movdqu -53(%eax), %xmm1 |
| movdqu -53(%edx), %xmm2 |
| mov $-53, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(37bytes): |
| mov $-37, %ebx |
| movdqu -37(%eax), %xmm1 |
| movdqu -37(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(21bytes): |
| mov $-21, %ebx |
| movdqu -21(%eax), %xmm1 |
| movdqu -21(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| mov -5(%eax), %ecx /* dword at offset -5 */ |
| mov -5(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzbl -1(%eax), %ecx /* final single byte */ |
| cmp -1(%edx), %cl |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(54bytes): /* 16-byte chunks at offsets -54, -38 and -22, then a dword and two bytes */ |
| movdqu -54(%eax), %xmm1 |
| movdqu -54(%edx), %xmm2 |
| mov $-54, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(38bytes): |
| mov $-38, %ebx |
| movdqu -38(%eax), %xmm1 |
| movdqu -38(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(22bytes): |
| mov $-22, %ebx |
| movdqu -22(%eax), %xmm1 |
| movdqu -22(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| |
| mov -6(%eax), %ecx /* dword at offset -6 */ |
| mov -6(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzwl -2(%eax), %ecx /* last two bytes */ |
| movzwl -2(%edx), %ebx |
| cmp %bl, %cl /* byte at offset -2 first */ |
| jne L(end) |
| cmp %bh, %ch /* then the byte at offset -1 */ |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(55bytes): /* 16-byte chunks at offsets -55, -39 and -23, then a dword, two bytes, one byte */ |
| movdqu -55(%eax), %xmm1 |
| movdqu -55(%edx), %xmm2 |
| mov $-55, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(39bytes): |
| mov $-39, %ebx |
| movdqu -39(%eax), %xmm1 |
| movdqu -39(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(23bytes): |
| mov $-23, %ebx |
| movdqu -23(%eax), %xmm1 |
| movdqu -23(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| movl -7(%eax), %ecx /* dword at offset -7 */ |
| movl -7(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzwl -3(%eax), %ecx /* bytes at offsets -3 and -2 */ |
| movzwl -3(%edx), %ebx |
| cmpb %bl, %cl |
| jne L(end) |
| cmp %bx, %cx /* low bytes are equal here, so this is decided by the byte at -2 */ |
| jne L(end) |
| movzbl -1(%eax), %eax /* final byte; eax is set to the result next */ |
| cmpb -1(%edx), %al |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(56bytes): /* 16-byte chunks at offsets -56, -40 and -24, then two dwords */ |
| movdqu -56(%eax), %xmm1 |
| movdqu -56(%edx), %xmm2 |
| mov $-56, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(40bytes): |
| mov $-40, %ebx |
| movdqu -40(%eax), %xmm1 |
| movdqu -40(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(24bytes): |
| mov $-24, %ebx |
| movdqu -24(%eax), %xmm1 |
| movdqu -24(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| |
| mov -8(%eax), %ecx /* dword at offset -8 */ |
| mov -8(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov -4(%eax), %ecx /* final dword */ |
| mov -4(%edx), %ebx |
| cmp %ebx, %ecx |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(find_diff) |
| RETURN |
| |
| ALIGN (4) |
| L(57bytes): /* 16-byte chunks at offsets -57, -41 and -25, then two dwords and a byte */ |
| movdqu -57(%eax), %xmm1 |
| movdqu -57(%edx), %xmm2 |
| mov $-57, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(41bytes): |
| mov $-41, %ebx |
| movdqu -41(%eax), %xmm1 |
| movdqu -41(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(25bytes): |
| mov $-25, %ebx |
| movdqu -25(%eax), %xmm1 |
| movdqu -25(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| mov -9(%eax), %ecx /* dword at offset -9 */ |
| mov -9(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| mov -5(%eax), %ecx /* dword at offset -5 */ |
| mov -5(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzbl -1(%eax), %ecx /* final single byte */ |
| cmp -1(%edx), %cl |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(58bytes): /* 16-byte chunks at offsets -58, -42 and -26, then two dwords and two bytes */ |
| movdqu -58(%eax), %xmm1 |
| movdqu -58(%edx), %xmm2 |
| mov $-58, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(42bytes): |
| mov $-42, %ebx |
| movdqu -42(%eax), %xmm1 |
| movdqu -42(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(26bytes): |
| mov $-26, %ebx |
| movdqu -26(%eax), %xmm1 |
| movdqu -26(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| |
| mov -10(%eax), %ecx /* dword at offset -10 */ |
| mov -10(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov -6(%eax), %ecx /* dword at offset -6 */ |
| mov -6(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| movzwl -2(%eax), %ecx /* last two bytes */ |
| movzwl -2(%edx), %ebx |
| cmp %bl, %cl /* byte at offset -2 first */ |
| jne L(end) |
| cmp %bh, %ch /* then the byte at offset -1 */ |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(59bytes): /* 16-byte chunks at offsets -59, -43 and -27, then two dwords, two bytes, one byte */ |
| movdqu -59(%eax), %xmm1 |
| movdqu -59(%edx), %xmm2 |
| mov $-59, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(43bytes): |
| mov $-43, %ebx |
| movdqu -43(%eax), %xmm1 |
| movdqu -43(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(27bytes): |
| mov $-27, %ebx |
| movdqu -27(%eax), %xmm1 |
| movdqu -27(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| movl -11(%eax), %ecx /* dword at offset -11 */ |
| movl -11(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movl -7(%eax), %ecx /* dword at offset -7 */ |
| movl -7(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzwl -3(%eax), %ecx /* bytes at offsets -3 and -2 */ |
| movzwl -3(%edx), %ebx |
| cmpb %bl, %cl |
| jne L(end) |
| cmp %bx, %cx /* low bytes are equal here, so this is decided by the byte at -2 */ |
| jne L(end) |
| movzbl -1(%eax), %eax /* final byte; eax is set to the result next */ |
| cmpb -1(%edx), %al |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(60bytes): /* 16-byte chunks at offsets -60, -44 and -28, then three dwords */ |
| movdqu -60(%eax), %xmm1 |
| movdqu -60(%edx), %xmm2 |
| mov $-60, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(44bytes): |
| mov $-44, %ebx |
| movdqu -44(%eax), %xmm1 |
| movdqu -44(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(28bytes): |
| mov $-28, %ebx |
| movdqu -28(%eax), %xmm1 |
| movdqu -28(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| mov -12(%eax), %ecx /* dword at offset -12 */ |
| mov -12(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| mov -8(%eax), %ecx /* dword at offset -8 */ |
| mov -8(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| mov -4(%eax), %ecx /* final dword */ |
| mov -4(%edx), %ebx |
| cmp %ebx, %ecx |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(find_diff) |
| RETURN |
| |
| ALIGN (4) |
| L(61bytes): /* 16-byte chunks at offsets -61, -45 and -29, then three dwords and a byte */ |
| movdqu -61(%eax), %xmm1 |
| movdqu -61(%edx), %xmm2 |
| mov $-61, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(45bytes): |
| mov $-45, %ebx |
| movdqu -45(%eax), %xmm1 |
| movdqu -45(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(29bytes): |
| mov $-29, %ebx |
| movdqu -29(%eax), %xmm1 |
| movdqu -29(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| |
| mov -13(%eax), %ecx /* dword at offset -13 */ |
| mov -13(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov -9(%eax), %ecx /* dword at offset -9 */ |
| mov -9(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov -5(%eax), %ecx /* dword at offset -5 */ |
| mov -5(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzbl -1(%eax), %ecx /* final single byte */ |
| cmp -1(%edx), %cl |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(62bytes): /* 16-byte chunks at offsets -62, -46 and -30, then three dwords and two bytes */ |
| movdqu -62(%eax), %xmm1 |
| movdqu -62(%edx), %xmm2 |
| mov $-62, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(46bytes): |
| mov $-46, %ebx |
| movdqu -46(%eax), %xmm1 |
| movdqu -46(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(30bytes): |
| mov $-30, %ebx |
| movdqu -30(%eax), %xmm1 |
| movdqu -30(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| mov -14(%eax), %ecx /* dword at offset -14 */ |
| mov -14(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| mov -10(%eax), %ecx /* dword at offset -10 */ |
| mov -10(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| mov -6(%eax), %ecx /* dword at offset -6 */ |
| mov -6(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzwl -2(%eax), %ecx /* last two bytes */ |
| movzwl -2(%edx), %ebx |
| cmp %bl, %cl /* byte at offset -2 first */ |
| jne L(end) |
| cmp %bh, %ch /* then the byte at offset -1 */ |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(63bytes): /* 16-byte chunks at offsets -63, -47 and -31, then three dwords, two bytes, one byte */ |
| movdqu -63(%eax), %xmm1 |
| movdqu -63(%edx), %xmm2 |
| mov $-63, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(47bytes): |
| mov $-47, %ebx |
| movdqu -47(%eax), %xmm1 |
| movdqu -47(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(31bytes): |
| mov $-31, %ebx |
| movdqu -31(%eax), %xmm1 |
| movdqu -31(%edx), %xmm2 |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| |
| movl -15(%eax), %ecx /* dword at offset -15 */ |
| movl -15(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movl -11(%eax), %ecx /* dword at offset -11 */ |
| movl -11(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movl -7(%eax), %ecx /* dword at offset -7 */ |
| movl -7(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| movzwl -3(%eax), %ecx /* bytes at offsets -3 and -2 */ |
| movzwl -3(%edx), %ebx |
| cmpb %bl, %cl |
| jne L(end) |
| cmp %bx, %cx /* low bytes are equal here, so this is decided by the byte at -2 */ |
| jne L(end) |
| movzbl -1(%eax), %eax /* final byte; eax is set to the result next */ |
| cmpb -1(%edx), %al |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(end) |
| RETURN |
| |
| ALIGN (4) |
| L(64bytes): /* 16-byte chunks at offsets -64, -48 and -32, then four dwords */ |
| movdqu -64(%eax), %xmm1 |
| movdqu -64(%edx), %xmm2 |
| mov $-64, %ebx /* offset of this chunk, consumed by L(less16bytes) */ |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(48bytes): |
| movdqu -48(%eax), %xmm1 |
| movdqu -48(%edx), %xmm2 |
| mov $-48, %ebx |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| L(32bytes): |
| movdqu -32(%eax), %xmm1 |
| movdqu -32(%edx), %xmm2 |
| mov $-32, %ebx |
| pxor %xmm1, %xmm2 |
| ptest %xmm2, %xmm0 |
| jnc L(less16bytes) |
| |
| mov -16(%eax), %ecx /* dword at offset -16 */ |
| mov -16(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov -12(%eax), %ecx /* dword at offset -12 */ |
| mov -12(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov -8(%eax), %ecx /* dword at offset -8 */ |
| mov -8(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov -4(%eax), %ecx /* final dword */ |
| mov -4(%edx), %ebx |
| cmp %ebx, %ecx |
| mov $0, %eax /* equal result, set without touching the flags */ |
| jne L(find_diff) |
| RETURN |
| |
| ALIGN (4) |
| L(less16bytes): /* ebx = negative offset of a 16-byte chunk for which ptest reported a difference */ |
| add %ebx, %eax /* rebase both pointers to the start of that chunk */ |
| add %ebx, %edx |
| |
| mov (%eax), %ecx /* ecx = dword of block 1 */ |
| mov (%edx), %ebx /* ebx = dword of block 2 */ |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov 4(%eax), %ecx |
| mov 4(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov 8(%eax), %ecx |
| mov 8(%edx), %ebx |
| cmp %ebx, %ecx |
| jne L(find_diff) |
| |
| mov 12(%eax), %ecx |
| mov 12(%edx), %ebx |
| cmp %ebx, %ecx |
| mov $0, %eax /* mov keeps the flags for the jne below */ |
| jne L(find_diff) |
| RETURN /* NOTE(review): every jump here follows a ptest mismatch, so this looks unreachable - confirm */ |
| |
| ALIGN (4) |
| L(find_diff): /* ecx = dword of block 1, ebx = dword of block 2, and they differ; %ebx is pushed */ |
| cmpb %bl, %cl /* lowest-addressed byte first (x86 is little-endian) */ |
| jne L(end) |
| cmp %bx, %cx /* low bytes are equal, so this is decided by the second byte */ |
| jne L(end) |
| shr $16,%ecx /* bring the upper two bytes down */ |
| shr $16,%ebx |
| cmp %bl, %cl /* third byte */ |
| jne L(end) |
| cmp %bx, %cx /* fourth byte; falls through with its flags */ |
| L(end): /* flags = compare of the first differing bytes (block 1 minus block 2) */ |
| POP (%ebx) |
| mov $1, %eax /* mov and popl leave those flags untouched */ |
| ja L(bigger) /* block 1 byte is above (unsigned): return 1 */ |
| neg %eax /* otherwise return -1 */ |
| L(bigger): |
| ret |
| END (MEMCMP) |
| |
| .section .rodata.sse4.2,"a",@progbits |
| ALIGN (2) |
| .type L(table_64bytes), @object |
| L(table_64bytes): /* entry N dispatches to L(Nbytes) for N = 0..64; indexed by byte count, 4 bytes per entry */ |
| .int JMPTBL (L(0bytes), L(table_64bytes)) |
| .int JMPTBL (L(1bytes), L(table_64bytes)) |
| .int JMPTBL (L(2bytes), L(table_64bytes)) |
| .int JMPTBL (L(3bytes), L(table_64bytes)) |
| .int JMPTBL (L(4bytes), L(table_64bytes)) |
| .int JMPTBL (L(5bytes), L(table_64bytes)) |
| .int JMPTBL (L(6bytes), L(table_64bytes)) |
| .int JMPTBL (L(7bytes), L(table_64bytes)) |
| .int JMPTBL (L(8bytes), L(table_64bytes)) |
| .int JMPTBL (L(9bytes), L(table_64bytes)) |
| .int JMPTBL (L(10bytes), L(table_64bytes)) |
| .int JMPTBL (L(11bytes), L(table_64bytes)) |
| .int JMPTBL (L(12bytes), L(table_64bytes)) |
| .int JMPTBL (L(13bytes), L(table_64bytes)) |
| .int JMPTBL (L(14bytes), L(table_64bytes)) |
| .int JMPTBL (L(15bytes), L(table_64bytes)) |
| .int JMPTBL (L(16bytes), L(table_64bytes)) |
| .int JMPTBL (L(17bytes), L(table_64bytes)) |
| .int JMPTBL (L(18bytes), L(table_64bytes)) |
| .int JMPTBL (L(19bytes), L(table_64bytes)) |
| .int JMPTBL (L(20bytes), L(table_64bytes)) |
| .int JMPTBL (L(21bytes), L(table_64bytes)) |
| .int JMPTBL (L(22bytes), L(table_64bytes)) |
| .int JMPTBL (L(23bytes), L(table_64bytes)) |
| .int JMPTBL (L(24bytes), L(table_64bytes)) |
| .int JMPTBL (L(25bytes), L(table_64bytes)) |
| .int JMPTBL (L(26bytes), L(table_64bytes)) |
| .int JMPTBL (L(27bytes), L(table_64bytes)) |
| .int JMPTBL (L(28bytes), L(table_64bytes)) |
| .int JMPTBL (L(29bytes), L(table_64bytes)) |
| .int JMPTBL (L(30bytes), L(table_64bytes)) |
| .int JMPTBL (L(31bytes), L(table_64bytes)) |
| .int JMPTBL (L(32bytes), L(table_64bytes)) |
| .int JMPTBL (L(33bytes), L(table_64bytes)) |
| .int JMPTBL (L(34bytes), L(table_64bytes)) |
| .int JMPTBL (L(35bytes), L(table_64bytes)) |
| .int JMPTBL (L(36bytes), L(table_64bytes)) |
| .int JMPTBL (L(37bytes), L(table_64bytes)) |
| .int JMPTBL (L(38bytes), L(table_64bytes)) |
| .int JMPTBL (L(39bytes), L(table_64bytes)) |
| .int JMPTBL (L(40bytes), L(table_64bytes)) |
| .int JMPTBL (L(41bytes), L(table_64bytes)) |
| .int JMPTBL (L(42bytes), L(table_64bytes)) |
| .int JMPTBL (L(43bytes), L(table_64bytes)) |
| .int JMPTBL (L(44bytes), L(table_64bytes)) |
| .int JMPTBL (L(45bytes), L(table_64bytes)) |
| .int JMPTBL (L(46bytes), L(table_64bytes)) |
| .int JMPTBL (L(47bytes), L(table_64bytes)) |
| .int JMPTBL (L(48bytes), L(table_64bytes)) |
| .int JMPTBL (L(49bytes), L(table_64bytes)) |
| .int JMPTBL (L(50bytes), L(table_64bytes)) |
| .int JMPTBL (L(51bytes), L(table_64bytes)) |
| .int JMPTBL (L(52bytes), L(table_64bytes)) |
| .int JMPTBL (L(53bytes), L(table_64bytes)) |
| .int JMPTBL (L(54bytes), L(table_64bytes)) |
| .int JMPTBL (L(55bytes), L(table_64bytes)) |
| .int JMPTBL (L(56bytes), L(table_64bytes)) |
| .int JMPTBL (L(57bytes), L(table_64bytes)) |
| .int JMPTBL (L(58bytes), L(table_64bytes)) |
| .int JMPTBL (L(59bytes), L(table_64bytes)) |
| .int JMPTBL (L(60bytes), L(table_64bytes)) |
| .int JMPTBL (L(61bytes), L(table_64bytes)) |
| .int JMPTBL (L(62bytes), L(table_64bytes)) |
| .int JMPTBL (L(63bytes), L(table_64bytes)) |
| .int JMPTBL (L(64bytes), L(table_64bytes)) |
| .size L(table_64bytes), .-L(table_64bytes) |
| #endif |