/* blob: 0de0a113c041103cb61d5d39a17389ccb92187b8 [file] [log] [blame] */
/* strcmp with SSE4.2
Copyright (C) 2010 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef NOT_IN_libc
#include <sysdep.h>
#include "asm-syntax.h"
/* Call-frame annotations that accompany a 4-byte push or pop of REG.
   The cfi_* helpers are expected to come from <sysdep.h>.  */
#define CFI_PUSH(REG)	cfi_adjust_cfa_offset (4); cfi_rel_offset (REG, 0)
#define CFI_POP(REG)	cfi_adjust_cfa_offset (-4); cfi_restore (REG)

/* Push or pop REG and emit the matching call-frame annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Build-time configuration.
   STRCMP       symbol name of the routine (may be preset by the includer).
   STR1, STR2   %esp-relative offsets of the two string arguments.
   CNT          %esp-relative offset of the byte limit (strncmp build only).
   RETURN       return sequence used on the paths that have not pushed
                %ebx, %edi and %esi; it ends with .p2align 4, so whatever
                follows it starts on a 16-byte boundary.  */
#ifdef USE_AS_STRNCMP
/* The strncmp build pushes %ebp before it reads its arguments, so the
   argument offsets start at 8.  RETURN pops %ebp again; the trailing
   CFI_PUSH re-declares %ebp as pushed for the code placed after the
   ret.  */
# ifndef STRCMP
#  define STRCMP	__strncmp_sse4_2
# endif
# define STR1		8
# define STR2		STR1+4
# define CNT		STR2+4
# define RETURN		POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
#else
/* The strcmp build pushes nothing before it reads its arguments, so the
   first one sits directly above the return address.  */
# ifndef STRCMP
#  define STRCMP	__strcmp_sse4_2
# endif
# define STR1		4
# define STR2		STR1+4
# define RETURN		ret; .p2align 4
#endif
/* STRCMP -- byte-string comparison for 32-bit x86 built on PTEST and
   PCMPISTRI.  With USE_AS_STRNCMP defined the same code also honours a
   byte limit.

   Arguments are read from the stack:
     STR1(%esp)   first string  (s1)
     STR2(%esp)   second string (s2)
     CNT(%esp)    maximum number of bytes to compare (USE_AS_STRNCMP only)

   Result in %eax:
     0             the strings match up to a NUL byte, or up to the byte
                   limit when USE_AS_STRNCMP is defined;
     +1 or -1      from L(neq): the s1 byte is above or below the s2 byte
                   when both are taken as unsigned values;
     s1[i]-s2[i]   from L(ret): difference of the zero-extended bytes at
                   the first mismatch found by the 16-byte loop.

   Registers: %ecx, %edx, %xmm0-%xmm2 and the flags are overwritten.
   %ebx, %edi and %esi are pushed before use and popped before returning;
   %ebp is handled the same way when USE_AS_STRNCMP is defined.

   16-byte loads are only issued while (pointer & 0xfff) <= 0xff0, i.e.
   while the load cannot run past a 4 KiB boundary.  Closer to the
   boundary the code compares one byte at a time.  */
	.section .text.sse4.2,"ax",@progbits
ENTRY (STRCMP)
#ifdef USE_AS_STRNCMP
	PUSH	(%ebp)
#endif
	mov	STR1(%esp), %edx		/* %edx = s1 */
	mov	STR2(%esp), %eax		/* %eax = s2 */
#ifdef USE_AS_STRNCMP
	movl	CNT(%esp), %ebp			/* %ebp = bytes left to compare */
	test	%ebp, %ebp
	je	L(eq)				/* limit of 0: equal */
#endif
	/* Is a 16-byte load from s1 safe?  Done with 32-bit registers, the
	   same way as the check on s2 below; %ecx is overwritten on both
	   successor paths, so its upper half does not matter.  */
	mov	%edx, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(first4bytes)
	movdqu	(%edx), %xmm2			/* %xmm2 = s1[0..15] */
	/* Same question for s2.  */
	mov	%eax, %ecx
	and	$0xfff, %ecx
	cmp	$0xff0, %ecx
	ja	L(first4bytes)
	/* Quick reject on the first dword.  */
	movd	%xmm2, %ecx			/* %ecx = s1[0..3] */
	cmp	(%eax), %ecx
	jne	L(less4bytes)
	/* With %xmm0 all-zero, PTEST sets CF exactly when its source
	   operand is all-zero.  */
	movdqu	(%eax), %xmm1
	pxor	%xmm2, %xmm1			/* non-zero where s1 and s2 differ */
	pxor	%xmm0, %xmm0
	ptest	%xmm1, %xmm0
	jnc	L(less16bytes)			/* a difference within 16 bytes */
	pcmpeqb	%xmm0, %xmm2			/* 0xff in every NUL position */
	ptest	%xmm2, %xmm0
	jnc	L(less16bytes)			/* a NUL within 16 bytes */

	/* First 16 bytes are equal and contain no NUL.  */
#ifdef USE_AS_STRNCMP
	sub	$16, %ebp
	jbe	L(eq)
#endif
	add	$16, %edx
	add	$16, %eax

	/* Compare the next eight bytes one at a time (despite the label
	   name).  Each step: mismatch -> L(neq), NUL -> L(eq), and in the
	   strncmp build limit reached -> L(eq).  */
L(first4bytes):
	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	je	L(eq)
#endif
	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	je	L(eq)
#endif
	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	je	L(eq)
#endif
	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	je	L(eq)
#endif
	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	je	L(eq)
#endif
	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	je	L(eq)
#endif
	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	je	L(eq)
#endif
	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)

#ifdef USE_AS_STRNCMP
	/* The per-byte limit checks above already returned for limits of
	   1..7, so %ebp is at least 8 here.  */
	sub	$8, %ebp
	je	L(eq)
#endif
	add	$8, %eax
	add	$8, %edx

	/* Main loop set-up.  From here on the exit paths are L(ret) and
	   L(more16byteseq), which pop the three registers saved here.  */
	PUSH	(%ebx)
	PUSH	(%edi)
	PUSH	(%esi)
#ifdef USE_AS_STRNCMP
	cfi_remember_state
#endif
	mov	%edx, %edi			/* %edi = s1 cursor */
	mov	%eax, %esi			/* %esi = s2 cursor */
	xorl	%eax, %eax			/* result stays 0 until a mismatch */

	/* %ecx = larger of the two offsets within the 4 KiB page and
	   %edx = that offset - 0xff0.  Both cursors are moved back by %edx
	   so that (%edi,%edx) and (%esi,%edx) address the current bytes.
	   While %edx <= 0 a 16-byte load stays inside the page for both
	   strings.  */
L(check_offset):
	movl	%edi, %ebx
	movl	%esi, %ecx
	andl	$0xfff, %ebx
	andl	$0xfff, %ecx
	cmpl	%ebx, %ecx
	cmovl	%ebx, %ecx			/* %ecx = max (%ebx, %ecx) */
	lea	-0xff0(%ecx), %edx
	sub	%edx, %edi
	sub	%edx, %esi
	testl	%edx, %edx
	jg	L(crosspage)

	/* PCMPISTRI with imm8 0x1a: byte elements, "equal each"
	   aggregation, negated result, least significant index.  %ecx
	   receives the index of the first position where the two chunks
	   disagree.  CF is set when such a position exists, ZF when the
	   chunk in %xmm2 contains a NUL, so jbe leaves the loop in either
	   case.  */
L(loop):
	movdqu	(%esi,%edx), %xmm2
	movdqu	(%edi,%edx), %xmm1
	pcmpistri	$0x1a, %xmm2, %xmm1
	jbe	L(end)

#ifdef USE_AS_STRNCMP
	sub	$16, %ebp
	jbe	L(more16byteseq)
#endif

	add	$16, %edx
	jle	L(loop)

	/* %edx > 0: a 16-byte load could run past the page boundary.
	   Compare byte by byte until %edx exceeds 15, then fold %edx back
	   into the cursors and recompute the distance to the boundary.  */
L(crosspage):
	movzbl	(%edi,%edx), %eax
	movzbl	(%esi,%edx), %ebx
	subl	%ebx, %eax			/* %eax = s1 byte - s2 byte */
	jne	L(ret)
	testl	%ebx, %ebx
	je	L(ret)				/* both bytes NUL; %eax is 0 */
#ifdef USE_AS_STRNCMP
	sub	$1, %ebp
	jbe	L(more16byteseq)
#endif
	inc	%edx
	cmp	$15, %edx
	jle	L(crosspage)
	add	%edx, %edi
	add	%edx, %esi
	jmp	L(check_offset)

	.p2align 4
	/* Loop exit.  CF clear means the chunks matched and a NUL was
	   seen, so return the 0 already held in %eax.  Otherwise %ecx is
	   the index of the first mismatch within the chunk.  */
L(end):
	jnc	L(ret)
#ifdef USE_AS_STRNCMP
	sub	%ecx, %ebp
	jbe	L(more16byteseq)		/* mismatch lies at or past the limit */
#endif
	lea	(%ecx,%edx), %ebx
	movzbl	(%edi,%ebx), %eax
	movzbl	(%esi,%ebx), %ecx
	subl	%ecx, %eax			/* difference of the unsigned bytes */
L(ret):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#ifdef USE_AS_STRNCMP
	POP	(%ebp)
#endif
	ret

	.p2align 4
#ifdef USE_AS_STRNCMP
	/* Back to the call-frame state recorded after the three pushes.
	   The byte limit ran out inside the main loop: drop the saved
	   registers and return 0 through L(eq).  */
	cfi_restore_state
L(more16byteseq):
	POP	(%esi)
	POP	(%edi)
	POP	(%ebx)
#endif
L(eq):
	xorl	%eax, %eax
	RETURN

	/* Reached straight from a cmpb of the s1 byte against the s2 byte.
	   mov leaves the flags alone, so ja still tests that comparison:
	   +1 when the s1 byte is above the s2 byte (unsigned), else -1.  */
L(neq):
	mov	$1, %eax
	ja	L(neq_bigger)
	neg	%eax
L(neq_bigger):
	RETURN

	/* The first 16 bytes hold a difference or a NUL.  On entry %ecx is
	   the first dword, already known to be identical in both strings.
	   Zero-byte test on a dword: adding 0xfefefeff (-0x01010101)
	   without a carry means the dword was below 0x01010101 and so has
	   a zero byte; otherwise the xor/or/add sequence leaves a non-zero
	   value when one of the three low bytes is zero.  */
L(less16bytes):
	add	$0xfefefeff, %ecx
	jnc	L(less4bytes)
	xor	(%edx), %ecx
	or	$0xfefefeff, %ecx
	add	$1, %ecx
	jnz	L(less4bytes)

#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	jbe	L(eq)
#endif
	/* Bytes 0..3 are equal and NUL-free; repeat for bytes 4..7.  */
	mov	4(%edx), %ecx
	cmp	4(%eax), %ecx
	jne	L(more4bytes)
	add	$0xfefefeff, %ecx
	jnc	L(more4bytes)
	xor	4(%edx), %ecx
	or	$0xfefefeff, %ecx
	add	$1, %ecx
	jnz	L(more4bytes)

#ifdef USE_AS_STRNCMP
	sub	$8, %ebp
	jbe	L(eq)
#endif

	/* Bytes 0..7 are equal and NUL-free, so the difference or NUL is
	   in bytes 8..15.  Advance and examine them byte by byte.  */
	add	$8, %edx
	add	$8, %eax

	/* Byte-wise comparison of bytes 0..3, falling through into
	   L(more4bytes) for bytes 4..7.  */
L(less4bytes):
	movzbl	(%eax), %ecx
	cmpb	%cl, (%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$1, %ebp
	je	L(eq)
#endif
	movzbl	1(%eax), %ecx
	cmpb	%cl, 1(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$2, %ebp
	je	L(eq)
#endif
	movzbl	2(%eax), %ecx
	cmpb	%cl, 2(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$3, %ebp
	je	L(eq)
#endif
	movzbl	3(%eax), %ecx
	cmpb	%cl, 3(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
L(more4bytes):
#ifdef USE_AS_STRNCMP
	cmp	$4, %ebp
	je	L(eq)
#endif
	movzbl	4(%eax), %ecx
	cmpb	%cl, 4(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$5, %ebp
	je	L(eq)
#endif
	movzbl	5(%eax), %ecx
	cmpb	%cl, 5(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$6, %ebp
	je	L(eq)
#endif
	movzbl	6(%eax), %ecx
	cmpb	%cl, 6(%edx)
	jne	L(neq)
	testl	%ecx, %ecx
	je	L(eq)
#ifdef USE_AS_STRNCMP
	cmp	$7, %ebp
	je	L(eq)
#endif
	movzbl	7(%eax), %ecx
	cmpb	%cl, 7(%edx)
	jne	L(neq)
	/* The last byte of the window matched as well: report equality
	   without a separate NUL test.  */
	jmp	L(eq)
END (STRCMP)
#endif