blob: 280cff7fb907d92ad4253f394325fbc7d5624fe2 [file] [log] [blame]
# Export _start, the label where this test program begins, as a global symbol.
.globl _start
_start:
# This code tests for the fldcw "load x87 FPU control word"
# instruction. On most x86 processors the retired_instruction
# performance counter counts this as one instruction. However,
# on Pentium 4 systems it counts as two. Therefore this can
# affect BBV results on such a system.
# fldcw is most often used to set the rounding mode when doing
# floating point to integer conversions.
# It is encoded as "d9 /5" which means
# 1101 1001 xx10 1yyy
# Where xx is the "mod" which will be 00, 01, or 10 indicating offset
# and yyy is the register field
# these are instructions with similar encodings to fldcw
# that can cause false positives if the test isn't explicit enough
# (same d9 opcode and the same 101 in bits 5:3 of the second byte,
# but the top two "mod" bits are 11 rather than 00, 01, or 10).
# Each of these pushes a constant onto the x87 stack; none is popped here.
similar:
fld1 # d9 e8
fldl2t # d9 e9
fldl2e # d9 ea
fldpi # d9 eb
fldlg2 # d9 ec
fldln2 # d9 ed
fldz # d9 ee
# check some varied ways of calling fldcw
# offset on stack
# The current control word is stored into a temporary stack slot and
# loaded straight back, so the FPU settings are left as they were.
stack:
sub $8,%rsp # allocate space on stack
fnstcw 2(%rsp) # store current control word at rsp+2
fldcw 2(%rsp) # reload it: fldcw with rsp base plus displacement
add $8,%rsp # restore stack
# 64-bit register
# The control word is first stored to cw, then reloaded four times,
# each time through a different 64-bit base register holding the
# address of cw.
sixtyfour_reg:
fnstcw cw # cw = current control word
mov $cw,%rax # rax = address of cw
fldcw 0(%rax) # rax
mov $cw,%rbx # rbx = address of cw
fldcw 0(%rbx) # rbx
mov $cw,%rcx # rcx = address of cw
fldcw 0(%rcx) # rcx
mov $cw,%rdx # rdx = address of cw
fldcw 0(%rdx) # rdx
# 32-bit register
# Note! The assembler that comes with SuSE 9.1
# cannot assemble 32-bit fldcw on 64-bit systems
# Hence the need to hand-code them
# Byte layout of each hand-coded instruction below:
#   0x67  address-size override prefix (32-bit base register)
#   0xd9  fldcw opcode
#   0x2N  modrm byte: mod=00 (no displacement), /5, low 3 bits pick the register
# NOTE(review): the address of cw is moved into a 32-bit register, so
# this presumably relies on cw being linked below 4 GiB - confirm.
thirtytwo_reg:
fnstcw cw # cw = current control word
mov $cw,%eax
# fldcw 0(%eax) # eax
.byte 0x67,0xd9,0x28
mov $cw,%ebx
# fldcw 0(%ebx) # ebx
.byte 0x67,0xd9,0x2b
mov $cw,%ecx
# fldcw 0(%ecx) # ecx
.byte 0x67,0xd9,0x29
mov $cw,%edx
# fldcw 0(%edx) # edx
.byte 0x67,0xd9,0x2a
# register + 8-bit offset
# The base register is set to (address of cw) - 32 so that the
# 32-byte displacement in each instruction lands back on cw.
# Byte layout: 0x67 address-size prefix, 0xd9 opcode, modrm 0x6N
# (mod=01, /5, register in the low 3 bits), then the displacement 0x20.
eight_bit:
mov $cw,%eax
sub $32,%eax # eax = cw - 32
# fldcw 32(%eax) # eax + 8 bit offset
.byte 0x67,0xd9,0x68,0x20
mov %eax,%ebx # same biased base in ebx
# fldcw 32(%ebx) # ebx + 8 bit offset
.byte 0x67,0xd9,0x6b,0x20
mov %eax,%ecx # same biased base in ecx
# fldcw 32(%ecx) # ecx + 8 bit offset
.byte 0x67,0xd9,0x69,0x20
mov %eax,%edx # same biased base in edx
# fldcw 32(%edx) # edx + 8 bit offset
.byte 0x67,0xd9,0x6a,0x20
# register + 32-bit offset
# The base register is set to (address of cw) - 30000 so that the
# displacement in each instruction lands back on cw.
# Byte layout: 0x67 address-size prefix, 0xd9 opcode, modrm 0xaN
# (mod=10, /5, register in the low 3 bits), then the four displacement
# bytes 0x30,0x75,0x00,0x00 = 0x00007530 = 30000, least significant first.
thirtytwo_bit:
mov $cw,%eax
sub $30000,%eax # eax = cw - 30000
# fldcw 30000(%eax) # eax + 32 bit offset
.byte 0x67,0xd9,0xa8,0x30,0x75,0x00,0x00
mov %eax,%ebx # same biased base in ebx
# fldcw 30000(%ebx) # ebx + 32 bit offset
.byte 0x67,0xd9,0xab,0x30,0x75,0x00,0x00
mov %eax,%ecx # same biased base in ecx
# fldcw 30000(%ecx) # ecx + 32 bit offset
.byte 0x67,0xd9,0xa9,0x30,0x75,0x00,0x00
mov %eax,%edx # same biased base in edx
# fldcw 30000(%edx) # edx + 32 bit offset
.byte 0x67,0xd9,0xaa,0x30,0x75,0x00,0x00
# check an fp/integer conversion
# in a loop to give a bigger count
# Each pass converts 3.0 to an integer with the rounding-control bits
# temporarily forced to "round to zero", then puts the old control
# word back, so fldcw executes twice per iteration.
mov $1024,%rcx # iteration count, consumed by the loop instruction
big_loop:
fldl three # load value onto fp stack
fnstcw saved_cw # store control word to mem
movzwl saved_cw, %eax # load cw from mem, zero extending
movb $12, %ah # set cw for "round to zero" (replaces the whole high byte of the cw)
movw %ax, cw # store back to memory
fldcw cw # load new rounding mode
fistpl result # save stack value as integer to mem
fldcw saved_cw # restore old cw
loop big_loop # loop to make the count more obvious
movl result, %ebx # sanity check to see if the
cmp $3,%rbx # result is the expected one
je exit # match: skip the error message
print_error:
# Reached only when the converted result is not 3: write the error
# message to stdout, then fall through to exit.
# The message is 21 characters long; the terminating NUL that .asciz
# appends is deliberately not written.
mov $1,%rax # write syscall
mov $1,%rdi # stdout
mov $error,%rsi # string
mov $21,%rdx # length of string, excluding the trailing NUL
syscall
exit:
# Terminate the process with status 0. The error path above also
# falls through to here, so the status is 0 in both cases.
xor %rdi, %rdi # return 0
mov $60, %rax # SYSCALL_EXIT
syscall
.data
# Scratch storage used by the fldcw tests; layout and contents are
# unchanged: three 4-byte slots, one 8-byte double, then the message.
saved_cw:
        .int 0                  # control word stored at the top of each loop pass
cw:
        .int 0                  # control word operand for the fldcw variants
result:
        .int 0                  # integer written by fistpl
three:
        .double 3.0             # a floating point 3.0 (low word 0, high word 1074266112)
error:
        .asciz "Error! Wrong result!\n"