| # |
| # This Source Code Form is subject to the terms of the Mozilla Public |
| # License, v. 2.0. If a copy of the MPL was not distributed with this |
| # file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| |
| .data |
| .align 4 |
| # is_sse caches the result of the SSE2 probe used by the dispatchers below: |
| # -1 means not probed yet: call _s_mpi_is_sse2 and store its result |
| # here. |
| # 0 means to use x86 instructions |
| # 1 (any positive value) means to use sse2 instructions |
| .type is_sse,@object |
| .size is_sse,4 |
| is_sse: .long -1 |
| |
| # |
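| # Handle the difference between -fPIC and non-PIC builds. |
| # The PIC variants of GET/PUT (GOTOFF addressing through %ebx) are |
| # commented out below, so the absolute-address, non-PIC variants are |
| # always used. This file seems to be exclusively linux right now |
| # (solaris uses mpi_i86pc.s and windows uses mpi_x86_asm.c). |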
| #.ifndef NO_PIC |
| #.macro GET var,reg |
| # movl \var@GOTOFF(%ebx),\reg |
| #.endm |
| #.macro PUT reg,var |
| # movl \reg,\var@GOTOFF(%ebx) |
| #.endm |
| #.else |
| # GET var,reg: load the 32-bit value stored at symbol var into reg. |
| .macro GET var,reg |
| movl \var,\reg |
| .endm |
| # PUT reg,var: store the 32-bit value in reg to symbol var. |
| .macro PUT reg,var |
| movl \reg,\var |
| .endm |
| #.endif |
| |
| .text |
| |
| |
| # _s_mpv_mul_d |
| # |
| # For i in 0 .. a_len-1: c[i] = low 32 bits of (a[i] * b + carry) and |
| # carry = high 32 bits of that sum. Finally c[a_len] = carry. |
| # |
| # Arguments, relative to ebp after the prologue: |
| # ebp + 0: caller's ebp |
| # ebp + 4: return address |
| # ebp + 8: a argument |
| # ebp + 12: a_len argument |
| # ebp + 16: b argument |
| # ebp + 20: c argument |
| # Frame of the x86 path: |
| # ebp - 4 .. ebp - 28: reserved by "sub $28,%esp", never accessed |
| # ebp - 32: caller's edi |
| # ebp - 36: caller's esi |
| # ebp - 40: caller's ebx |
| # Frame of the sse2 path: |
| # ebp - 4: caller's edi |
| # ebp - 8: caller's esi |
| # registers (x86 path): |
| # eax: current word of a, then low half of the product |
| # ebx: carry |
| # ecx: a_len (words still to process) |
| # edx: b, then high half of the product |
| # esi: a ptr |
| # edi: c ptr |
| # registers (sse2 path): |
| # mm0: current word of a, then the 64-bit product |
| # mm1: b |
| # mm2: carry / running 64-bit sum |
| # ecx, esi, edi: as in the x86 path |
| .globl _s_mpv_mul_d |
| .type _s_mpv_mul_d,@function |
| _s_mpv_mul_d: |
| # Dispatch on is_sse: 0 -> x86 path, > 0 -> sse2 path, < 0 -> probe first. |
| GET is_sse,%eax |
| cmp $0,%eax |
| je _s_mpv_mul_d_x86 |
| jg _s_mpv_mul_d_sse2 |
| # Not probed yet. When the caller follows the i386 System V ABI, %esp is |
| # 4 bytes (the return address) below a 16-byte boundary here, so pad by |
| # 12 bytes to make %esp 16-byte aligned at the call. |
| sub $12,%esp |
| call _s_mpi_is_sse2 |
| add $12,%esp |
| PUT %eax,is_sse # cache the probe result for later calls |
| cmp $0,%eax |
| jg _s_mpv_mul_d_sse2 |
| _s_mpv_mul_d_x86: |
| push %ebp |
| mov %esp,%ebp |
| sub $28,%esp |
| push %edi |
| push %esi |
| push %ebx |
| movl $0,%ebx # carry = 0 |
| mov 12(%ebp),%ecx # ecx = a_len |
| mov 20(%ebp),%edi # edi = c |
| cmp $0,%ecx |
| je 2f # jmp if a_len == 0 |
| mov 8(%ebp),%esi # esi = a |
| cld |
| 1: |
| lodsl # eax = [ds:esi]; esi += 4 |
| mov 16(%ebp),%edx # edx = b (reloaded: mull overwrites edx) |
| mull %edx # edx:eax = Phi:Plo = a_i * b |
| |
| add %ebx,%eax # add carry (%ebx) to edx:eax |
| adc $0,%edx |
| mov %edx,%ebx # high half of product becomes next carry |
| |
| stosl # [es:edi] = eax; edi += 4; |
| dec %ecx # --a_len |
| jnz 1b # jmp if a_len != 0 |
| 2: |
| mov %ebx,0(%edi) # *c = carry |
| pop %ebx |
| pop %esi |
| pop %edi |
| leave |
| ret |
| nop |
| _s_mpv_mul_d_sse2: |
| push %ebp |
| mov %esp,%ebp |
| push %edi |
| push %esi |
| psubq %mm2,%mm2 # carry = 0 |
| mov 12(%ebp),%ecx # ecx = a_len |
| movd 16(%ebp),%mm1 # mm1 = b |
| mov 20(%ebp),%edi # edi = c |
| cmp $0,%ecx |
| je 6f # jmp if a_len == 0 |
| mov 8(%ebp),%esi # esi = a |
| cld |
| 5: |
| movd 0(%esi),%mm0 # mm0 = *a++ |
| add $4,%esi |
| pmuludq %mm1,%mm0 # mm0 = b * *a++ |
| paddq %mm0,%mm2 # add the carry |
| movd %mm2,0(%edi) # store the 32bit result |
| add $4,%edi |
| psrlq $32, %mm2 # save the carry |
| dec %ecx # --a_len |
| jnz 5b # jmp if a_len != 0 |
| 6: |
| movd %mm2,0(%edi) # *c = carry |
| emms # leave MMX state so x87 code can run again |
| pop %esi |
| pop %edi |
| leave |
| ret |
| nop |
| |
| # _s_mpv_mul_d_add |
| # |
| # For i in 0 .. a_len-1: c[i] = low 32 bits of (a[i] * b + carry + c[i]) |
| # and carry = high 32 bits of that sum. Finally c[a_len] is overwritten |
| # with the last carry. |
| # |
| # Arguments, relative to ebp after the prologue: |
| # ebp + 0: caller's ebp |
| # ebp + 4: return address |
| # ebp + 8: a argument |
| # ebp + 12: a_len argument |
| # ebp + 16: b argument |
| # ebp + 20: c argument |
| # Frame of the x86 path: |
| # ebp - 4 .. ebp - 28: reserved by "sub $28,%esp", never accessed |
| # ebp - 32: caller's edi |
| # ebp - 36: caller's esi |
| # ebp - 40: caller's ebx |
| # Frame of the sse2 path: |
| # ebp - 4: caller's edi |
| # ebp - 8: caller's esi |
| # registers (x86 path): |
| # eax: current word of a, then low half of the running sum |
| # ebx: carry (also scratch for the current word of c) |
| # ecx: a_len (words still to process) |
| # edx: b, then high half of the running sum |
| # esi: a ptr |
| # edi: c ptr |
| # registers (sse2 path): |
| # mm0: current word of a, the product, then the current word of c |
| # mm1: b |
| # mm2: carry / running 64-bit sum |
| # ecx, esi, edi: as in the x86 path |
| .globl _s_mpv_mul_d_add |
| .type _s_mpv_mul_d_add,@function |
| _s_mpv_mul_d_add: |
| # Dispatch on is_sse: 0 -> x86 path, > 0 -> sse2 path, < 0 -> probe first. |
| GET is_sse,%eax |
| cmp $0,%eax |
| je _s_mpv_mul_d_add_x86 |
| jg _s_mpv_mul_d_add_sse2 |
| # Not probed yet. When the caller follows the i386 System V ABI, %esp is |
| # 4 bytes (the return address) below a 16-byte boundary here, so pad by |
| # 12 bytes to make %esp 16-byte aligned at the call. |
| sub $12,%esp |
| call _s_mpi_is_sse2 |
| add $12,%esp |
| PUT %eax,is_sse # cache the probe result for later calls |
| cmp $0,%eax |
| jg _s_mpv_mul_d_add_sse2 |
| _s_mpv_mul_d_add_x86: |
| push %ebp |
| mov %esp,%ebp |
| sub $28,%esp |
| push %edi |
| push %esi |
| push %ebx |
| movl $0,%ebx # carry = 0 |
| mov 12(%ebp),%ecx # ecx = a_len |
| mov 20(%ebp),%edi # edi = c |
| cmp $0,%ecx |
| je 11f # jmp if a_len == 0 |
| mov 8(%ebp),%esi # esi = a |
| cld |
| 10: |
| lodsl # eax = [ds:esi]; esi += 4 |
| mov 16(%ebp),%edx # edx = b (reloaded: mull overwrites edx) |
| mull %edx # edx:eax = Phi:Plo = a_i * b |
| |
| add %ebx,%eax # add carry (%ebx) to edx:eax |
| adc $0,%edx |
| mov 0(%edi),%ebx # add in current word from *c |
| add %ebx,%eax |
| adc $0,%edx |
| mov %edx,%ebx # high half of product becomes next carry |
| |
| stosl # [es:edi] = eax; edi += 4; |
| dec %ecx # --a_len |
| jnz 10b # jmp if a_len != 0 |
| 11: |
| mov %ebx,0(%edi) # *c = carry |
| pop %ebx |
| pop %esi |
| pop %edi |
| leave |
| ret |
| nop |
| _s_mpv_mul_d_add_sse2: |
| push %ebp |
| mov %esp,%ebp |
| push %edi |
| push %esi |
| psubq %mm2,%mm2 # carry = 0 |
| mov 12(%ebp),%ecx # ecx = a_len |
| movd 16(%ebp),%mm1 # mm1 = b |
| mov 20(%ebp),%edi # edi = c |
| cmp $0,%ecx |
| je 16f # jmp if a_len == 0 |
| mov 8(%ebp),%esi # esi = a |
| cld |
| 15: |
| movd 0(%esi),%mm0 # mm0 = *a++ |
| add $4,%esi |
| pmuludq %mm1,%mm0 # mm0 = b * *a++ |
| paddq %mm0,%mm2 # add the carry |
| movd 0(%edi),%mm0 # mm0 = current word of *c |
| paddq %mm0,%mm2 # add *c |
| movd %mm2,0(%edi) # store the 32bit result |
| add $4,%edi |
| psrlq $32, %mm2 # save the carry |
| dec %ecx # --a_len |
| jnz 15b # jmp if a_len != 0 |
| 16: |
| movd %mm2,0(%edi) # *c = carry |
| emms # leave MMX state so x87 code can run again |
| pop %esi |
| pop %edi |
| leave |
| ret |
| nop |
| |
| # _s_mpv_mul_d_add_prop |
| # |
| # For i in 0 .. a_len-1: c[i] = low 32 bits of (a[i] * b + carry + c[i]) |
| # and carry = high 32 bits of that sum. A non-zero final carry is then |
| # added into c[a_len], and any carry out of that addition is propagated |
| # into the following words of c until it dies out. |
| # |
| # Arguments, relative to ebp after the prologue: |
| # ebp + 0: caller's ebp |
| # ebp + 4: return address |
| # ebp + 8: a argument |
| # ebp + 12: a_len argument |
| # ebp + 16: b argument |
| # ebp + 20: c argument |
| # Frame of the x86 path: |
| # ebp - 4 .. ebp - 28: reserved by "sub $28,%esp", never accessed |
| # ebp - 32: caller's edi |
| # ebp - 36: caller's esi |
| # ebp - 40: caller's ebx |
| # Frame of the sse2 path: |
| # ebp - 4: caller's edi |
| # ebp - 8: caller's esi |
| # ebp - 12: caller's ebx |
| # registers (x86 path): |
| # eax: current word of a, then low half of the running sum |
| # ebx: carry (also scratch for the current word of c) |
| # ecx: a_len (words still to process) |
| # edx: b, then high half of the running sum |
| # esi: a ptr |
| # edi: c ptr |
| # registers (sse2 path): |
| # mm0: current word of a, then the 64-bit product |
| # mm1: b |
| # mm2: carry / running 64-bit sum |
| # mm3: current word of c |
| # eax, ebx: used only by the final carry propagation |
| # ecx, esi, edi: as in the x86 path |
| .globl _s_mpv_mul_d_add_prop |
| .type _s_mpv_mul_d_add_prop,@function |
| _s_mpv_mul_d_add_prop: |
| # Dispatch on is_sse: 0 -> x86 path, > 0 -> sse2 path, < 0 -> probe first. |
| GET is_sse,%eax |
| cmp $0,%eax |
| je _s_mpv_mul_d_add_prop_x86 |
| jg _s_mpv_mul_d_add_prop_sse2 |
| # Not probed yet. When the caller follows the i386 System V ABI, %esp is |
| # 4 bytes (the return address) below a 16-byte boundary here, so pad by |
| # 12 bytes to make %esp 16-byte aligned at the call. |
| sub $12,%esp |
| call _s_mpi_is_sse2 |
| add $12,%esp |
| PUT %eax,is_sse # cache the probe result for later calls |
| cmp $0,%eax |
| jg _s_mpv_mul_d_add_prop_sse2 |
| _s_mpv_mul_d_add_prop_x86: |
| push %ebp |
| mov %esp,%ebp |
| sub $28,%esp |
| push %edi |
| push %esi |
| push %ebx |
| movl $0,%ebx # carry = 0 |
| mov 12(%ebp),%ecx # ecx = a_len |
| mov 20(%ebp),%edi # edi = c |
| cmp $0,%ecx |
| je 21f # jmp if a_len == 0 |
| cld |
| mov 8(%ebp),%esi # esi = a |
| 20: |
| lodsl # eax = [ds:esi]; esi += 4 |
| mov 16(%ebp),%edx # edx = b (reloaded: mull overwrites edx) |
| mull %edx # edx:eax = Phi:Plo = a_i * b |
| |
| add %ebx,%eax # add carry (%ebx) to edx:eax |
| adc $0,%edx |
| mov 0(%edi),%ebx # add in current word from *c |
| add %ebx,%eax |
| adc $0,%edx |
| mov %edx,%ebx # high half of product becomes next carry |
| |
| stosl # [es:edi] = eax; edi += 4; |
| dec %ecx # --a_len |
| jnz 20b # jmp if a_len != 0 |
| 21: |
| cmp $0,%ebx # is carry zero? |
| jz 23f |
| mov 0(%edi),%eax # add in current word from *c |
| add %ebx,%eax |
| stosl # [es:edi] = eax; edi += 4; (flags unchanged) |
| jnc 23f |
| 22: |
| # reached only with CF set: keep adding the carry into the next word |
| mov 0(%edi),%eax # add in current word from *c |
| adc $0,%eax |
| stosl # [es:edi] = eax; edi += 4; |
| jc 22b |
| 23: |
| pop %ebx |
| pop %esi |
| pop %edi |
| leave |
| ret |
| nop |
| _s_mpv_mul_d_add_prop_sse2: |
| push %ebp |
| mov %esp,%ebp |
| push %edi |
| push %esi |
| push %ebx |
| psubq %mm2,%mm2 # carry = 0 |
| mov 12(%ebp),%ecx # ecx = a_len |
| movd 16(%ebp),%mm1 # mm1 = b |
| mov 20(%ebp),%edi # edi = c |
| cmp $0,%ecx |
| je 26f # jmp if a_len == 0 |
| mov 8(%ebp),%esi # esi = a |
| cld |
| 25: |
| movd 0(%esi),%mm0 # mm0 = *a++ |
| movd 0(%edi),%mm3 # fetch the sum |
| add $4,%esi |
| pmuludq %mm1,%mm0 # mm0 = b * *a++ |
| paddq %mm0,%mm2 # add the carry |
| paddq %mm3,%mm2 # add *c++ |
| movd %mm2,0(%edi) # store the 32bit result |
| add $4,%edi |
| psrlq $32, %mm2 # save the carry |
| dec %ecx # --a_len |
| jnz 25b # jmp if a_len != 0 |
| 26: |
| movd %mm2,%ebx # ebx = final carry |
| cmp $0,%ebx # is carry zero? |
| jz 28f |
| mov 0(%edi),%eax # add in current word from *c |
| add %ebx, %eax |
| stosl # [es:edi] = eax; edi += 4; (flags unchanged) |
| jnc 28f |
| 27: |
| # reached only with CF set: keep adding the carry into the next word |
| mov 0(%edi),%eax # add in current word from *c |
| adc $0,%eax |
| stosl # [es:edi] = eax; edi += 4; |
| jc 27b |
| 28: |
| emms # leave MMX state so x87 code can run again |
| pop %ebx |
| pop %esi |
| pop %edi |
| leave |
| ret |
| nop |
| |
| |
| # _s_mpv_sqr_add_prop |
| # |
| # For i in 0 .. a_len-1 the 64-bit square of pa[i], plus the carry from |
| # the previous step, is added into the word pair ps[2i], ps[2i+1]; the |
| # carry out of the pair feeds the next step. A non-zero carry left after |
| # the last pair is added into the following words of ps until it dies out. |
| # |
| # Arguments, relative to ebp after the prologue: |
| # ebp + 0: caller's ebp |
| # ebp + 4: return address |
| # ebp + 8: pa argument |
| # ebp + 12: a_len argument |
| # ebp + 16: ps argument |
| # Frame of the x86 path: |
| # ebp - 4 .. ebp - 12: reserved by "sub $12,%esp", never accessed |
| # ebp - 16: caller's edi |
| # ebp - 20: caller's esi |
| # ebp - 24: caller's ebx |
| # Frame of the sse2 path: |
| # ebp - 4: caller's edi |
| # ebp - 8: caller's esi |
| # ebp - 12: caller's ebx |
| # registers (x86 path): |
| # eax: current word of pa, then the word being stored |
| # ebx: carry (also scratch for the current words of ps) |
| # ecx: a_len (words still to process) |
| # edx: high half of the square |
| # esi: pa ptr |
| # edi: ps ptr |
| # registers (sse2 path): |
| # mm0: current word of pa, then its 64-bit square |
| # mm2: carry / running 64-bit sum |
| # mm3: current word of ps |
| # eax, ebx: used only by the final carry propagation |
| # ecx, esi, edi: as in the x86 path |
| |
| .globl _s_mpv_sqr_add_prop |
| .type _s_mpv_sqr_add_prop,@function |
| _s_mpv_sqr_add_prop: |
| # Dispatch on is_sse: 0 -> x86 path, > 0 -> sse2 path, < 0 -> probe first. |
| GET is_sse,%eax |
| cmp $0,%eax |
| je _s_mpv_sqr_add_prop_x86 |
| jg _s_mpv_sqr_add_prop_sse2 |
| # Not probed yet. When the caller follows the i386 System V ABI, %esp is |
| # 4 bytes (the return address) below a 16-byte boundary here, so pad by |
| # 12 bytes to make %esp 16-byte aligned at the call. |
| sub $12,%esp |
| call _s_mpi_is_sse2 |
| add $12,%esp |
| PUT %eax,is_sse # cache the probe result for later calls |
| cmp $0,%eax |
| jg _s_mpv_sqr_add_prop_sse2 |
| _s_mpv_sqr_add_prop_x86: |
| push %ebp |
| mov %esp,%ebp |
| sub $12,%esp |
| push %edi |
| push %esi |
| push %ebx |
| movl $0,%ebx # carry = 0 |
| mov 12(%ebp),%ecx # a_len |
| mov 16(%ebp),%edi # edi = ps |
| cmp $0,%ecx |
| je 31f # jump if a_len == 0 |
| cld |
| mov 8(%ebp),%esi # esi = pa |
| 30: |
| lodsl # %eax = [ds:esi]; esi += 4; |
| mull %eax # edx:eax = a_i * a_i |
| |
| add %ebx,%eax # add "carry" |
| adc $0,%edx |
| mov 0(%edi),%ebx |
| add %ebx,%eax # add low word from result |
| mov 4(%edi),%ebx # mov and stosl leave CF from the add intact |
| stosl # [es:edi] = %eax; edi += 4; |
| adc %ebx,%edx # add high word from result |
| movl $0,%ebx # must be mov, not xor: CF is still needed |
| mov %edx,%eax |
| adc $0,%ebx # carry = CF out of the high word |
| stosl # [es:edi] = %eax; edi += 4; |
| dec %ecx # --a_len |
| jnz 30b # jmp if a_len != 0 |
| 31: |
| cmp $0,%ebx # is carry zero? |
| jz 34f |
| mov 0(%edi),%eax # add in current word from *ps |
| add %ebx,%eax |
| stosl # [es:edi] = eax; edi += 4; (flags unchanged) |
| jnc 34f |
| 32: |
| # reached only with CF set: keep adding the carry into the next word |
| mov 0(%edi),%eax # add in current word from *ps |
| adc $0,%eax |
| stosl # [es:edi] = eax; edi += 4; |
| jc 32b |
| 34: |
| pop %ebx |
| pop %esi |
| pop %edi |
| leave |
| ret |
| nop |
| _s_mpv_sqr_add_prop_sse2: |
| push %ebp |
| mov %esp,%ebp |
| push %edi |
| push %esi |
| push %ebx |
| psubq %mm2,%mm2 # carry = 0 |
| mov 12(%ebp),%ecx # ecx = a_len |
| mov 16(%ebp),%edi # edi = ps |
| cmp $0,%ecx |
| je 36f # jmp if a_len == 0 |
| mov 8(%ebp),%esi # esi = pa |
| cld |
| 35: |
| movd 0(%esi),%mm0 # mm0 = *a |
| movd 0(%edi),%mm3 # fetch the sum |
| add $4,%esi |
| pmuludq %mm0,%mm0 # mm0 = sqr(a) |
| paddq %mm0,%mm2 # add the carry |
| paddq %mm3,%mm2 # add the low word |
| movd 4(%edi),%mm3 |
| movd %mm2,0(%edi) # store the 32bit result |
| psrlq $32, %mm2 |
| paddq %mm3,%mm2 # add the high word |
| movd %mm2,4(%edi) # store the 32bit result |
| psrlq $32, %mm2 # save the carry. |
| add $8,%edi |
| dec %ecx # --a_len |
| jnz 35b # jmp if a_len != 0 |
| 36: |
| movd %mm2,%ebx # ebx = final carry |
| cmp $0,%ebx # is carry zero? |
| jz 38f |
| mov 0(%edi),%eax # add in current word from *ps |
| add %ebx, %eax |
| stosl # [es:edi] = eax; edi += 4; (flags unchanged) |
| jnc 38f |
| 37: |
| # reached only with CF set: keep adding the carry into the next word |
| mov 0(%edi),%eax # add in current word from *ps |
| adc $0,%eax |
| stosl # [es:edi] = eax; edi += 4; |
| jc 37b |
| 38: |
| emms # leave MMX state so x87 code can run again |
| pop %ebx |
| pop %esi |
| pop %edi |
| leave |
| ret |
| nop |
| |
| # |
| # _s_mpv_div_2dx1d: divide the 64-bit value (Nhi,Nlo) by a 32-bit divisor, |
| # which must be normalized so its high bit is 1. This code is from NSPR. |
| # |
| # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, |
| # mp_digit *qp, mp_digit *rp) |
| # |
| # The quotient is stored through qp and the remainder through rp; eax is |
| # zero on return. |
| |
| # Stack after the initial push: |
| # esp + 0: Caller's ebx |
| # esp + 4: return address |
| # esp + 8: Nhi argument |
| # esp + 12: Nlo argument |
| # esp + 16: divisor argument |
| # esp + 20: qp argument |
| # esp + 24: rp argument |
| # registers: |
| # eax: Nlo, then the quotient; zeroed for the return value |
| # ebx: qp, then rp (caller's value is saved and restored) |
| # edx: Nhi, then the remainder |
| # |
| |
| .globl _s_mpv_div_2dx1d |
| .type _s_mpv_div_2dx1d,@function |
| _s_mpv_div_2dx1d: |
| push %ebx |
| mov 12(%esp),%eax # eax = Nlo |
| mov 8(%esp),%edx # edx = Nhi |
| divl 16(%esp) # eax = (edx:eax) / divisor, edx = remainder |
| mov 20(%esp),%ebx # ebx = qp |
| mov %eax,(%ebx) # *qp = quotient |
| mov 24(%esp),%ebx # ebx = rp |
| mov %edx,(%ebx) # *rp = remainder |
| pop %ebx |
| xor %eax,%eax # return zero |
| ret |
| nop |
| |