# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Platform glue, chosen at preprocessing time by the DARWIN macro.
#if !defined(DARWIN)
# Without DARWIN, TYPE_FUNCTION emits a .type directive tagging the symbol
# as a function.
#define TYPE_FUNCTION(sym) .type sym, @function
#else
# With DARWIN, TYPE_FUNCTION expands to nothing and every entry point is
# renamed to its underscore-prefixed form.
#define TYPE_FUNCTION(sym)
#define s_mpv_mul_d             _s_mpv_mul_d
#define s_mpv_mul_d_add         _s_mpv_mul_d_add
#define s_mpv_mul_d_add_prop    _s_mpv_mul_d_add_prop
#define s_mpv_sqr_add_prop      _s_mpv_sqr_add_prop
#define s_mpv_div_2dx1d         _s_mpv_div_2dx1d
#endif

        .text

# s_mpv_mul_d: multiply a vector of 32-bit words by a single 32-bit word.
#
# For each of the a_len words read from a, the 64-bit product with b plus the
# running carry is formed; its low word is stored to c and its high word
# becomes the next carry.  The final carry is stored in the word after the
# last result, so a_len + 1 words of c are written (one word when a_len is 0).
#
# Frame layout after the prologue:
#   ebp - 8   saved esi
#   ebp - 4   saved edi
#   ebp + 0   saved ebp
#   ebp + 4   return address
#   ebp + 8   a
#   ebp + 12  a_len
#   ebp + 16  b
#   ebp + 20  c
# Register roles:
#   ecx  words left to process
#   esi  read cursor into a
#   edi  write cursor into c
#   mm0  current product
#   mm1  b
#   mm2  running carry
.globl s_mpv_mul_d
.private_extern s_mpv_mul_d
TYPE_FUNCTION(s_mpv_mul_d)
s_mpv_mul_d:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        mov     20(%ebp), %edi          # edi = c
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        pxor    %mm2, %mm2              # carry = 0
        test    %ecx, %ecx
        jz      2f                      # nothing to multiply: just store the zero carry
        mov     8(%ebp), %esi           # esi = a
        cld                             # direction flag cleared, though no string op follows here
1:
        movd    (%esi), %mm0            # mm0 = *a
        lea     4(%esi), %esi           # a++
        pmuludq %mm1, %mm0              # mm0 = *a * b (64-bit product)
        paddq   %mm0, %mm2              # carry += product
        movd    %mm2, (%edi)            # *c = low word of the sum
        lea     4(%edi), %edi           # c++
        psrlq   $32, %mm2               # carry = high word of the sum
        sub     $1, %ecx                # one word done
        jnz     1b
2:
        movd    %mm2, (%edi)            # *c = final carry
        emms                            # leave MMX state
        pop     %esi
        pop     %edi
        pop     %ebp                    # esp equals ebp here, so this undoes the frame
        ret
        nop

# s_mpv_mul_d_add: multiply a vector by a 32-bit word and add it into c.
#
# For each of the a_len words read from a, the sum of the 64-bit product with
# b, the existing word at c and the running carry is formed; its low word is
# written back to c and its high word becomes the next carry.  After the loop
# the final carry is stored (not added) into the word that follows, so
# a_len + 1 words of c are written.
#
# Frame layout after the prologue:
#   ebp - 8   saved esi
#   ebp - 4   saved edi
#   ebp + 0   saved ebp
#   ebp + 4   return address
#   ebp + 8   a
#   ebp + 12  a_len
#   ebp + 16  b
#   ebp + 20  c
# Register roles:
#   ecx  words left to process
#   esi  read cursor into a
#   edi  read/write cursor into c
#   mm0  current product
#   mm1  b
#   mm2  running carry
#   mm3  current word of c
.globl s_mpv_mul_d_add
.private_extern s_mpv_mul_d_add
TYPE_FUNCTION(s_mpv_mul_d_add)
s_mpv_mul_d_add:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        mov     20(%ebp), %edi          # edi = c
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        pxor    %mm2, %mm2              # carry = 0
        test    %ecx, %ecx
        jz      2f                      # nothing to multiply: just store the zero carry
        mov     8(%ebp), %esi           # esi = a
        cld                             # direction flag cleared, though no string op follows here
1:
        movd    (%esi), %mm0            # mm0 = *a
        movd    (%edi), %mm3            # mm3 = *c
        lea     4(%esi), %esi           # a++
        pmuludq %mm1, %mm0              # mm0 = *a * b (64-bit product)
        paddq   %mm3, %mm2              # carry += *c
        paddq   %mm0, %mm2              # carry += product
        movd    %mm2, (%edi)            # *c = low word of the sum
        lea     4(%edi), %edi           # c++
        psrlq   $32, %mm2               # carry = high word of the sum
        sub     $1, %ecx                # one word done
        jnz     1b
2:
        movd    %mm2, (%edi)            # *c = final carry (overwrites that word)
        emms                            # leave MMX state
        pop     %esi
        pop     %edi
        pop     %ebp                    # esp equals ebp here, so this undoes the frame
        ret
        nop

# s_mpv_mul_d_add_prop: multiply a vector by a 32-bit word, add it into c and
# propagate the final carry.
#
# For each of the a_len words read from a, the sum of the 64-bit product with
# b, the existing word at c and the running carry is formed; its low word is
# written back to c and its high word becomes the next carry.  A non-zero
# carry left after the loop is added into the following word of c, and the
# carry out of that addition keeps rippling through successive words until an
# addition produces no carry.  When the final carry is zero nothing beyond
# the first a_len words of c is touched.
#
# The carry tail uses plain memory adds with lea for the pointer step, so the
# routine contains no string instruction and does not depend on the direction
# flag.  Only scratch register eax is used for the carry, so ebx is not saved.
#
# Frame layout after the prologue:
#   ebp - 8   saved esi
#   ebp - 4   saved edi
#   ebp + 0   saved ebp
#   ebp + 4   return address
#   ebp + 8   a
#   ebp + 12  a_len
#   ebp + 16  b
#   ebp + 20  c
# Register roles:
#   eax  final carry word (scratch)
#   ecx  words left to process
#   esi  read cursor into a
#   edi  read/write cursor into c
#   mm0  current product
#   mm1  b
#   mm2  running carry
#   mm3  current word of c
.globl s_mpv_mul_d_add_prop
.private_extern s_mpv_mul_d_add_prop
TYPE_FUNCTION(s_mpv_mul_d_add_prop)
s_mpv_mul_d_add_prop:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        pxor    %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        movd    16(%ebp), %mm1          # mm1 = b
        mov     20(%ebp), %edi          # edi = c
        test    %ecx, %ecx
        jz      2f                      # a_len == 0: carry stays zero
        mov     8(%ebp), %esi           # esi = a
1:
        movd    (%esi), %mm0            # mm0 = *a
        movd    (%edi), %mm3            # mm3 = *c
        add     $4, %esi                # a++
        pmuludq %mm1, %mm0              # mm0 = *a * b (64-bit product)
        paddq   %mm0, %mm2              # carry += product
        paddq   %mm3, %mm2              # carry += *c
        movd    %mm2, (%edi)            # *c = low word of the sum
        add     $4, %edi                # c++
        psrlq   $32, %mm2               # carry = high word of the sum
        dec     %ecx                    # one word done
        jnz     1b
2:
        movd    %mm2, %eax              # eax = final carry
        test    %eax, %eax
        jz      4f                      # zero carry: nothing to propagate
        add     %eax, (%edi)            # *c += carry, CF = carry out
        lea     4(%edi), %edi           # c++ without disturbing CF
        jnc     4f
3:
        addl    $1, (%edi)              # ripple the carry bit into the next word
        lea     4(%edi), %edi           # c++ without disturbing CF
        jc      3b                      # keep going while the word wrapped around
4:
        emms                            # leave MMX state
        pop     %esi
        pop     %edi
        leave
        ret
        nop

# s_mpv_sqr_add_prop: add the squares of a vector of 32-bit words into ps and
# propagate the final carry.
#
# Each of the a_len words read from pa is squared to a 64-bit value, which is
# added together with the running carry into the next two words of ps (low
# word first); ps therefore advances by two words per input word.  A
# non-zero carry left after the loop is added into the following word of ps,
# and the carry out of that addition keeps rippling through successive words
# until an addition produces no carry.
#
# The carry tail uses plain memory adds with lea for the pointer step, so the
# routine contains no string instruction and does not depend on the direction
# flag.  Only scratch register eax is used for the carry, so ebx is not saved.
#
# Frame layout after the prologue:
#   ebp - 8   saved esi
#   ebp - 4   saved edi
#   ebp + 0   saved ebp
#   ebp + 4   return address
#   ebp + 8   pa
#   ebp + 12  a_len
#   ebp + 16  ps
# Register roles:
#   eax  final carry word (scratch)
#   ecx  words left to process
#   esi  read cursor into pa
#   edi  read/write cursor into ps
#   mm0  current square
#   mm2  running carry
#   mm3  current word of ps
.globl s_mpv_sqr_add_prop
.private_extern s_mpv_sqr_add_prop
TYPE_FUNCTION(s_mpv_sqr_add_prop)
s_mpv_sqr_add_prop:
        push    %ebp
        mov     %esp, %ebp
        push    %edi
        push    %esi
        pxor    %mm2, %mm2              # carry = 0
        mov     12(%ebp), %ecx          # ecx = a_len
        mov     16(%ebp), %edi          # edi = ps
        test    %ecx, %ecx
        jz      2f                      # a_len == 0: carry stays zero
        mov     8(%ebp), %esi           # esi = pa
1:
        movd    (%esi), %mm0            # mm0 = *pa
        movd    (%edi), %mm3            # mm3 = low target word
        add     $4, %esi                # pa++
        pmuludq %mm0, %mm0              # mm0 = square of *pa (64-bit)
        paddq   %mm0, %mm2              # carry += square
        paddq   %mm3, %mm2              # carry += low target word
        movd    4(%edi), %mm3           # mm3 = high target word
        movd    %mm2, (%edi)            # store the low result word
        psrlq   $32, %mm2               # move on to the high half
        paddq   %mm3, %mm2              # carry += high target word
        movd    %mm2, 4(%edi)           # store the high result word
        psrlq   $32, %mm2               # carry = what is left above 64 bits
        add     $8, %edi                # ps += 2 words
        dec     %ecx                    # one input word done
        jnz     1b
2:
        movd    %mm2, %eax              # eax = final carry
        test    %eax, %eax
        jz      4f                      # zero carry: nothing to propagate
        add     %eax, (%edi)            # *ps += carry, CF = carry out
        lea     4(%edi), %edi           # ps++ without disturbing CF
        jnc     4f
3:
        addl    $1, (%edi)              # ripple the carry bit into the next word
        lea     4(%edi), %edi           # ps++ without disturbing CF
        jc      3b                      # keep going while the word wrapped around
4:
        emms                            # leave MMX state
        pop     %esi
        pop     %edi
        leave
        ret
        nop

# s_mpv_div_2dx1d: divide the 64-bit value (Nhi, Nlo) by a 32-bit divisor.
# The divisor must be normalized so that its high bit is 1.  This code is
# from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#                        mp_digit *qp, mp_digit *rp)
#
# Stores the quotient through qp and the remainder through rp, then returns
# zero in eax.  No range check is made here: the div instruction raises a
# divide error when the quotient does not fit in 32 bits.
#
# Stack layout after ebx has been pushed:
#   esp + 0   saved ebx
#   esp + 4   return address
#   esp + 8   Nhi
#   esp + 12  Nlo
#   esp + 16  divisor
#   esp + 20  qp
#   esp + 24  rp
# Register roles:
#   edx:eax  dividend before div; remainder (edx) and quotient (eax) after
#   ebx      scratch pointer, first qp and then rp
.globl s_mpv_div_2dx1d
.private_extern s_mpv_div_2dx1d
TYPE_FUNCTION(s_mpv_div_2dx1d)
s_mpv_div_2dx1d:
        push    %ebx
        mov     12(%esp), %eax          # eax = Nlo
        mov     8(%esp), %edx           # edx = Nhi
        divl    16(%esp)                # edx:eax / divisor
        mov     20(%esp), %ebx          # ebx = qp
        mov     %eax, (%ebx)            # *qp = quotient
        mov     24(%esp), %ebx          # ebx = rp
        mov     %edx, (%ebx)            # *rp = remainder
        xor     %eax, %eax              # return zero
        pop     %ebx
        ret
        nop

#if !defined(DARWIN)
# Empty .note.GNU-stack section: the marker saying no executable stack is
# needed.  The .previous directive then switches back to the prior section.
        .section .note.GNU-stack, "", @progbits
        .previous
#endif