| /* This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
| #include <regdef.h> |
| # Delay slots in this file are filled by hand (see the instruction after |
| # each branch below), so the assembler is told not to reorder instructions. |
| .set noreorder |
| # Disallow implicit assembler use of the $at temporary register. |
| .set noat |
| |
| # NOTE(review): the numeric arguments to .section look like toolchain |
| # specific section attributes (type, flags, entry size, alignment) - confirm |
| # against the assembler this file is built with. |
| .section .text, 1, 0x00000006, 4, 4 |
| .text: |
| .section .text |
| |
| .ent s_mpv_mul_d_add |
| .globl s_mpv_mul_d_add |
| |
| s_mpv_mul_d_add: |
| #/* c += a * b */ |
| #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, |
| # mp_digit *c) |
| # |
| # Multiplies the a_len 32-bit digits at a by the single digit b, adds each |
| # product into the matching digit of c, and stores the final carry in the |
| # digit of c that follows the last one updated (c[a_len] is overwritten, |
| # not added to). Returns at once, without touching memory, when a_len is 0. |
| # |
| # Inputs: a0 = a, a1 = a_len, a2 = b, a3 = c |
| # Scratch: a4-a7, t0-t3 and the multiplier result register read by mflo; |
| # a0-a3 are modified as well. |
| # |
| # The instruction following every branch or jump is its delay slot and |
| # executes whether or not the branch is taken. The interleaved C statements |
| # show the algorithm being implemented; the instructions are scheduled by |
| # hand and do not always follow that order exactly. |
| # |
| # NOTE(review): pointers are advanced with 32-bit addiu, so this presumably |
| # targets an ABI with 32-bit pointers (such as n32) - confirm. |
| # NOTE(review): the delay slot that closes the .L.4 loop loads a[1] before |
| # a_len is tested; when the loop ends with a_len == 0 this reads one digit |
| # past the end of a (the loaded value is never used). |
| #{ |
| # mp_digit a0, a1; regs a4, a5 |
| # mp_digit c0, c1; regs a6, a7 |
| # mp_digit cy = 0; reg t2 |
| # mp_word w0, w1; regs t0, t1 |
| # |
| # if (a_len) { |
| beq a1,zero,.L.1 |
| move t2,zero # cy = 0 |
| dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) |
| dsrl32 a2,a2,0 # This clears the upper 32 bits. |
| # a0 = a[0]; |
| lwu a4,0(a0) |
| # w0 = ((mp_word)b * a0); |
| dmultu a2,a4 |
| # if (--a_len) { |
| addiu a1,a1,-1 |
| beq a1,zero,.L.2 |
| # while (a_len >= 2) { |
| # (delay slot of the beq above: t3 = a_len < 2) |
| sltiu t3,a1,2 |
| bne t3,zero,.L.3 |
| # a1 = a[1]; |
| # (delay slot: loaded on both paths; a_len >= 1 here so a[1] exists) |
| lwu a5,4(a0) |
| .L.4: |
| # Loop body: two digits per iteration. The multiply for the next digit is |
| # started before the current result is stored. |
| # a_len -= 2; |
| addiu a1,a1,-2 |
| # c0 = c[0]; |
| lwu a6,0(a3) |
| # w0 += cy; |
| mflo t0 |
| daddu t0,t0,t2 |
| # w0 += c0; |
| daddu t0,t0,a6 |
| # w1 = (mp_word)b * a1; |
| dmultu a2,a5 # |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # a0 = a[2]; |
| lwu a4,8(a0) |
| # a += 2; |
| addiu a0,a0,8 |
| # c1 = c[1]; |
| lwu a7,4(a3) |
| # w1 += cy; |
| mflo t1 |
| daddu t1,t1,t2 |
| # w1 += c1; |
| daddu t1,t1,a7 |
| # w0 = (mp_word)b * a0; |
| dmultu a2,a4 # |
| # cy = CARRYOUT(w1); |
| dsrl32 t2,t1,0 |
| # c[1] = ACCUM(w1); |
| sw t1,4(a3) |
| # c += 2; |
| addiu a3,a3,8 |
| sltiu t3,a1,2 |
| beq t3,zero,.L.4 |
| # a1 = a[1]; |
| # (delay slot: executed even when the loop exits) |
| lwu a5,4(a0) |
| # } |
| .L.3: |
| # One or two digits remain; the product for the first is already pending. |
| # c0 = c[0]; |
| lwu a6,0(a3) |
| # w0 += cy; |
| # if (a_len) { |
| mflo t0 |
| beq a1,zero,.L.5 |
| # (delay slot: w0 += cy on both paths) |
| daddu t0,t0,t2 |
| # w1 = (mp_word)b * a1; |
| dmultu a2,a5 |
| # w0 += c0; |
| daddu t0,t0,a6 # |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # c1 = c[1]; |
| lwu a7,4(a3) |
| # w1 += cy; |
| mflo t1 |
| daddu t1,t1,t2 |
| # w1 += c1; |
| daddu t1,t1,a7 |
| # c[1] = ACCUM(w1); |
| sw t1,4(a3) |
| # cy = CARRYOUT(w1); |
| dsrl32 t2,t1,0 |
| # c += 1; |
| # (so that the store at .L.6, 4(a3), lands just after the two digits |
| # written above) |
| b .L.6 |
| addiu a3,a3,4 |
| # } else { |
| .L.5: |
| # w0 += c0; |
| daddu t0,t0,a6 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # cy = CARRYOUT(w0); |
| b .L.6 |
| dsrl32 t2,t0,0 |
| # } |
| # } else { |
| .L.2: |
| # Single-digit input: only c[0] is updated before the carry is stored. |
| # c0 = c[0]; |
| lwu a6,0(a3) |
| # w0 += c0; |
| mflo t0 |
| daddu t0,t0,a6 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # } |
| .L.6: |
| # c[1] = cy; |
| # (the store sits in the delay slot of the return) |
| jr ra |
| sw t2,4(a3) |
| # } |
| .L.1: |
| jr ra |
| nop |
| #} |
| # |
| .end s_mpv_mul_d_add |
| |
| .ent s_mpv_mul_d_add_prop |
| .globl s_mpv_mul_d_add_prop |
| |
| s_mpv_mul_d_add_prop: |
| #/* c += a * b */ |
| #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, |
| # mp_digit *c) |
| # |
| # Multiplies the a_len 32-bit digits at a by the single digit b and adds |
| # each product into the matching digit of c. The carry left after the last |
| # digit is then added into the following digits of c, one at a time, until |
| # an addition produces no further carry. Returns at once, without touching |
| # memory, when a_len is 0. |
| # |
| # Inputs: a0 = a, a1 = a_len, a2 = b, a3 = c |
| # Scratch: a4-a7, t0-t3 and the multiplier result register read by mflo; |
| # a0-a3 are modified as well. |
| # |
| # The instruction following every branch or jump is its delay slot and |
| # executes whether or not the branch is taken. The interleaved C statements |
| # show the algorithm being implemented; the instructions are scheduled by |
| # hand and do not always follow that order exactly. |
| # |
| # NOTE(review): pointers are advanced with 32-bit addiu, so this presumably |
| # targets an ABI with 32-bit pointers (such as n32) - confirm. |
| # NOTE(review): the delay slot that closes the .M.4 loop loads a[1] before |
| # a_len is tested; when the loop ends with a_len == 0 this reads one digit |
| # past the end of a (the loaded value is never used). |
| # NOTE(review): the carry loop has no bound of its own; presumably callers |
| # guarantee c has enough digits to absorb the carry - confirm. |
| #{ |
| # mp_digit a0, a1; regs a4, a5 |
| # mp_digit c0, c1; regs a6, a7 |
| # mp_digit cy = 0; reg t2 |
| # mp_word w0, w1; regs t0, t1 |
| # |
| # if (a_len) { |
| beq a1,zero,.M.1 |
| move t2,zero # cy = 0 |
| dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) |
| dsrl32 a2,a2,0 # This clears the upper 32 bits. |
| # a0 = a[0]; |
| lwu a4,0(a0) |
| # w0 = ((mp_word)b * a0); |
| dmultu a2,a4 |
| # if (--a_len) { |
| addiu a1,a1,-1 |
| beq a1,zero,.M.2 |
| # while (a_len >= 2) { |
| # (delay slot of the beq above: t3 = a_len < 2) |
| sltiu t3,a1,2 |
| bne t3,zero,.M.3 |
| # a1 = a[1]; |
| # (delay slot: loaded on both paths; a_len >= 1 here so a[1] exists) |
| lwu a5,4(a0) |
| .M.4: |
| # Loop body: two digits per iteration. The multiply for the next digit is |
| # started before the current result is stored. |
| # a_len -= 2; |
| addiu a1,a1,-2 |
| # c0 = c[0]; |
| lwu a6,0(a3) |
| # w0 += cy; |
| mflo t0 |
| daddu t0,t0,t2 |
| # w0 += c0; |
| daddu t0,t0,a6 |
| # w1 = (mp_word)b * a1; |
| dmultu a2,a5 # |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # a0 = a[2]; |
| lwu a4,8(a0) |
| # a += 2; |
| addiu a0,a0,8 |
| # c1 = c[1]; |
| lwu a7,4(a3) |
| # w1 += cy; |
| mflo t1 |
| daddu t1,t1,t2 |
| # w1 += c1; |
| daddu t1,t1,a7 |
| # w0 = (mp_word)b * a0; |
| dmultu a2,a4 # |
| # cy = CARRYOUT(w1); |
| dsrl32 t2,t1,0 |
| # c[1] = ACCUM(w1); |
| sw t1,4(a3) |
| # c += 2; |
| addiu a3,a3,8 |
| sltiu t3,a1,2 |
| beq t3,zero,.M.4 |
| # a1 = a[1]; |
| # (delay slot: executed even when the loop exits) |
| lwu a5,4(a0) |
| # } |
| .M.3: |
| # One or two digits remain; the product for the first is already pending. |
| # c0 = c[0]; |
| lwu a6,0(a3) |
| # w0 += cy; |
| # if (a_len) { |
| mflo t0 |
| beq a1,zero,.M.5 |
| # (delay slot: w0 += cy on both paths) |
| daddu t0,t0,t2 |
| # w1 = (mp_word)b * a1; |
| dmultu a2,a5 |
| # w0 += c0; |
| daddu t0,t0,a6 # |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # c1 = c[1]; |
| lwu a7,4(a3) |
| # w1 += cy; |
| mflo t1 |
| daddu t1,t1,t2 |
| # w1 += c1; |
| daddu t1,t1,a7 |
| # c[1] = ACCUM(w1); |
| sw t1,4(a3) |
| # cy = CARRYOUT(w1); |
| dsrl32 t2,t1,0 |
| # c += 2; |
| # (two digits were written, so c must point past both of them before the |
| # carry loop at .M.6 runs) |
| b .M.6 |
| addiu a3,a3,8 |
| # } else { |
| .M.5: |
| # w0 += c0; |
| daddu t0,t0,a6 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # c += 1; (performed in the delay slot of the branch) |
| b .M.6 |
| addiu a3,a3,4 |
| # } |
| # } else { |
| .M.2: |
| # Single-digit input: only c[0] is updated before carry propagation. |
| # c0 = c[0]; |
| lwu a6,0(a3) |
| # w0 += c0; |
| mflo t0 |
| daddu t0,t0,a6 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # c += 1; |
| addiu a3,a3,4 |
| # } |
| .M.6: |
| |
| # Here a3 points at the first digit of c that was not written above and t2 |
| # holds the carry to be added to it. |
| # while (cy) { |
| beq t2,zero,.M.1 |
| nop |
| .M.7: |
| # mp_word w = (mp_word)*c + cy; |
| lwu a6,0(a3) |
| daddu t2,t2,a6 |
| # *c++ = ACCUM(w); |
| sw t2,0(a3) |
| # cy = CARRYOUT(w); |
| dsrl32 t2,t2,0 |
| bne t2,zero,.M.7 |
| # (delay slot: the c++ of the store above) |
| addiu a3,a3,4 |
| |
| # } |
| .M.1: |
| jr ra |
| nop |
| #} |
| # |
| .end s_mpv_mul_d_add_prop |
| |
| .ent s_mpv_mul_d |
| .globl s_mpv_mul_d |
| |
| s_mpv_mul_d: |
| #/* c = a * b */ |
| #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, |
| # mp_digit *c) |
| # |
| # Multiplies the a_len 32-bit digits at a by the single digit b. The low |
| # 32 bits of each product-plus-carry are stored in the matching digit of c |
| # (the previous contents of c are not read), and the final carry is stored |
| # in the digit that follows. Returns at once, without touching memory, when |
| # a_len is 0. |
| # |
| # Inputs: a0 = a, a1 = a_len, a2 = b, a3 = c |
| # Scratch: a4, a5, t0-t3 and the multiplier result register read by mflo; |
| # a0-a3 are modified as well. |
| # |
| # The instruction following every branch or jump is its delay slot and |
| # executes whether or not the branch is taken. The interleaved C statements |
| # show the algorithm being implemented; the instructions are scheduled by |
| # hand and do not always follow that order exactly. |
| # |
| # NOTE(review): pointers are advanced with 32-bit addiu, so this presumably |
| # targets an ABI with 32-bit pointers (such as n32) - confirm. |
| # NOTE(review): the delay slot that closes the .N.4 loop loads a[1] before |
| # a_len is tested; when the loop ends with a_len == 0 this reads one digit |
| # past the end of a (the loaded value is never used). |
| #{ |
| # mp_digit a0, a1; regs a4, a5 |
| # mp_digit cy = 0; reg t2 |
| # mp_word w0, w1; regs t0, t1 |
| # |
| # if (a_len) { |
| beq a1,zero,.N.1 |
| move t2,zero # cy = 0 |
| dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) |
| dsrl32 a2,a2,0 # This clears the upper 32 bits. |
| # a0 = a[0]; |
| lwu a4,0(a0) |
| # w0 = ((mp_word)b * a0); |
| dmultu a2,a4 |
| # if (--a_len) { |
| addiu a1,a1,-1 |
| beq a1,zero,.N.2 |
| # while (a_len >= 2) { |
| # (delay slot of the beq above: t3 = a_len < 2) |
| sltiu t3,a1,2 |
| bne t3,zero,.N.3 |
| # a1 = a[1]; |
| # (delay slot: loaded on both paths; a_len >= 1 here so a[1] exists) |
| lwu a5,4(a0) |
| .N.4: |
| # Loop body: two digits per iteration. The multiply for the next digit is |
| # started before the current result is stored. |
| # a_len -= 2; |
| addiu a1,a1,-2 |
| # w0 += cy; |
| mflo t0 |
| daddu t0,t0,t2 |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # w1 = (mp_word)b * a1; |
| dmultu a2,a5 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # a0 = a[2]; |
| lwu a4,8(a0) |
| # a += 2; |
| addiu a0,a0,8 |
| # w1 += cy; |
| mflo t1 |
| daddu t1,t1,t2 |
| # cy = CARRYOUT(w1); |
| dsrl32 t2,t1,0 |
| # w0 = (mp_word)b * a0; |
| dmultu a2,a4 |
| # c[1] = ACCUM(w1); |
| sw t1,4(a3) |
| # c += 2; |
| addiu a3,a3,8 |
| sltiu t3,a1,2 |
| beq t3,zero,.N.4 |
| # a1 = a[1]; |
| # (delay slot: executed even when the loop exits) |
| lwu a5,4(a0) |
| # } |
| .N.3: |
| # One or two digits remain; the product for the first is already pending. |
| # w0 += cy; |
| # if (a_len) { |
| mflo t0 |
| beq a1,zero,.N.5 |
| # (delay slot: w0 += cy on both paths) |
| daddu t0,t0,t2 |
| # w1 = (mp_word)b * a1; |
| dmultu a2,a5 # |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # w1 += cy; |
| mflo t1 |
| daddu t1,t1,t2 |
| # c[1] = ACCUM(w1); |
| sw t1,4(a3) |
| # cy = CARRYOUT(w1); |
| dsrl32 t2,t1,0 |
| # c += 1; |
| # (so that the store at .N.6, 4(a3), lands just after the two digits |
| # written above) |
| b .N.6 |
| addiu a3,a3,4 |
| # } else { |
| .N.5: |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # cy = CARRYOUT(w0); |
| b .N.6 |
| dsrl32 t2,t0,0 |
| # } |
| # } else { |
| .N.2: |
| # Single-digit input: the product is the whole result. |
| mflo t0 |
| # c[0] = ACCUM(w0); |
| sw t0,0(a3) |
| # cy = CARRYOUT(w0); |
| dsrl32 t2,t0,0 |
| # } |
| .N.6: |
| # c[1] = cy; |
| # (the store sits in the delay slot of the return) |
| jr ra |
| sw t2,4(a3) |
| # } |
| .N.1: |
| jr ra |
| nop |
| #} |
| # |
| .end s_mpv_mul_d |
| |
| |
| .ent s_mpv_sqr_add_prop |
| .globl s_mpv_sqr_add_prop |
| #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs); |
| # |
| # For each 32-bit digit a[i], adds the 64-bit square a[i]*a[i] into the |
| # digit pair sqrs[2*i], sqrs[2*i+1], carrying from one pair into the next. |
| # Any carry left after the last pair is added into the following digits of |
| # sqrs, one at a time, until an addition produces no further carry. |
| # Returns without touching memory when a_len is 0. |
| # |
| # registers |
| # a0 *a |
| # a1 a_len |
| # a2 *sqr |
| # a3 digit from *a, a_i |
| # a4 square of digit from a, then the updated 64-bit digit pair |
| # a5,a6 next 2 digits in sqr |
| # a7,t0 carry |
| # |
| # The instruction after each branch or jump is its delay slot and executes |
| # whether or not the branch is taken. |
| # |
| # NOTE(review): the final carry loop has no bound of its own; presumably |
| # callers guarantee sqrs has enough digits to absorb the carry - confirm. |
| s_mpv_sqr_add_prop: |
| beq a1,zero,.P.9 # empty input: nothing to add |
| move t0,zero # (delay slot) carry in = 0 |
| move a7,zero |
| lwu a3,0(a0) |
| addiu a1,a1,-1 # --a_len |
| dmultu a3,a3 |
| beq a1,zero,.P.3 # jump if we've already done the only sqr |
| addiu a0,a0,4 # ++a |
| .P.2: |
| lwu a5,0(a2) |
| lwu a6,4(a2) |
| addiu a2,a2,8 # sqrs += 2; |
| dsll32 a6,a6,0 |
| daddu a5,a5,a6 # a5 = 64-bit value of the digit pair |
| lwu a3,0(a0) |
| addiu a0,a0,4 # ++a |
| mflo a4 |
| daddu a6,a5,a4 |
| sltu a7,a6,a5 # a7 = a6 < a5 detect overflow |
| dmultu a3,a3 # start the next square early |
| daddu a4,a6,t0 # add the carry from the previous pair |
| sltu t0,a4,a6 |
| add t0,t0,a7 # carry out of this pair |
| sw a4,-8(a2) |
| addiu a1,a1,-1 # --a_len |
| dsrl32 a4,a4,0 |
| bne a1,zero,.P.2 # loop if a_len > 0 |
| sw a4,-4(a2) |
| .P.3: |
| # Last digit: same as the loop body, with no further square to start. |
| lwu a5,0(a2) |
| lwu a6,4(a2) |
| addiu a2,a2,8 # sqrs += 2; |
| dsll32 a6,a6,0 |
| daddu a5,a5,a6 |
| mflo a4 |
| daddu a6,a5,a4 |
| sltu a7,a6,a5 # a7 = a6 < a5 detect overflow |
| daddu a4,a6,t0 |
| sltu t0,a4,a6 |
| add t0,t0,a7 |
| sw a4,-8(a2) |
| dsrl32 a4,a4,0 |
| beq t0,zero,.P.9 # jump if no carry |
| sw a4,-4(a2) # (delay slot) high digit, stored on both paths |
| /* propagate final carry */ |
| .P.8: |
| lwu a5,0(a2) |
| daddu a6,a5,t0 # 32-bit digit plus small carry fits in 64 bits |
| sw a6,0(a2) # write the updated digit back |
| dsrl32 t0,a6,0 # carry out is bit 32 of the sum |
| bne t0,zero,.P.8 # loop if carry persists |
| addiu a2,a2,4 # sqrs++ |
| .P.9: |
| jr ra |
| nop |
| |
| .end s_mpv_sqr_add_prop |