| /* SPARC _add_n -- Add two limb vectors of the same length > 0 and store |
| * sum in a third limb vector. |
| * |
| * Copyright (C) 1995, 1996, 1998, |
| * 2001, 2002 Free Software Foundation, Inc. |
| * |
| * This file is part of Libgcrypt. |
| * |
| * Libgcrypt is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU Lesser General Public License as |
| * published by the Free Software Foundation; either version 2.1 of |
| * the License, or (at your option) any later version. |
| * |
| * Libgcrypt is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this program; if not, write to the Free Software |
| * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA |
| */ |
| |
| |
| |
| /******************* |
| * mpi_limb_t |
| * _gcry_mpih_add_n( mpi_ptr_t res_ptr, |
| * mpi_ptr_t s1_ptr, |
| * mpi_ptr_t s2_ptr, |
| * mpi_size_t size) |
| */ |
| |
| ! INPUT PARAMETERS |
| ! Symbolic names for the four argument registers %o0 through %o3, in the |
| ! same order as the parameters of the C prototype above. |
| ! res_ptr destination vector, written with st and std |
| ! s1_ptr first source vector, read with ld and ldd |
| ! s2_ptr second source vector, read with ld and ldd |
| ! size number of limbs still to process; a limb is one 4-byte word |
| ! All four registers are modified by the routine as it advances. |
| #define res_ptr %o0 |
| #define s1_ptr %o1 |
| #define s2_ptr %o2 |
| #define size %o3 |
| |
| #include "sysdep.h" |
| |
| ! _gcry_mpih_add_n: add the limb vectors at s1_ptr and s2_ptr (size limbs |
| ! each, one limb = one 4-byte word), store the sums at res_ptr and return |
| ! the carry out of the most significant limb (0 or 1) in %o0. |
| ! |
| ! The routine returns with retl and opens no register window. It uses |
| ! %o0-%o5, %g1-%g4 and the integer condition codes. |
| ! |
| ! ldd and std move two words at once and need a doubleword-aligned |
| ! address, so the code first compares bit 2 of the pointers (whether they |
| ! agree modulo 8) and then picks one of three variants: |
| ! V1a res_ptr and s2_ptr agree: ldd from s2_ptr, std to res_ptr, |
| ! single-word ld from s1_ptr. |
| ! V1b res_ptr and s1_ptr agree: swap s1_ptr and s2_ptr, then run V1a. |
| ! V2 neither source agrees with res_ptr, hence both sources agree with |
| ! each other: ldd from both sources, single-word st to res_ptr. |
| ! |
| ! Carry handling: the loop counter updates use addcc, which overwrites the |
| ! carry flag. The carry is therefore parked in %o4 with addx %g0,%g0,%o4 |
| ! and put back with subcc %g0,%o4,%g0, which borrows (sets carry) exactly |
| ! when %o4 is 1. The restore usually sits in a branch delay slot, after |
| ! the branch has already tested the flags produced by addcc. |
| .text |
| .align 4 |
| .global C_SYMBOL_NAME(_gcry_mpih_add_n) |
| C_SYMBOL_NAME(_gcry_mpih_add_n): |
| ! Bit 2 of (s2_ptr xor res_ptr) is set when the two pointers differ |
| ! modulo 8. The nop after a branch fills its delay slot. |
| xor s2_ptr,res_ptr,%g1 |
| andcc %g1,4,%g0 |
| bne L1 ! branch if alignment differs |
| nop |
| ! ** V1a ** |
| ! Also entered from V1b after the two source pointers have been swapped. |
| L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0 |
| be L_v1 ! if no, branch |
| nop |
| /* Add least significant limb separately to align res_ptr and s2_ptr */ |
| ! Only the addcc below touches the condition codes; the pointer and counter |
| ! updates use plain add, so the carry of this limb is still live at L_v1. |
| ld [s1_ptr],%g4 |
| add s1_ptr,4,s1_ptr |
| ld [s2_ptr],%g2 |
| add s2_ptr,4,s2_ptr |
| add size,-1,size |
| addcc %g4,%g2,%o4 |
| st %o4,[res_ptr] |
| add res_ptr,4,res_ptr |
| ! Here res_ptr and s2_ptr are doubleword aligned. With 0 or 1 limbs left |
| ! the shared tail at Lend2 finishes the job; it restores cy from %o4. |
| L_v1: addx %g0,%g0,%o4 ! save cy in register |
| cmp size,2 ! if size < 2 ... |
| bl Lend2 ! ... branch to tail code |
| subcc %g0,%o4,%g0 ! restore cy |
| |
| ! Software pipelining: the next two limbs of each source are always loaded |
| ! ahead of use, s1 limbs in %g4 and %g1, s2 limbs in the pair %g2/%g3. |
| ! size is biased by -10 because one pass of Loop1 adds 8 limbs and also |
| ! preloads the 2 limbs that follow them. |
| ld [s1_ptr+0],%g4 |
| addcc size,-10,size |
| ld [s1_ptr+4],%g1 |
| ldd [s2_ptr+0],%g2 |
| blt Lfin1 |
| subcc %g0,%o4,%g0 ! restore cy |
| /* Add blocks of 8 limbs until fewer than 8 limbs remain */ |
| ! Each group of six instructions adds the two preloaded limbs into the |
| ! pair %o4/%o5, reloads the next two limbs and stores the pair with std. |
| ! addxcc chains the carry from limb to limb. |
| Loop1: addxcc %g4,%g2,%o4 |
| ld [s1_ptr+8],%g4 |
| addxcc %g1,%g3,%o5 |
| ld [s1_ptr+12],%g1 |
| ldd [s2_ptr+8],%g2 |
| std %o4,[res_ptr+0] |
| addxcc %g4,%g2,%o4 |
| ld [s1_ptr+16],%g4 |
| addxcc %g1,%g3,%o5 |
| ld [s1_ptr+20],%g1 |
| ldd [s2_ptr+16],%g2 |
| std %o4,[res_ptr+8] |
| addxcc %g4,%g2,%o4 |
| ld [s1_ptr+24],%g4 |
| addxcc %g1,%g3,%o5 |
| ld [s1_ptr+28],%g1 |
| ldd [s2_ptr+24],%g2 |
| std %o4,[res_ptr+16] |
| addxcc %g4,%g2,%o4 |
| ld [s1_ptr+32],%g4 |
| addxcc %g1,%g3,%o5 |
| ld [s1_ptr+36],%g1 |
| ldd [s2_ptr+32],%g2 |
| std %o4,[res_ptr+24] |
| addx %g0,%g0,%o4 ! save cy in register |
| addcc size,-8,size |
| add s1_ptr,32,s1_ptr |
| add s2_ptr,32,s2_ptr |
| add res_ptr,32,res_ptr |
| bge Loop1 |
| subcc %g0,%o4,%g0 ! restore cy |
| |
| ! Undo the 8-limb bias and apply the 2-limb one. After this addcc, size is |
| ! the number of limbs not yet added (the preloaded pair included) minus 4: |
| ! one pass of Loope1 adds 2 limbs and preloads 2 more. |
| Lfin1: addcc size,8-2,size |
| blt Lend1 |
| subcc %g0,%o4,%g0 ! restore cy |
| /* Add blocks of 2 limbs until fewer than 2 limbs remain */ |
| Loope1: addxcc %g4,%g2,%o4 |
| ld [s1_ptr+8],%g4 |
| addxcc %g1,%g3,%o5 |
| ld [s1_ptr+12],%g1 |
| ldd [s2_ptr+8],%g2 |
| std %o4,[res_ptr+0] |
| addx %g0,%g0,%o4 ! save cy in register |
| addcc size,-2,size |
| add s1_ptr,8,s1_ptr |
| add s2_ptr,8,s2_ptr |
| add res_ptr,8,res_ptr |
| bge Loope1 |
| subcc %g0,%o4,%g0 ! restore cy |
| ! Drain the pipeline: add and store the pair that is already loaded. |
| Lend1: addxcc %g4,%g2,%o4 |
| addxcc %g1,%g3,%o5 |
| std %o4,[res_ptr+0] |
| addx %g0,%g0,%o4 ! save cy in register |
| |
| ! size is now -2 or -1; bit 0 is set exactly when one limb is left. |
| andcc size,1,%g0 |
| be Lret1 |
| subcc %g0,%o4,%g0 ! restore cy |
| /* Add last limb */ |
| ! The pointers were not advanced past the pair handled at Lend1, so the |
| ! remaining limb sits at offset 8. |
| ld [s1_ptr+8],%g4 |
| ld [s2_ptr+8],%g2 |
| addxcc %g4,%g2,%o4 |
| st %o4,[res_ptr+8] |
| |
| ! The delay slot of retl turns the final carry flag into the return value. |
| Lret1: retl |
| addx %g0,%g0,%o0 ! return carry-out from most sign. limb |
| |
| ! s2_ptr and res_ptr differ modulo 8; try s1_ptr against res_ptr instead. |
| L1: xor s1_ptr,res_ptr,%g1 |
| andcc %g1,4,%g0 |
| bne L2 |
| nop |
| ! ** V1b ** |
| ! Addition is commutative, so exchange the two source pointers through %g1 |
| ! and reuse V1a. The last mov executes in the delay slot of the branch. |
| mov s2_ptr,%g1 |
| mov s1_ptr,s2_ptr |
| b L0 |
| mov %g1,s1_ptr |
| |
| ! ** V2 ** |
| /* If we come here, the alignment of s1_ptr and res_ptr as well as the |
| alignment of s2_ptr and res_ptr differ. Since there are only two ways |
| things can be aligned (that we care about) we now know that the alignments |
| of s1_ptr and s2_ptr are the same. */ |
| |
| ! A single limb goes straight to Ljone. cmp with equal operands leaves the |
| ! carry flag clear, so the addxcc at Ljone adds with a carry-in of zero. |
| L2: cmp size,1 |
| be Ljone |
| nop |
| andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0 |
| be L_v2 ! if no, branch |
| nop |
| /* Add least significant limb separately to align s1_ptr and s2_ptr */ |
| ld [s1_ptr],%g4 |
| add s1_ptr,4,s1_ptr |
| ld [s2_ptr],%g2 |
| add s2_ptr,4,s2_ptr |
| add size,-1,size |
| addcc %g4,%g2,%o4 |
| st %o4,[res_ptr] |
| add res_ptr,4,res_ptr |
| |
| ! Here s1_ptr and s2_ptr are doubleword aligned and res_ptr is not, so the |
| ! results are stored one word at a time. Nothing is preloaded in this |
| ! variant, hence the plain bias of -8. |
| L_v2: addx %g0,%g0,%o4 ! save cy in register |
| addcc size,-8,size |
| blt Lfin2 |
| subcc %g0,%o4,%g0 ! restore cy |
| /* Add blocks of 8 limbs until fewer than 8 limbs remain */ |
| ! s1 limbs arrive in the pair %g2/%g3, s2 limbs in the pair %o4/%o5; the |
| ! sums overwrite %g2 and %g3 before being stored. |
| Loop2: ldd [s1_ptr+0],%g2 |
| ldd [s2_ptr+0],%o4 |
| addxcc %g2,%o4,%g2 |
| st %g2,[res_ptr+0] |
| addxcc %g3,%o5,%g3 |
| st %g3,[res_ptr+4] |
| ldd [s1_ptr+8],%g2 |
| ldd [s2_ptr+8],%o4 |
| addxcc %g2,%o4,%g2 |
| st %g2,[res_ptr+8] |
| addxcc %g3,%o5,%g3 |
| st %g3,[res_ptr+12] |
| ldd [s1_ptr+16],%g2 |
| ldd [s2_ptr+16],%o4 |
| addxcc %g2,%o4,%g2 |
| st %g2,[res_ptr+16] |
| addxcc %g3,%o5,%g3 |
| st %g3,[res_ptr+20] |
| ldd [s1_ptr+24],%g2 |
| ldd [s2_ptr+24],%o4 |
| addxcc %g2,%o4,%g2 |
| st %g2,[res_ptr+24] |
| addxcc %g3,%o5,%g3 |
| st %g3,[res_ptr+28] |
| addx %g0,%g0,%o4 ! save cy in register |
| addcc size,-8,size |
| add s1_ptr,32,s1_ptr |
| add s2_ptr,32,s2_ptr |
| add res_ptr,32,res_ptr |
| bge Loop2 |
| subcc %g0,%o4,%g0 ! restore cy |
| |
| ! After this addcc, size is the number of limbs left minus 2. |
| Lfin2: addcc size,8-2,size |
| blt Lend2 |
| subcc %g0,%o4,%g0 ! restore cy |
| /* Add blocks of 2 limbs until fewer than 2 limbs remain */ |
| Loope2: ldd [s1_ptr+0],%g2 |
| ldd [s2_ptr+0],%o4 |
| addxcc %g2,%o4,%g2 |
| st %g2,[res_ptr+0] |
| addxcc %g3,%o5,%g3 |
| st %g3,[res_ptr+4] |
| addx %g0,%g0,%o4 ! save cy in register |
| addcc size,-2,size |
| add s1_ptr,8,s1_ptr |
| add s2_ptr,8,s2_ptr |
| add res_ptr,8,res_ptr |
| bge Loope2 |
| subcc %g0,%o4,%g0 ! restore cy |
| ! Shared tail, also reached from L_v1. size is -2 or -1 when coming from |
| ! the V2 loops and 0 or 1 when coming from L_v1; in both cases bit 0 is |
| ! set exactly when one limb is left. %o4 holds the saved carry. |
| Lend2: andcc size,1,%g0 |
| be Lret2 |
| subcc %g0,%o4,%g0 ! restore cy |
| /* Add last limb */ |
| Ljone: ld [s1_ptr],%g4 |
| ld [s2_ptr],%g2 |
| addxcc %g4,%g2,%o4 |
| st %o4,[res_ptr] |
| |
| ! The delay slot of retl turns the final carry flag into the return value. |
| Lret2: retl |
| addx %g0,%g0,%o0 ! return carry-out from most sign. limb |
| |
| |
| |