blob: 61a80ca320b773c601fdf496f5a962288e3d46fe [file] [log] [blame]
/* SPARC _add_n -- Add two limb vectors of the same length > 0 and store
* sum in a third limb vector.
*
* Copyright (C) 1995, 1996, 1998,
* 2001, 2002 Free Software Foundation, Inc.
*
* This file is part of Libgcrypt.
*
* Libgcrypt is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* Libgcrypt is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*******************
* mpi_limb_t
* _gcry_mpih_add_n( mpi_ptr_t res_ptr,
* mpi_ptr_t s1_ptr,
* mpi_ptr_t s2_ptr,
* mpi_size_t size)
*/
! INPUT PARAMETERS
#define res_ptr %o0
#define s1_ptr %o1
#define s2_ptr %o2
#define size %o3
#include "sysdep.h"
.text
.align 4
.global C_SYMBOL_NAME(_gcry_mpih_add_n)
C_SYMBOL_NAME(_gcry_mpih_add_n):
xor s2_ptr,res_ptr,%g1
andcc %g1,4,%g0
bne L1 ! branch if alignment differs
nop
! ** V1a **
L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
be L_v1 ! if no, branch
nop
/* Add least significant limb separately to align res_ptr and s2_ptr */
ld [s1_ptr],%g4
add s1_ptr,4,s1_ptr
ld [s2_ptr],%g2
add s2_ptr,4,s2_ptr
add size,-1,size
addcc %g4,%g2,%o4
st %o4,[res_ptr]
add res_ptr,4,res_ptr
L_v1: addx %g0,%g0,%o4 ! save cy in register
cmp size,2 ! if size < 2 ...
bl Lend2 ! ... branch to tail code
subcc %g0,%o4,%g0 ! restore cy
ld [s1_ptr+0],%g4
addcc size,-10,size
ld [s1_ptr+4],%g1
ldd [s2_ptr+0],%g2
blt Lfin1
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop1: addxcc %g4,%g2,%o4
ld [s1_ptr+8],%g4
addxcc %g1,%g3,%o5
ld [s1_ptr+12],%g1
ldd [s2_ptr+8],%g2
std %o4,[res_ptr+0]
addxcc %g4,%g2,%o4
ld [s1_ptr+16],%g4
addxcc %g1,%g3,%o5
ld [s1_ptr+20],%g1
ldd [s2_ptr+16],%g2
std %o4,[res_ptr+8]
addxcc %g4,%g2,%o4
ld [s1_ptr+24],%g4
addxcc %g1,%g3,%o5
ld [s1_ptr+28],%g1
ldd [s2_ptr+24],%g2
std %o4,[res_ptr+16]
addxcc %g4,%g2,%o4
ld [s1_ptr+32],%g4
addxcc %g1,%g3,%o5
ld [s1_ptr+36],%g1
ldd [s2_ptr+32],%g2
std %o4,[res_ptr+24]
addx %g0,%g0,%o4 ! save cy in register
addcc size,-8,size
add s1_ptr,32,s1_ptr
add s2_ptr,32,s2_ptr
add res_ptr,32,res_ptr
bge Loop1
subcc %g0,%o4,%g0 ! restore cy
Lfin1: addcc size,8-2,size
blt Lend1
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
Loope1: addxcc %g4,%g2,%o4
ld [s1_ptr+8],%g4
addxcc %g1,%g3,%o5
ld [s1_ptr+12],%g1
ldd [s2_ptr+8],%g2
std %o4,[res_ptr+0]
addx %g0,%g0,%o4 ! save cy in register
addcc size,-2,size
add s1_ptr,8,s1_ptr
add s2_ptr,8,s2_ptr
add res_ptr,8,res_ptr
bge Loope1
subcc %g0,%o4,%g0 ! restore cy
Lend1: addxcc %g4,%g2,%o4
addxcc %g1,%g3,%o5
std %o4,[res_ptr+0]
addx %g0,%g0,%o4 ! save cy in register
andcc size,1,%g0
be Lret1
subcc %g0,%o4,%g0 ! restore cy
/* Add last limb */
ld [s1_ptr+8],%g4
ld [s2_ptr+8],%g2
addxcc %g4,%g2,%o4
st %o4,[res_ptr+8]
Lret1: retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
L1: xor s1_ptr,res_ptr,%g1
andcc %g1,4,%g0
bne L2
nop
! ** V1b **
mov s2_ptr,%g1
mov s1_ptr,s2_ptr
b L0
mov %g1,s1_ptr
! ** V2 **
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
alignment of s2_ptr and res_ptr differ. Since there are only two ways
things can be aligned (that we care about) we now know that the alignment
of s1_ptr and s2_ptr are the same. */
L2: cmp size,1
be Ljone
nop
andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
be L_v2 ! if no, branch
nop
/* Add least significant limb separately to align s1_ptr and s2_ptr */
ld [s1_ptr],%g4
add s1_ptr,4,s1_ptr
ld [s2_ptr],%g2
add s2_ptr,4,s2_ptr
add size,-1,size
addcc %g4,%g2,%o4
st %o4,[res_ptr]
add res_ptr,4,res_ptr
L_v2: addx %g0,%g0,%o4 ! save cy in register
addcc size,-8,size
blt Lfin2
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
Loop2: ldd [s1_ptr+0],%g2
ldd [s2_ptr+0],%o4
addxcc %g2,%o4,%g2
st %g2,[res_ptr+0]
addxcc %g3,%o5,%g3
st %g3,[res_ptr+4]
ldd [s1_ptr+8],%g2
ldd [s2_ptr+8],%o4
addxcc %g2,%o4,%g2
st %g2,[res_ptr+8]
addxcc %g3,%o5,%g3
st %g3,[res_ptr+12]
ldd [s1_ptr+16],%g2
ldd [s2_ptr+16],%o4
addxcc %g2,%o4,%g2
st %g2,[res_ptr+16]
addxcc %g3,%o5,%g3
st %g3,[res_ptr+20]
ldd [s1_ptr+24],%g2
ldd [s2_ptr+24],%o4
addxcc %g2,%o4,%g2
st %g2,[res_ptr+24]
addxcc %g3,%o5,%g3
st %g3,[res_ptr+28]
addx %g0,%g0,%o4 ! save cy in register
addcc size,-8,size
add s1_ptr,32,s1_ptr
add s2_ptr,32,s2_ptr
add res_ptr,32,res_ptr
bge Loop2
subcc %g0,%o4,%g0 ! restore cy
Lfin2: addcc size,8-2,size
blt Lend2
subcc %g0,%o4,%g0 ! restore cy
Loope2: ldd [s1_ptr+0],%g2
ldd [s2_ptr+0],%o4
addxcc %g2,%o4,%g2
st %g2,[res_ptr+0]
addxcc %g3,%o5,%g3
st %g3,[res_ptr+4]
addx %g0,%g0,%o4 ! save cy in register
addcc size,-2,size
add s1_ptr,8,s1_ptr
add s2_ptr,8,s2_ptr
add res_ptr,8,res_ptr
bge Loope2
subcc %g0,%o4,%g0 ! restore cy
Lend2: andcc size,1,%g0
be Lret2
subcc %g0,%o4,%g0 ! restore cy
/* Add last limb */
Ljone: ld [s1_ptr],%g4
ld [s2_ptr],%g2
addxcc %g4,%g2,%o4
st %o4,[res_ptr]
Lret2: retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb