| dnl ARM64 mpn_rshift. |
| |
| dnl Copyright 2013, 2014 Free Software Foundation, Inc. |
| |
| dnl This file is part of the GNU MP Library. |
| |
| dnl The GNU MP Library is free software; you can redistribute it and/or modify |
| dnl it under the terms of the GNU Lesser General Public License as published |
| dnl by the Free Software Foundation; either version 3 of the License, or (at |
| dnl your option) any later version. |
| |
| dnl The GNU MP Library is distributed in the hope that it will be useful, but |
| dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public |
| dnl License for more details. |
| |
| dnl You should have received a copy of the GNU Lesser General Public License |
| dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. |
| |
| include(`../config.m4') |
| |
| C cycles/limb |
| C Cortex-A53 ? |
| C Cortex-A57 ? |
| |
| C Move the m4 comment delimiter away from the hash sign, which this file |
| C uses to introduce immediate operands. |
| changecom(@&*$) |
| |
| C Incoming arguments, in the first four integer argument registers. |
| define(`rp_arg', `x0') |
| define(`up',     `x1') |
| define(`n',      `x2') |
| define(`cnt',    `x3') |
| |
| C Working registers: tnc receives 0 - cnt and rp receives a copy of rp_arg |
| C at function entry. |
| define(`tnc',    `x8') |
| define(`rp',     `x16') |
| |
| ASM_START() |
| C mpn_rshift |
| C |
| C Shift the n limbs at up right by cnt bits, store the n result limbs at |
| C rp_arg, and return in x0 the value up[0] lsl tnc, i.e. the bits shifted |
| C out at the low end, positioned at the top of the returned limb. |
| C |
| C tnc is 0 - cnt.  Register-specified shifts take the amount modulo 64, so |
| C with cnt in 1..63 a shift by tnc is a shift by 64 - cnt. |
| C |
| C The code relies on n being at least 1 and cnt being in 1..63.  With n = 0 |
| C the b00 path would still load and store limbs, and with cnt = 0 the lsl |
| C by tnc would not shift at all. |
| C |
| C Four limbs are handled per loop iteration.  The value of n modulo 4 picks |
| C one of four lead-in paths, which enter the loop at lo2 or lo0 with rp and |
| C up biased so that the fixed offsets in the loop land on the right limbs. |
| C |
| C Register use: |
| C   x4-x7          source limbs |
| C   x9, x12, x13   high parts, i.e. limb lsr cnt, awaiting the next limb |
| C   x10, x11       result limbs under construction |
| C   x0-x2, x4-x13 and x16 are modified.  No stack is used. |
| C |
| C x18 is avoided as scratch because AAPCS64 reserves it as the platform |
| C register. |
| PROLOGUE(mpn_rshift) |
|         mov     rp, rp_arg              C x0 is reused for the return value |
|         sub     tnc, xzr, cnt           C tnc = 0 - cnt |
|         tbz     n, #0, L(bx0)           C bit 0 clear: n is even |
| |
| C Odd n: fetch up[0] alone, then split on bit 1 of n. |
| L(bx1): ldr     x4, [up,#0] |
|         tbnz    n, #1, L(b11) |
| |
| C n = 1 mod 4. |
| L(b01): lsl     x0, x4, tnc             C return value |
|         lsr     x12, x4, cnt            C high part of up[0] |
|         sub     n, n, #1 |
|         cbnz    n, L(gt1) |
|         str     x12, [rp,#0]            C n was 1: the only result limb |
|         ret |
| L(gt1): ldp     x5, x4, [up,#8]         C up[1], up[2] |
|         sub     up, up, #8              C bias so that lo2 loads up[3] and |
|         sub     rp, rp, #32             C stores starting at result limb 0 |
|         b       L(lo2) |
| |
| C n = 3 mod 4. |
| L(b11): lsl     x0, x4, tnc             C return value |
|         lsr     x9, x4, cnt             C high part of up[0] |
|         ldp     x7, x6, [up,#8]         C up[1], up[2] |
|         add     n, n, #1                C lo0 subtracts 4: net effect n - 3 |
|         sub     up, up, #24             C bias so that top loads up[3] and |
|         sub     rp, rp, #48             C top or end store at result limb 0 |
|         b       L(lo0) |
| |
| C Even n: fetch up[0] and up[1], then split on bit 1 of n. |
| L(bx0): ldp     x5, x4, [up,#0] |
|         tbz     n, #1, L(b00) |
| |
| C n = 2 mod 4. |
| L(b10): lsl     x0, x5, tnc             C return value |
|         lsr     x13, x5, cnt            C high part of up[0] |
|         lsl     x10, x4, tnc            C low part of up[1] |
|         lsr     x12, x4, cnt            C high part of up[1] |
|         sub     n, n, #2 |
|         cbnz    n, L(gt2) |
|         orr     x10, x10, x13           C n was 2: store both result limbs |
|         stp     x10, x12, [rp,#0] |
|         ret |
| L(gt2): ldp     x5, x4, [up,#16]        C up[2], up[3] |
|         orr     x10, x10, x13 |
|         str     x10, [rp,#0]            C result limb 0 |
|         sub     rp, rp, #24             C bias so that lo2 stores at limb 1 |
|         b       L(lo2) |
| |
| C n = 0 mod 4. |
| L(b00): lsl     x0, x5, tnc             C return value |
|         lsr     x13, x5, cnt            C high part of up[0] |
|         lsl     x10, x4, tnc            C low part of up[1] |
|         lsr     x9, x4, cnt             C high part of up[1] |
|         ldp     x7, x6, [up,#16]        C up[2], up[3] |
|         orr     x10, x10, x13 |
|         str     x10, [rp,#0]            C result limb 0 |
|         sub     up, up, #16             C bias so that top loads up[4] and |
|         sub     rp, rp, #40             C top or end store at result limb 1 |
|         b       L(lo0) |
| |
| C Main loop.  top completes and stores the two limbs started at lo0, lo2 |
| C builds and stores the next two, and lo0 starts two more and counts n down |
| C by four. |
|         ALIGN(16) |
| L(top): ldp     x5, x4, [up,#48]        C limb pair consumed at lo2 |
|         add     rp, rp, #32             C integrate with stp? |
|         add     up, up, #32             C integrate with ldp? |
|         orr     x11, x11, x9 |
|         orr     x10, x10, x13 |
|         stp     x11, x10, [rp,#16] |
| L(lo2): lsl     x11, x5, tnc |
|         lsr     x13, x5, cnt |
|         lsl     x10, x4, tnc |
|         lsr     x9, x4, cnt             C carried into top or end |
|         ldp     x7, x6, [up,#32]        C limb pair consumed at lo0 |
|         orr     x11, x11, x12           C x12 comes from the lead-in or lo0 |
|         orr     x10, x10, x13 |
|         stp     x11, x10, [rp,#32] |
| L(lo0): sub     n, n, #4 |
|         lsl     x11, x7, tnc |
|         lsr     x13, x7, cnt |
|         lsl     x10, x6, tnc |
|         lsr     x12, x6, cnt            C carried into lo2 or stored at end |
|         cbnz    n, L(top) |
| |
| C Tail: complete the last two combined limbs, then store the top result |
| C limb, which is only the high part of the last source limb. |
| L(end): orr     x11, x11, x9 |
|         orr     x10, x10, x13 |
|         stp     x11, x10, [rp,#48] |
|         str     x12, [rp,#64] |
|         ret |
| EPILOGUE() |