| @/* |
| @ ** Copyright 2003-2010, VisualOn, Inc. |
| @ ** |
| @ ** Licensed under the Apache License, Version 2.0 (the "License"); |
| @ ** you may not use this file except in compliance with the License. |
| @ ** You may obtain a copy of the License at |
| @ ** |
| @ ** http://www.apache.org/licenses/LICENSE-2.0 |
| @ ** |
| @ ** Unless required by applicable law or agreed to in writing, software |
| @ ** distributed under the License is distributed on an "AS IS" BASIS, |
| @ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @ ** See the License for the specific language governing permissions and |
| @ ** limitations under the License. |
| @ */ |
| @ |
| @**********************************************************************/ |
| @Word32 Dot_product12( /* (o) Q31: normalized result (-1 <= val < 1) */ |
| @ Word16 x[], /* (i) 12bits: x vector */ |
| @ Word16 y[], /* (i) 12bits: y vector */ |
| @ Word16 lg, /* (i) : vector length */ |
| @ Word16 * exp /* (o) : exponent of result (0..+30) */ |
| @) |
| @************************************************************************ |
| @ x[] --- r0 |
| @ y[] --- r1 |
| @ lg --- r2 |
| @ *exp --- r3 |
| |
| .section .text |
| .global Dot_product12_asm |
| |
| @---------------------------------------------------------------------- |
| @ Dot_product12_asm |
| @ |
| @ Computes sum(x[i] * y[i]) with NEON widening multiply-accumulate, then |
| @ forms L_sum = (sum << 1) + 1, normalizes it and stores the exponent. |
| @ |
| @ In: r0 = x[] (Word16), r1 = y[] (Word16), r2 = lg, r3 = exp pointer |
| @ Out: r0 = L_sum << sft |
| @ [r3] = 30 - sft (stored as a halfword) |
| @ |
| @ lg is only compared against 64: lg == 64 processes 64 elements, any |
| @ other value processes 80 elements. The first 64 elements are always |
| @ loaded, whatever lg is. |
| @ When r0 == r1 the data is loaded once and squared (LOOP_EQ path). |
| @ |
| @ Preserved: r4-r12 and D8-D15 (saved/restored on the stack). |
| @ Clobbered: r0, r1, Q0-Q3, Q8-Q15, flags. |
| @---------------------------------------------------------------------- |
| Dot_product12_asm: |
| |
| STMFD r13!, {r4 - r12, r14} |
| VPUSH {D8 - D15} @ Q4-Q7 are overwritten below; D8-D15 are callee-saved in AAPCS |
| CMP r0, r1 |
| BEQ LOOP_EQ @ same buffer: use the sum-of-squares path |
| |
| VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[] |
| VLD1.S16 {Q2, Q3}, [r0]! @load 16 Word16 x[] |
| VLD1.S16 {Q4, Q5}, [r0]! @load 16 Word16 x[] |
| VLD1.S16 {Q6, Q7}, [r0]! @load 16 Word16 x[] |
| VLD1.S16 {Q8, Q9}, [r1]! @load 16 Word16 y[] |
| VLD1.S16 {Q10, Q11}, [r1]! @load 16 Word16 y[] |
| VLD1.S16 {Q12, Q13}, [r1]! @load 16 Word16 y[] |
| |
| VMULL.S16 Q15, D16, D0 @ Q15 = four 32-bit partial sums |
| VMLAL.S16 Q15, D17, D1 |
| VMLAL.S16 Q15, D18, D2 |
| VMLAL.S16 Q15, D19, D3 |
| VLD1.S16 {Q0, Q1}, [r1]! @load 16 Word16 y[] (x[0..15] already consumed, Q0/Q1 reused) |
| VMLAL.S16 Q15, D20, D4 |
| VMLAL.S16 Q15, D21, D5 |
| VMLAL.S16 Q15, D22, D6 |
| VMLAL.S16 Q15, D23, D7 |
| VMLAL.S16 Q15, D24, D8 |
| VMLAL.S16 Q15, D25, D9 |
| VMLAL.S16 Q15, D26, D10 |
| VMLAL.S16 Q15, D27, D11 |
| VMLAL.S16 Q15, D0, D12 |
| VMLAL.S16 Q15, D1, D13 |
| VMLAL.S16 Q15, D2, D14 |
| VMLAL.S16 Q15, D3, D15 |
| |
| CMP r2, #64 |
| BEQ Lable1 @ lg == 64: all elements accumulated |
| VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[] |
| VLD1.S16 {Q2, Q3}, [r1]! @load 16 Word16 y[] |
| VMLAL.S16 Q15, D4, D0 |
| VMLAL.S16 Q15, D5, D1 |
| VMLAL.S16 Q15, D6, D2 |
| VMLAL.S16 Q15, D7, D3 |
| B Lable1 @ plain jump over the x == y path (no link needed) |
| |
| LOOP_EQ: |
| VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[] |
| VLD1.S16 {Q2, Q3}, [r0]! @load 16 Word16 x[] |
| VLD1.S16 {Q4, Q5}, [r0]! @load 16 Word16 x[] |
| VLD1.S16 {Q6, Q7}, [r0]! @load 16 Word16 x[] |
| VMULL.S16 Q15, D0, D0 @ Q15 = four 32-bit partial sums of squares |
| VMLAL.S16 Q15, D1, D1 |
| VMLAL.S16 Q15, D2, D2 |
| VMLAL.S16 Q15, D3, D3 |
| VMLAL.S16 Q15, D4, D4 |
| VMLAL.S16 Q15, D5, D5 |
| VMLAL.S16 Q15, D6, D6 |
| VMLAL.S16 Q15, D7, D7 |
| VMLAL.S16 Q15, D8, D8 |
| VMLAL.S16 Q15, D9, D9 |
| VMLAL.S16 Q15, D10, D10 |
| VMLAL.S16 Q15, D11, D11 |
| VMLAL.S16 Q15, D12, D12 |
| VMLAL.S16 Q15, D13, D13 |
| VMLAL.S16 Q15, D14, D14 |
| VMLAL.S16 Q15, D15, D15 |
| |
| CMP r2, #64 |
| BEQ Lable1 @ lg == 64: all elements accumulated |
| VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[] |
| VMLAL.S16 Q15, D0, D0 |
| VMLAL.S16 Q15, D1, D1 |
| VMLAL.S16 Q15, D2, D2 |
| VMLAL.S16 Q15, D3, D3 |
| |
| Lable1: |
| |
| VQADD.S32 D30, D30, D31 @ fold the four partial sums to two (saturating) |
| VPADD.S32 D30, D30, D30 @ fold the two remaining lanes to one |
| VMOV.S32 r12, D30[0] @ r12 = dot product |
| |
| ADD r12, r12, r12 |
| ADD r12, r12, #1 @ L_sum = (L_sum << 1) + 1 |
| MOV r4, r12 |
| CMP r12, #0 |
| RSBLT r4, r12, #0 @ r4 = |L_sum| |
| CLZ r10, r4 |
| SUB r10, r10, #1 @ sft = norm_l(L_sum) |
| @ NOTE(review): for L_sum == -1 this yields sft = 30; confirm that matches the C norm_l() |
| MOV r0, r12, LSL r10 @ L_sum = L_sum << sft |
| RSB r11, r10, #30 @ *exp = 30 - sft |
| STRH r11, [r3] |
| |
| Dot_product12_end: |
| |
| VPOP {D8 - D15} @ restore callee-saved NEON registers |
| LDMFD r13!, {r4 - r12, r15} |
| |
| .end |
| |