@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ ** http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@*void Convolve (
@* Word16 x[], /* (i) : input vector */
@* Word16 h[], /* (i) : impulse response */
@* Word16 y[], /* (o) : output vector */
@* Word16 L /* (i) : vector size */
@*)
@
@ r0 --- x[]
@ r1 --- h[]
@ r2 --- y[]
@ r3 --- L
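@
@ For reference, a minimal scalar sketch of the convolution this routine
@ computes (not the original VisualOn C source; Word16/Word32 assumed to be
@ 16/32-bit signed integers, and the size fixed at 64 as in the code below):
@
@*  void Convolve(Word16 x[], Word16 h[], Word16 y[], Word16 L)
@*  {
@*      Word32 n, i, s;
@*      for (n = 0; n < 64; n++)                    /* L is ignored; size fixed at 64 */
@*      {
@*          s = 0;
@*          for (i = 0; i <= n; i++)
@*              s += (Word32)x[i] * h[n - i];
@*          y[n] = (Word16)(((s << 1) + 0x8000) >> 16);  /* extract_h with rounding */
@*      }
@*  }
@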
.section .text
.global Convolve_asm
Convolve_asm:
STMFD r13!, {r4 - r12, r14} @ save callee-saved registers and LR
MOV r3, #0 @ n = 0 (the incoming L in r3 is overwritten; the loop assumes L == 64)
MOV r11, #0x8000 @ rounding constant for extract_h
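@ The outer loop is unrolled four ways: each pass computes y[n] for four
@ consecutive n. Since y[n] takes n + 1 taps, each case first handles
@ (n + 1) % 4 taps with scalar MUL/MLA, then accumulates the remaining
@ multiple-of-four taps with VMLAL in the NEON loops below.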
LOOP:
@MOV r8, #0 @ s = 0
ADD r4, r1, r3, LSL #1 @ tmpH = &h[n]
ADD r5, r3, #1 @ i = n + 1 taps remaining
MOV r6, r0 @ tmpX = x
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4] @ *tmpH--
SUB r5, r5, #1 @ one tap handled by the scalar MUL
VMOV.S32 Q10, #0 @ clear the NEON accumulator
MUL r8, r9, r10 @ s = x[0] * h[n]
LOOP1:
CMP r5, #0
BLE L1 @ no taps left for this n
SUB r4, r4, #8 @ step tmpH back by four halfwords
MOV r9, r4
VLD1.S16 D0, [r6]! @ load x[i..i+3]
VLD1.S16 D1, [r9]! @ load h[n-i-3..n-i]
VREV64.16 D1, D1 @ reverse to h[n-i], h[n-i-1], h[n-i-2], h[n-i-3]
SUBS r5, r5, #4
VMLAL.S16 Q10, D0, D1 @ widening MAC: four taps per iteration
B LOOP1
L1:
VADD.S32 D20, D20, D21 @ fold the Q10 accumulator into D20
VPADD.S32 D20, D20, D20 @ horizontal sum of the four lanes
VMOV.S32 r5, D20[0]
ADD r5, r5, r8 @ add the scalar partial sum
ADD r5, r11, r5, LSL #1 @ s = (s << 1) + 0x8000
MOV r5, r5, LSR #16 @ extract_h(s)
ADD r3, r3, #1 @ n++
STRH r5, [r2], #2 @ y[n]
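@ second unrolled case: (n + 1) % 4 == 2, so two scalar taps before the
@ NEON loop; LOOP2/L2 repeat the LOOP1/L1 pattern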
@MOV r8, #0
ADD r4, r1, r3, LSL #1 @ tmpH = &h[n]
ADD r5, r3, #1 @ i = n + 1 taps remaining
MOV r6, r0 @ tmpX = x
LDRSH r9, [r6], #2 @ x[0]
LDRSH r10, [r4], #-2 @ h[n]
LDRSH r12, [r6], #2 @ x[1]
LDRSH r14, [r4] @ h[n-1]
MUL r8, r9, r10 @ s = x[0] * h[n]
SUB r5, r5, #2 @ two taps handled by the scalar code
MLA r8, r12, r14, r8 @ s += x[1] * h[n-1]
VMOV.S32 Q10, #0 @ clear the NEON accumulator
LOOP2:
CMP r5, #0
BLE L2
SUB r4, r4, #8
MOV r9, r4
VLD1.S16 D0, [r6]!
VLD1.S16 D1, [r9]!
SUBS r5, r5, #4
VREV64.16 D1, D1
VMLAL.S16 Q10, D0, D1
B LOOP2
L2:
VADD.S32 D20, D20, D21
VPADD.S32 D20, D20, D20
VMOV.S32 r5, D20[0]
ADD r8, r8, r5
ADD r8, r11, r8, LSL #1
MOV r8, r8, LSR #16 @extract_h(s)
ADD r3, r3, #1
STRH r8, [r2], #2 @y[n]
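@ third unrolled case: (n + 1) % 4 == 3, so three scalar taps before the
@ NEON loop; LOOP3/L3 repeat the LOOP1/L1 pattern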
@MOV r8, #0
ADD r4, r1, r3, LSL #1 @ tmpH = &h[n]
ADD r5, r3, #1 @ i = n + 1 taps remaining
MOV r6, r0 @ tmpX = x
LDRSH r9, [r6], #2 @ x[0]
LDRSH r10, [r4], #-2 @ h[n]
LDRSH r12, [r6], #2 @ x[1]
LDRSH r14, [r4], #-2 @ h[n-1]
MUL r8, r9, r10 @ s = x[0] * h[n]
LDRSH r9, [r6], #2 @ x[2]
LDRSH r10, [r4] @ h[n-2]
MLA r8, r12, r14, r8 @ s += x[1] * h[n-1]
SUB r5, r5, #3 @ three taps handled by the scalar code
MLA r8, r9, r10, r8 @ s += x[2] * h[n-2]
VMOV.S32 Q10, #0 @ clear the NEON accumulator
LOOP3:
CMP r5, #0
BLE L3
SUB r4, r4, #8
MOV r9, r4
VLD1.S16 D0, [r6]!
VLD1.S16 D1, [r9]!
VREV64.16 D1, D1
SUBS r5, r5, #4
VMLAL.S16 Q10, D0, D1
B LOOP3
L3:
VADD.S32 D20, D20, D21
VPADD.S32 D20, D20, D20
VMOV.S32 r5, D20[0]
ADD r8, r8, r5
ADD r8, r11, r8, LSL #1
MOV r8, r8, LSR #16 @extract_h(s)
ADD r3, r3, #1
STRH r8, [r2], #2 @y[n]
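@ fourth unrolled case: n + 1 is a multiple of 4, so no scalar taps;
@ LOOP4/L4 are pure NEON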
ADD r5, r3, #1 @ i = n + 1 (a multiple of 4 here)
ADD r4, r1, r5, LSL #1 @ tmpH = &h[n+1]; LOOP4 pre-decrements before each load
MOV r6, r0 @ tmpX = x
VMOV.S32 Q10, #0 @ clear the NEON accumulator
LOOP4:
CMP r5, #0
BLE L4
SUB r4, r4, #8
MOV r9, r4
VLD1.S16 D0, [r6]!
VLD1.S16 D1, [r9]!
VREV64.16 D1, D1
SUBS r5, r5, #4
VMLAL.S16 Q10, D0, D1
B LOOP4
L4:
VADD.S32 D20, D20, D21 @ fold the Q10 accumulator into D20
VPADD.S32 D20, D20, D20 @ horizontal sum of the four lanes
VMOV.S32 r5, D20[0]
ADD r5, r11, r5, LSL #1 @ s = (s << 1) + 0x8000 (no scalar part in this case)
MOV r5, r5, LSR #16 @ extract_h(s)
ADD r3, r3, #1 @ n++
STRH r5, [r2], #2 @ y[n]
CMP r3, #64 @ fixed vector size of 64 samples
BLT LOOP
Convolve_asm_end:
LDMFD r13!, {r4 - r12, r15} @ restore registers; loading r15 returns to the caller
@ENDFUNC
.end