blob: cd7517997fd0673ff6d254cd52f3bb2a9d64dd7e [file] [log] [blame]
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ ** http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@*void Convolve (
@* Word16 x[], /* (i) : input vector */
@* Word16 h[], /* (i) : impulse response */
@* Word16 y[], /* (o) : output vector */
@* Word16 L /* (i) : vector size */
@*)
@ r0 --- x[]
@ r1 --- h[]
@ r2 --- y[]
@ r3 --- L
.section .text
.global Convolve_asm
Convolve_asm:
STMFD r13!, {r4 - r12, r14}
MOV r3, #0 @ n
MOV r11, #0x8000
LOOP:
ADD r4, r1, r3, LSL #1 @ tmpH address
ADD r5, r3, #1 @ i = n + 1
MOV r6, r0 @ tmpX = x
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
SUB r5, r5, #1
MUL r8, r9, r10
LOOP1:
CMP r5, #0
BLE L1
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
LDRSH r12, [r6], #2 @ *tmpX++
LDRSH r14, [r4], #-2 @ *tmpH--
MLA r8, r9, r10, r8
MLA r8, r12, r14, r8
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
LDRSH r12, [r6], #2 @ *tmpX++
LDRSH r14, [r4], #-2 @ *tmpH--
MLA r8, r9, r10, r8
SUBS r5, r5, #4
MLA r8, r12, r14, r8
B LOOP1
L1:
ADD r5, r11, r8, LSL #1
MOV r5, r5, LSR #16 @extract_h(s)
ADD r3, r3, #1
STRH r5, [r2], #2 @y[n]
ADD r4, r1, r3, LSL #1 @tmpH address
ADD r5, r3, #1
MOV r6, r0
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2
LDRSH r12, [r6], #2
LDRSH r14, [r4], #-2
MUL r8, r9, r10
SUB r5, r5, #2
MLA r8, r12, r14, r8
LOOP2:
CMP r5, #0
BLE L2
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
LDRSH r12, [r6], #2 @ *tmpX++
LDRSH r14, [r4], #-2 @ *tmpH--
MLA r8, r9, r10, r8
MLA r8, r12, r14, r8
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
LDRSH r12, [r6], #2 @ *tmpX++
LDRSH r14, [r4], #-2 @ *tmpH--
MLA r8, r9, r10, r8
SUBS r5, r5, #4
MLA r8, r12, r14, r8
B LOOP2
L2:
ADD r8, r11, r8, LSL #1
MOV r8, r8, LSR #16 @extract_h(s)
ADD r3, r3, #1
STRH r8, [r2], #2 @y[n]
ADD r4, r1, r3, LSL #1
ADD r5, r3, #1
MOV r6, r0
LDRSH r9, [r6], #2
LDRSH r10, [r4], #-2
LDRSH r12, [r6], #2
LDRSH r14, [r4], #-2
MUL r8, r9, r10
LDRSH r9, [r6], #2
LDRSH r10, [r4], #-2
MLA r8, r12, r14, r8
SUB r5, r5, #3
MLA r8, r9, r10, r8
LOOP3:
CMP r5, #0
BLE L3
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
LDRSH r12, [r6], #2 @ *tmpX++
LDRSH r14, [r4], #-2 @ *tmpH--
MLA r8, r9, r10, r8
MLA r8, r12, r14, r8
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
LDRSH r12, [r6], #2 @ *tmpX++
LDRSH r14, [r4], #-2 @ *tmpH--
MLA r8, r9, r10, r8
SUBS r5, r5, #4
MLA r8, r12, r14, r8
B LOOP3
L3:
ADD r8, r11, r8, LSL #1
MOV r8, r8, LSR #16 @extract_h(s)
ADD r3, r3, #1
STRH r8, [r2], #2 @y[n]
ADD r5, r3, #1 @ i = n + 1
ADD r4, r1, r3, LSL #1 @ tmpH address
MOV r6, r0
MOV r8, #0
LOOP4:
CMP r5, #0
BLE L4
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
LDRSH r12, [r6], #2 @ *tmpX++
LDRSH r14, [r4], #-2 @ *tmpH--
MLA r8, r9, r10, r8
MLA r8, r12, r14, r8
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4], #-2 @ *tmpH--
LDRSH r12, [r6], #2 @ *tmpX++
LDRSH r14, [r4], #-2 @ *tmpH--
MLA r8, r9, r10, r8
SUBS r5, r5, #4
MLA r8, r12, r14, r8
B LOOP4
L4:
ADD r5, r11, r8, LSL #1
MOV r5, r5, LSR #16 @extract_h(s)
ADD r3, r3, #1
STRH r5, [r2], #2 @y[n]
CMP r3, #64
BLT LOOP
Convolve_asm_end:
LDMFD r13!, {r4 - r12, r15}
@ENDFUNC
.end