av/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Norm_Corr_opt.s - nest-cam/4320010/av - Git at Google

 @/*
 @ ** Copyright 2003-2010, VisualOn, Inc.
 @ **
 @ ** Licensed under the Apache License, Version 2.0 (the "License");
 @ ** you may not use this file except in compliance with the License.
 @ ** You may obtain a copy of the License at
 @ **
 @ **     http://www.apache.org/licenses/LICENSE-2.0
 @ **
 @ ** Unless required by applicable law or agreed to in writing, software
 @ ** distributed under the License is distributed on an "AS IS" BASIS,
 @ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 @ ** See the License for the specific language governing permissions and
 @ ** limitations under the License.
 @ */

 @static void Norm_Corr (Word16 exc[],                    /* (i)     : excitation buffer          */
 @                       Word16 xn[],                     /* (i)     : target vector              */
 @                       Word16 h[],                      /* (i) Q15 : impulse response of synth/wgt filters */
 @                       Word16 L_subfr,                  /* (i)     : sub-frame length */
 @                       Word16 t_min,                    /* (i)     : minimum value of pitch lag.   */
 @                       Word16 t_max,                    /* (i)     : maximum value of pitch lag.   */
 @                       Word16 corr_norm[])              /* (o) Q15 : normalized correlation    */
 @

 @ r0 --- exc[]
 @ r1 --- xn[]
 @ r2 --- h[]
 @ r3 --- L_subfr
 @ r4 --- t_min
 @ r5 --- t_max
 @ r6 --- corr_norm[]


 	.section  .text
         .global   Norm_corr_asm
         .extern   Convolve_asm
         .extern   Isqrt_n
 @******************************
 @ constant
 @******************************
 .equ         EXC           ,   0
 .equ         XN            ,   4
 .equ         H             ,   8
 .equ         L_SUBFR       ,   12
 .equ         voSTACK       ,   172
 .equ         T_MIN         ,   212
 .equ         T_MAX         ,   216
 .equ         CORR_NORM     ,   220

 Norm_corr_asm:

         STMFD      r13!, {r4 - r12, r14}
         SUB        r13, r13, #voSTACK

         ADD        r8, r13, #20                 @get the excf[L_SUBFR]
         LDR        r4, [r13, #T_MIN]            @get t_min
         RSB        r11, r4, #0                  @k = -t_min
         ADD        r5, r0, r11, LSL #1          @get the &exc[k]

         @transfer Convolve function
         STMFD       sp!, {r0 - r3}
         MOV         r0, r5
         MOV         r1, r2
         MOV         r2, r8                       @r2 --- excf[]
         BL          Convolve_asm
         LDMFD       sp!, {r0 - r3}

         @ r8 --- excf[]

 	MOV         r14, r1                       @copy xn[] address
         MOV         r5, #64
         MOV         r6, #0                       @L_tmp = 0
         MOV         r7, #1

 LOOP1:
         LDR         r9,  [r14], #4
 	LDR         r10, [r14], #4
 	LDR         r11, [r14], #4
 	LDR         r12, [r14], #4
 	SMLABB      r6, r9, r9, r6               @L_tmp += (xn[i] * xn[i])
 	SMLATT      r6, r9, r9, r6               @L_tmp += (xn[i+1] * xn[i+1])
         SMLABB      r6, r10, r10, r6
 	SMLATT      r6, r10, r10, r6
 	SMLABB      r6, r11, r11, r6
         SMLATT      r6, r11, r11, r6
         SMLABB      r6, r12, r12, r6
         SMLATT      r6, r12, r12, r6
         SUBS        r5, r5, #8
         BNE         LOOP1

 	ADD         r9, r7, r6, LSL #1           @L_tmp = (L_tmp << 1) + 1
 	CLZ         r7, r9
 	SUB         r6, r7, #1                   @exp = norm_l(L_tmp)
         RSB         r7, r6, #32                  @exp = 32 - exp
 	MOV         r6, r7, ASR #1
 	RSB         r7, r6, #0                   @scale = -(exp >> 1)

         @loop for every possible period
 	@for(t = t_min@ t <= t_max@ t++)
 	@r7 --- scale r4 --- t_min r8 --- excf[]

 LOOPFOR:
         MOV         r5, #0                       @L_tmp  = 0
 	MOV         r6, #0                       @L_tmp1 = 0
 	MOV         r9, #64
 	MOV         r12, r1                      @copy of xn[]
 	ADD         r14, r13, #20                @copy of excf[]
 	MOV         r8, #0x8000

 LOOPi:
 	LDR         r11, [r14], #4               @load excf[i], excf[i+1]
         LDR         r10, [r12], #4               @load xn[i], xn[i+1]
 	SMLABB      r6, r11, r11, r6             @L_tmp1 += excf[i] * excf[i]
 	SMLATT      r6, r11, r11, r6             @L_tmp1 += excf[i+1] * excf[i+1]
         SMLABB      r5, r10, r11, r5             @L_tmp += xn[i] * excf[i]
 	SMLATT      r5, r10, r11, r5             @L_tmp += xn[i+1] * excf[i+1]
 	LDR         r11, [r14], #4               @load excf[i+2], excf[i+3]
 	LDR         r10, [r12], #4               @load xn[i+2], xn[i+3]
         SMLABB      r6, r11, r11, r6
 	SMLATT      r6, r11, r11, r6
 	SMLABB      r5, r10, r11, r5
 	SMLATT      r5, r10, r11, r5
 	SUBS        r9, r9, #4
         BNE         LOOPi

 	@r5 --- L_tmp, r6 --- L_tmp1
 	MOV         r10, #1
 	ADD         r5, r10, r5, LSL #1          @L_tmp = (L_tmp << 1) + 1
 	ADD         r6, r10, r6, LSL #1          @L_tmp1 = (L_tmp1 << 1) + 1

 	CLZ         r10, r5
 	CMP         r5, #0
 	RSBLT       r11, r5, #0
 	CLZLT       r10, r11
 	SUB         r10, r10, #1                 @exp = norm_l(L_tmp)

 	MOV         r5, r5, LSL r10              @L_tmp = (L_tmp << exp)
 	RSB         r10, r10, #30                @exp_corr = 30 - exp
 	MOV         r11, r5, ASR #16             @corr = extract_h(L_tmp)

 	CLZ         r5, r6
 	SUB         r5, r5, #1
 	MOV         r6, r6, LSL r5               @L_tmp = (L_tmp1 << exp)
 	RSB         r5, r5, #30                  @exp_norm = 30 - exp

 	@r10 --- exp_corr, r11 --- corr
 	@r6  --- L_tmp, r5 --- exp_norm

 	@Isqrt_n(&L_tmp, &exp_norm)

 	MOV         r14, r0
 	MOV         r12, r1

         STMFD       sp!, {r0 - r4, r7 - r12, r14}
 	ADD         r1, sp, #4
 	ADD         r0, sp, #0
 	STR         r6, [sp]
 	STRH        r5, [sp, #4]
 	BL          Isqrt_n
 	LDR         r6, [sp]
 	LDRSH       r5, [sp, #4]
         LDMFD       sp!, {r0 - r4, r7 - r12, r14}
 	MOV         r0, r14
 	MOV         r1, r12


 	MOV         r6, r6, ASR #16              @norm = extract_h(L_tmp)
 	MUL         r12, r6, r11
 	ADD         r12, r12, r12                @L_tmp = vo_L_mult(corr, norm)

 	ADD         r6, r10, r5
 	ADD         r6, r6, r7                   @exp_corr + exp_norm + scale

         CMP         r6, #0
         RSBLT       r6, r6, #0
 	MOVLT       r12, r12, ASR r6
         MOVGT       r12, r12, LSL r6             @L_tmp = L_shl(L_tmp, exp_corr + exp_norm + scale)

         ADD         r12, r12, r8
         MOV         r12, r12, ASR #16            @vo_round(L_tmp)

         LDR         r5, [r13, #CORR_NORM]        @ get corr_norm address
 	LDR         r6, [r13, #T_MAX]            @ get t_max
 	ADD         r10, r5, r4, LSL #1          @ get corr_norm[t] address
 	STRH        r12, [r10]                   @ corr_norm[t] = vo_round(L_tmp)

 	CMP         r4, r6
 	BEQ         Norm_corr_asm_end

 	ADD         r4, r4, #1                   @ t_min ++

 	RSB         r5, r4, #0                   @ k

 	MOV         r6, #63                      @ i = 63
 	MOV         r8, r0                       @ exc[]
 	MOV         r9, r2                       @ h[]
 	ADD         r10, r13, #20                @ excf[]

 	ADD         r8, r8, r5, LSL #1           @ exc[k] address
 	ADD         r9, r9, r6, LSL #1           @ h[i] address
 	ADD         r10, r10, r6, LSL #1         @ excf[i] address
 	LDRSH       r11, [r8]                    @ tmp = exc[k]

 LOOPK:
         LDRSH       r8, [r9], #-2                @ load h[i]
 	LDRSH       r12, [r10, #-2]              @ load excf[i - 1]
 	MUL         r14, r11, r8
 	MOV         r8, r14, ASR #15
 	ADD         r14, r8, r12
 	STRH        r14, [r10], #-2
 	SUBS        r6, r6, #1
 	BGT         LOOPK

 	LDRSH       r8, [r9]                     @ load h[0]
 	MUL         r14, r11, r8
         LDR         r6, [r13, #T_MAX]            @ get t_max
 	MOV         r8, r14, ASR #15
 	STRH        r8, [r10]

 	CMP         r4, r6
 	BLE         LOOPFOR

 Norm_corr_asm_end:

         ADD            r13, r13, #voSTACK
         LDMFD          r13!, {r4 - r12, r15}

         .end
	@/*
	@ ** Copyright 2003-2010, VisualOn, Inc.
	@ **
	@ ** Licensed under the Apache License, Version 2.0 (the "License");
	@ ** you may not use this file except in compliance with the License.
	@ ** You may obtain a copy of the License at
	@ **
	@ ** http://www.apache.org/licenses/LICENSE-2.0
	@ **
	@ ** Unless required by applicable law or agreed to in writing, software
	@ ** distributed under the License is distributed on an "AS IS" BASIS,
	@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	@ ** See the License for the specific language governing permissions and
	@ ** limitations under the License.
	@ */

	@static void Norm_Corr (Word16 exc[], /* (i) : excitation buffer */
	@ Word16 xn[], /* (i) : target vector */
	@ Word16 h[], /* (i) Q15 : impulse response of synth/wgt filters */
	@ Word16 L_subfr, /* (i) : sub-frame length */
	@ Word16 t_min, /* (i) : minimum value of pitch lag. */
	@ Word16 t_max, /* (i) : maximum value of pitch lag. */
	@ Word16 corr_norm[]) /* (o) Q15 : normalized correlation */
	@

	@ r0 --- exc[]
	@ r1 --- xn[]
	@ r2 --- h[]
	@ r3 --- L_subfr
	@ r4 --- t_min
	@ r5 --- t_max
	@ r6 --- corr_norm[]


	.section .text
	.global Norm_corr_asm
	.extern Convolve_asm
	.extern Isqrt_n
	@******************************
	@ constant
	@******************************
	.equ EXC , 0
	.equ XN , 4
	.equ H , 8
	.equ L_SUBFR , 12
	.equ voSTACK , 172
	.equ T_MIN , 212
	.equ T_MAX , 216
	.equ CORR_NORM , 220

	Norm_corr_asm:

	STMFD r13!, {r4 - r12, r14}
	SUB r13, r13, #voSTACK

	ADD r8, r13, #20 @get the excf[L_SUBFR]
	LDR r4, [r13, #T_MIN] @get t_min
	RSB r11, r4, #0 @k = -t_min
	ADD r5, r0, r11, LSL #1 @get the &exc[k]

	@transfer Convolve function
	STMFD sp!, {r0 - r3}
	MOV r0, r5
	MOV r1, r2
	MOV r2, r8 @r2 --- excf[]
	BL Convolve_asm
	LDMFD sp!, {r0 - r3}

	@ r8 --- excf[]

	MOV r14, r1 @copy xn[] address
	MOV r5, #64
	MOV r6, #0 @L_tmp = 0
	MOV r7, #1

	LOOP1:
	LDR r9, [r14], #4
	LDR r10, [r14], #4
	LDR r11, [r14], #4
	LDR r12, [r14], #4
	SMLABB r6, r9, r9, r6 @L_tmp += (xn[i] * xn[i])
	SMLATT r6, r9, r9, r6 @L_tmp += (xn[i+1] * xn[i+1])
	SMLABB r6, r10, r10, r6
	SMLATT r6, r10, r10, r6
	SMLABB r6, r11, r11, r6
	SMLATT r6, r11, r11, r6
	SMLABB r6, r12, r12, r6
	SMLATT r6, r12, r12, r6
	SUBS r5, r5, #8
	BNE LOOP1

	ADD r9, r7, r6, LSL #1 @L_tmp = (L_tmp << 1) + 1
	CLZ r7, r9
	SUB r6, r7, #1 @exp = norm_l(L_tmp)
	RSB r7, r6, #32 @exp = 32 - exp
	MOV r6, r7, ASR #1
	RSB r7, r6, #0 @scale = -(exp >> 1)

	@loop for every possible period
	@for(t = t_min@ t <= t_max@ t++)
	@r7 --- scale r4 --- t_min r8 --- excf[]

	LOOPFOR:
	MOV r5, #0 @L_tmp = 0
	MOV r6, #0 @L_tmp1 = 0
	MOV r9, #64
	MOV r12, r1 @copy of xn[]
	ADD r14, r13, #20 @copy of excf[]
	MOV r8, #0x8000

	LOOPi:
	LDR r11, [r14], #4 @load excf[i], excf[i+1]
	LDR r10, [r12], #4 @load xn[i], xn[i+1]
	SMLABB r6, r11, r11, r6 @L_tmp1 += excf[i] * excf[i]
	SMLATT r6, r11, r11, r6 @L_tmp1 += excf[i+1] * excf[i+1]
	SMLABB r5, r10, r11, r5 @L_tmp += xn[i] * excf[i]
	SMLATT r5, r10, r11, r5 @L_tmp += xn[i+1] * excf[i+1]
	LDR r11, [r14], #4 @load excf[i+2], excf[i+3]
	LDR r10, [r12], #4 @load xn[i+2], xn[i+3]
	SMLABB r6, r11, r11, r6
	SMLATT r6, r11, r11, r6
	SMLABB r5, r10, r11, r5
	SMLATT r5, r10, r11, r5
	SUBS r9, r9, #4
	BNE LOOPi

	@r5 --- L_tmp, r6 --- L_tmp1
	MOV r10, #1
	ADD r5, r10, r5, LSL #1 @L_tmp = (L_tmp << 1) + 1
	ADD r6, r10, r6, LSL #1 @L_tmp1 = (L_tmp1 << 1) + 1

	CLZ r10, r5
	CMP r5, #0
	RSBLT r11, r5, #0
	CLZLT r10, r11
	SUB r10, r10, #1 @exp = norm_l(L_tmp)

	MOV r5, r5, LSL r10 @L_tmp = (L_tmp << exp)
	RSB r10, r10, #30 @exp_corr = 30 - exp
	MOV r11, r5, ASR #16 @corr = extract_h(L_tmp)

	CLZ r5, r6
	SUB r5, r5, #1
	MOV r6, r6, LSL r5 @L_tmp = (L_tmp1 << exp)
	RSB r5, r5, #30 @exp_norm = 30 - exp

	@r10 --- exp_corr, r11 --- corr
	@r6 --- L_tmp, r5 --- exp_norm

	@Isqrt_n(&L_tmp, &exp_norm)

	MOV r14, r0
	MOV r12, r1

	STMFD sp!, {r0 - r4, r7 - r12, r14}
	ADD r1, sp, #4
	ADD r0, sp, #0
	STR r6, [sp]
	STRH r5, [sp, #4]
	BL Isqrt_n
	LDR r6, [sp]
	LDRSH r5, [sp, #4]
	LDMFD sp!, {r0 - r4, r7 - r12, r14}
	MOV r0, r14
	MOV r1, r12


	MOV r6, r6, ASR #16 @norm = extract_h(L_tmp)
	MUL r12, r6, r11
	ADD r12, r12, r12 @L_tmp = vo_L_mult(corr, norm)

	ADD r6, r10, r5
	ADD r6, r6, r7 @exp_corr + exp_norm + scale

	CMP r6, #0
	RSBLT r6, r6, #0
	MOVLT r12, r12, ASR r6
	MOVGT r12, r12, LSL r6 @L_tmp = L_shl(L_tmp, exp_corr + exp_norm + scale)

	ADD r12, r12, r8
	MOV r12, r12, ASR #16 @vo_round(L_tmp)

	LDR r5, [r13, #CORR_NORM] @ get corr_norm address
	LDR r6, [r13, #T_MAX] @ get t_max
	ADD r10, r5, r4, LSL #1 @ get corr_norm[t] address
	STRH r12, [r10] @ corr_norm[t] = vo_round(L_tmp)

	CMP r4, r6
	BEQ Norm_corr_asm_end

	ADD r4, r4, #1 @ t_min ++

	RSB r5, r4, #0 @ k

	MOV r6, #63 @ i = 63
	MOV r8, r0 @ exc[]
	MOV r9, r2 @ h[]
	ADD r10, r13, #20 @ excf[]

	ADD r8, r8, r5, LSL #1 @ exc[k] address
	ADD r9, r9, r6, LSL #1 @ h[i] address
	ADD r10, r10, r6, LSL #1 @ excf[i] address
	LDRSH r11, [r8] @ tmp = exc[k]

	LOOPK:
	LDRSH r8, [r9], #-2 @ load h[i]
	LDRSH r12, [r10, #-2] @ load excf[i - 1]
	MUL r14, r11, r8
	MOV r8, r14, ASR #15
	ADD r14, r8, r12
	STRH r14, [r10], #-2
	SUBS r6, r6, #1
	BGT LOOPK

	LDRSH r8, [r9] @ load h[0]
	MUL r14, r11, r8
	LDR r6, [r13, #T_MAX] @ get t_max
	MOV r8, r14, ASR #15
	STRH r8, [r10]

	CMP r4, r6
	BLE LOOPFOR

	Norm_corr_asm_end:

	ADD r13, r13, #voSTACK
	LDMFD r13!, {r4 - r12, r15}

	.end