av/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s - nest-cam/4320010/av - Git at Google

 @/*
 @ ** Copyright 2003-2010, VisualOn, Inc.
 @ **
 @ ** Licensed under the Apache License, Version 2.0 (the "License");
 @ ** you may not use this file except in compliance with the License.
 @ ** You may obtain a copy of the License at
 @ **
 @ **     http://www.apache.org/licenses/LICENSE-2.0
 @ **
 @ ** Unless required by applicable law or agreed to in writing, software
 @ ** distributed under the License is distributed on an "AS IS" BASIS,
 @ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 @ ** See the License for the specific language governing permissions and
 @ ** limitations under the License.
 @ */
 @
 @void Syn_filt(
 @     Word16 a[],                           /* (i) Q12 : a[m+1] prediction coefficients           */
 @     Word16 x[],                           /* (i)     : input signal                             */
 @     Word16 y[],                           /* (o)     : output signal                            */
 @     Word16 mem[],                         /* (i/o)   : memory associated with this filtering.   */
 @)
 @***********************************************************************
 @ a[]    ---   r0
 @ x[]    ---   r1
 @ y[]    ---   r2
 @ mem[]  ---   r3
 @ Fixed parameters in this version: m = 16 (filter order), lg = 80 (samples per call), update = 1 (mem[] is always rewritten)

           .section  .text
           .global   Syn_filt_asm

 @-----------------------------------------------------------------------
 @ Syn_filt_asm: order-16 synthesis filter over 80 samples (ARMv7 NEON).
 @
 @ In:    r0 = a[]    Word16 coefficients a[0]..a[16], Q12
 @        r1 = x[]    80 Word16 input samples
 @        r2 = y[]    80 Word16 output samples
 @        r3 = mem[]  16 Word16 history samples, read first, rewritten last
 @ Out:   y[0..79] written, mem[0..15] = last 16 outputs
 @ Clobb: r0-r3, r12, D0-D7, Q9-Q13, flags
 @ Stack: 40 bytes of saved core registers + 700 bytes of scratch (y_buf)
 @
 @ Per sample, with a0 = a[0] >> 1:
 @   L_tmp = a0 * x[i] - sum over j = 1..16 of a[j] * yy[i - j]
 @   y[i]  = yy[i] = saturate16((L_tmp + 0x800) >> 12)
 @
 @ Only caller-saved NEON registers (D0-D7 and D16-D31) are written.
 @ D8-D15 are callee-saved under the AAPCS and are deliberately avoided,
 @ so nothing has to be spilled to keep a C caller's values intact.
 @-----------------------------------------------------------------------
 Syn_filt_asm:

           STMFD         r13!, {r4 - r12, r14}
           SUB           r13, r13, #700                   @ scratch area, y_buf starts at r13

           @ y_buf[0..15] = mem[0..15]  (filter history precedes the outputs)
           MOV           r4, r3                           @ r4 = mem (r3 is kept for the final update)
           MOV           r5, r13                          @ r5 = y_buf
           VLD1.S16      {D0, D1, D2, D3}, [r4]!
           VST1.S16      {D0, D1, D2, D3}, [r5]!

           LDRSH         r5, [r0], #2                     @ r5 = a[0], r0 now points at a[1]
           MOV           r5, r5, ASR #1                   @ a0 = a[0] >> 1

           @ D0..D3 = a[1..16]. Each D register is lane-reversed so that the
           @ newest history sample lines up with a[1] and the oldest with a[16].
           VLD1.S16      {D0, D1, D2, D3}, [r0]!
           VREV64.16     D0, D0                           @ D0 = a4,  a3,  a2,  a1
           VREV64.16     D1, D1                           @ D1 = a8,  a7,  a6,  a5
           VREV64.16     D2, D2                           @ D2 = a12, a11, a10, a9
           VREV64.16     D3, D3                           @ D3 = a16, a15, a14, a13

           MOV           r8, #0                           @ i = 0
           MOV           r10, r13
           ADD           r4, r13, #32                     @ r4 = yy = y_buf + 16 halfwords
           VLD1.S16      {D4, D5, D6, D7}, [r10]!         @ D4..D7 = 16-sample history window, oldest first

 SYN_LOOP:

           LDRSH         r6, [r1], #2                     @ r6 = x[i]
           MUL           r12, r6, r5                      @ L_tmp = x[i] * a0
           ADD           r10, r4, r8, LSL #1              @ r10 = address of yy[i]

           VDUP.S32      Q10, r12                         @ L_tmp in every lane
           VMULL.S16     Q11, D3, D4                      @ Q11 = four partial sums of a[j] * yy[i-j]
           VMLAL.S16     Q11, D2, D5
           VMLAL.S16     Q11, D1, D6
           VMLAL.S16     Q11, D0, D7
           VEXT.8        D4, D4, D5, #2                   @ slide the window by one sample,
           VEXT.8        D5, D5, D6, #2                   @ dropping the oldest halfword
           VEXT.8        D6, D6, D7, #2
           VPADD.S32     D24, D22, D23                    @ fold four partial sums into two
           ADD           r8, r8, #1                       @ i++
           VPADD.S32     D22, D24, D24                    @ fold two into one (D22[0])

           VDUP.S32      Q13, D22[0]                      @ total in every lane

           VSUB.S32      Q9, Q10, Q13                     @ L_tmp - sum
           VQRSHRN.S32   D20, Q9, #12                     @ rounding, saturating >> 12 down to 16 bits
           VMOV.S16      r9, D20[0]                       @ r9 = new output sample
           VEXT.8        D7, D7, D20, #2                  @ append it as the newest window entry
           CMP           r8, #80
           STRH          r9, [r10]                        @ yy[i]
           STRH          r9, [r2], #2                     @ y[i]

           BLT           SYN_LOOP

           @ mem[0..15] = yy[64..79], the last 16 outputs (y_buf byte offset 160)
           ADD           r5, r13, #160
           VLD1.S16      {D0, D1, D2, D3}, [r5]!
           VST1.S16      {D0, D1, D2, D3}, [r3]!

 Syn_filt_asm_end:

           ADD           r13, r13, #700
           LDMFD         r13!, {r4 - r12, r15}            @ restore and return (pc <- saved lr)
           @ENDFUNC
           .end
	@/*
	@ ** Copyright 2003-2010, VisualOn, Inc.
	@ **
	@ ** Licensed under the Apache License, Version 2.0 (the "License");
	@ ** you may not use this file except in compliance with the License.
	@ ** You may obtain a copy of the License at
	@ **
	@ ** http://www.apache.org/licenses/LICENSE-2.0
	@ **
	@ ** Unless required by applicable law or agreed to in writing, software
	@ ** distributed under the License is distributed on an "AS IS" BASIS,
	@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	@ ** See the License for the specific language governing permissions and
	@ ** limitations under the License.
	@ */
	@
	@void Syn_filt(
	@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */
	@ Word16 x[], /* (i) : input signal */
	@ Word16 y[], /* (o) : output signal */
	@ Word16 mem[], /* (i/o) : memory associated with this filtering. */
	@)
	@***********************************************************************
	@ a[] --- r0
	@ x[] --- r1
	@ y[] --- r2
	@ mem[] --- r3
	@ Fixed parameters in this version: m = 16 (filter order), lg = 80 (samples per call), update = 1 (mem[] is always rewritten)

	.section .text
	.global Syn_filt_asm

	@-----------------------------------------------------------------------
	@ Syn_filt_asm: order-16 synthesis filter over 80 samples (ARMv7 NEON).
	@
	@ In:    r0 = a[]    Word16 coefficients a[0]..a[16], Q12
	@        r1 = x[]    80 Word16 input samples
	@        r2 = y[]    80 Word16 output samples
	@        r3 = mem[]  16 Word16 history samples, read first, rewritten last
	@ Out:   y[0..79] written, mem[0..15] = last 16 outputs
	@ Clobb: r0-r3, r12, D0-D7, Q9-Q13, flags
	@ Stack: 40 bytes of saved core registers + 700 bytes of scratch (y_buf)
	@
	@ Per sample, with a0 = a[0] >> 1:
	@   L_tmp = a0 * x[i] - sum over j = 1..16 of a[j] * yy[i - j]
	@   y[i]  = yy[i] = saturate16((L_tmp + 0x800) >> 12)
	@
	@ Temporaries live in caller-saved Q9-Q13 only. The AAPCS callee-saved
	@ range D8-D15 is never written, so no NEON save/restore is required.
	@-----------------------------------------------------------------------
	Syn_filt_asm:

	        STMFD   r13!, {r4 - r12, r14}
	        SUB     r13, r13, #700              @ scratch area, y_buf starts at r13

	        @ Seed the scratch buffer with the filter history:
	        @ y_buf[0..15] = mem[0..15]
	        MOV     r4, r3                      @ r4 = mem (r3 is kept for the final update)
	        MOV     r5, r13                     @ r5 = y_buf
	        VLD1.S16 {D0, D1, D2, D3}, [r4]!
	        VST1.S16 {D0, D1, D2, D3}, [r5]!

	        LDRSH   r5, [r0], #2                @ r5 = a[0], r0 now points at a[1]
	        MOV     r5, r5, ASR #1              @ a0 = a[0] >> 1

	        @ D0..D3 = a[1..16], lane-reversed inside each D register so the
	        @ newest history sample multiplies a[1] and the oldest a[16].
	        VLD1.S16 {D0, D1, D2, D3}, [r0]!
	        VREV64.16 D0, D0
	        VREV64.16 D1, D1
	        VREV64.16 D2, D2
	        VREV64.16 D3, D3

	        MOV     r8, #0                      @ i = 0
	        MOV     r10, r13
	        ADD     r4, r13, #32                @ r4 = yy = y_buf + 16 halfwords
	        VLD1.S16 {D4, D5, D6, D7}, [r10]!   @ D4..D7 = history window, oldest first

	SYN_LOOP:

	        LDRSH   r6, [r1], #2                @ r6 = x[i]
	        MUL     r12, r6, r5                 @ L_tmp = x[i] * a0
	        ADD     r10, r4, r8, LSL #1         @ r10 = address of yy[i]

	        VDUP.S32 Q10, r12                   @ L_tmp in every lane
	        VMULL.S16 Q11, D3, D4               @ Q11 = four partial sums of a[j] * yy[i-j]
	        VMLAL.S16 Q11, D2, D5
	        VMLAL.S16 Q11, D1, D6
	        VMLAL.S16 Q11, D0, D7
	        VEXT.8  D4, D4, D5, #2              @ slide the window by one sample
	        VEXT.8  D5, D5, D6, #2
	        VEXT.8  D6, D6, D7, #2
	        VPADD.S32 D24, D22, D23             @ four partial sums -> two
	        ADD     r8, r8, #1                  @ i++
	        VPADD.S32 D22, D24, D24             @ two -> one, in D22[0]

	        VDUP.S32 Q13, D22[0]                @ total in every lane

	        VSUB.S32 Q9, Q10, Q13               @ L_tmp - sum
	        VQRSHRN.S32 D20, Q9, #12            @ rounding, saturating >> 12 down to 16 bits
	        VMOV.S16 r9, D20[0]                 @ r9 = new output sample
	        VEXT.8  D7, D7, D20, #2             @ append it as the newest window entry
	        CMP     r8, #80
	        STRH    r9, [r10]                   @ yy[i]
	        STRH    r9, [r2], #2                @ y[i]

	        BLT     SYN_LOOP

	        @ mem[0..15] = yy[64..79], the last 16 outputs (y_buf byte offset 160)
	        ADD     r5, r13, #160
	        VLD1.S16 {D0, D1, D2, D3}, [r5]!
	        VST1.S16 {D0, D1, D2, D3}, [r3]!

	Syn_filt_asm_end:

	        ADD     r13, r13, #700
	        LDMFD   r13!, {r4 - r12, r15}       @ restore and return (pc <- saved lr)
	@ENDFUNC
	.end