 ;// Source: av/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s - nest-cam/4320010/av - Git at Google

 ;//
 ;// Copyright (C) 2007-2008 ARM Limited
 ;//
 ;// Licensed under the Apache License, Version 2.0 (the "License");
 ;// you may not use this file except in compliance with the License.
 ;// You may obtain a copy of the License at
 ;//
 ;//      http://www.apache.org/licenses/LICENSE-2.0
 ;//
 ;// Unless required by applicable law or agreed to in writing, software
 ;// distributed under the License is distributed on an "AS IS" BASIS,
 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ;// See the License for the specific language governing permissions and
 ;// limitations under the License.
 ;//
 ;//
 ;//
 ;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
 ;// OpenMAX DL: v1.0.2
 ;// Revision:   12290
 ;// Date:       Wednesday, April 9, 2008
 ;//
 ;//
 ;//
 ;//


 ;// Functions:
 ;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
 ;//
 ;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
 ;// First operand will be at offset ALIGNMENT from aligned address
 ;// Second operand will be at aligned location and will be used as output.
 ;// destination pointed by (pDst) for vertical interpolation.
 ;// This function needs to copy 4 bytes in horizontal direction
 ;//
 ;// Registers used as input for this function
 ;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
 ;//
 ;// Registers preserved for top level function
 ;// r4,r5,r6,r8,r9,r14
 ;//
 ;// Registers modified by the function
 ;// r7,r10,r11,r12
 ;//
 ;// Output registers
 ;// r2 - pointer to the aligned location
 ;// r3 - step size to this aligned location

         INCLUDE omxtypes_s.h
         INCLUDE armCOMM_s.h

         ;// M_VARIANTS (a macro not defined in this file) is given ARM1136JS,
         ;// the same symbol that the IF wrapping the code further down tests.
         M_VARIANTS ARM1136JS

         ;// Exported entry points, one per handled pPred0 alignment offset
         ;// (0, 2 and 3).
         EXPORT armVCM4P10_Average_4x4_Align0_unsafe
         EXPORT armVCM4P10_Average_4x4_Align2_unsafe
         EXPORT armVCM4P10_Average_4x4_Align3_unsafe

;// armasm only recognises a symbol when it starts in column 1, so DEBUG_ON
;// and every RN alias below is written flush-left.
;// DEBUG_ON is not read anywhere in this file; presumably the included
;// armCOMM_s.h declares and consumes it - TODO confirm.
DEBUG_ON    SETL {FALSE}

;// Declare input registers
;// r2 and r3 each carry two names: the second operand is read through
;// pPred1/iPredStep1 and the result is written through pDstPred/iDstStep,
;// i.e. the output overwrites the second operand in place.
pPred0          RN 0
iPredStep0      RN 1
pPred1          RN 2
iPredStep1      RN 3
pDstPred        RN 2
iDstStep        RN 3

;// Declare other intermediate registers
;// iPredB1 is r14 (lr).
;// Temp1/ResultB share r4 and Temp2/ResultA share r5: a Temp value must be
;// fully consumed before the Result of the same register is produced.
;// r0x80808080 (r7) holds the constant 0x80808080 loaded by each function.
iPredA0         RN 10
iPredA1         RN 11
iPredB0         RN 12
iPredB1         RN 14
Temp1           RN 4
Temp2           RN 5
ResultA         RN 5
ResultB         RN 4
r0x80808080     RN 7

     IF ARM1136JS

         ;// armVCM4P10_Average_4x4_Align0_unsafe
         ;//
         ;// This function calculates the average of a 4x4 block of bytes.
         ;// For each of 4 rows one word is read through pPred0 and one through
         ;// pPred1, and the per-byte value (a+b+1)>>1 is stored through
         ;// pDstPred.  pPred1 and pDstPred are both r2, so the second operand
         ;// is overwritten by the result.
         ;// pPred0 is at alignment offset 0 and pPred1 is 4-byte aligned.
         ;//
         ;// In:   r0 = pPred0, r1 = iPredStep0,
         ;//       r2 = pPred1 / pDstPred, r3 = iPredStep1 / iDstStep
         ;// Out:  r0 advanced by 4*iPredStep0, r2 advanced by 4*iDstStep
         ;// Writes r4, r5, r7, r10, r11, r12 and r14.  The file header lists
         ;// r4-r6 and r14 as preserved (presumably by M_START ..., r6 / M_END)
         ;// and r7, r10-r12 as modified.

         ;// Function header
         M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

         ;// Code start
         ;// 0x80 in every byte lane: used for the "+128" step of the identity below.
         LDR         r0x80808080, =0x80808080

         ;// 1st load (rows 0 and 1).  The row-1 word of the second operand is
         ;// fetched at [pPred1, iPredStep1] before the first store
         ;// post-increments r2.
         M_LDR       iPredB0, [pPred1]
         M_LDR       iPredA0, [pPred0], iPredStep0
         M_LDR       iPredB1, [pPred1, iPredStep1]
         M_LDR       iPredA1, [pPred0], iPredStep0

         ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
         ;// MVN forms 255-b in every byte, UHSUB8 is the per-byte halving
         ;// subtract, and EOR with 0x80 adds the 128 modulo 256.
         MVN         iPredB0, iPredB0
         MVN         iPredB1, iPredB1
         UHSUB8      ResultA, iPredA0, iPredB0
         UHSUB8      ResultB, iPredA1, iPredB1
         EOR         ResultA, ResultA, r0x80808080
         M_STR       ResultA, [pDstPred], iDstStep
         EOR         ResultB, ResultB, r0x80808080
         M_STR       ResultB, [pDstPred], iDstStep

         ;// 2nd load (rows 2 and 3).  r2 was advanced by the two stores above,
         ;// so [pPred1] now addresses row 2.
         M_LDR       iPredA0, [pPred0], iPredStep0
         M_LDR       iPredB0, [pPred1]
         M_LDR       iPredA1, [pPred0], iPredStep0
         M_LDR       iPredB1, [pPred1, iPredStep1]

         MVN         iPredB0, iPredB0
         UHSUB8      ResultA, iPredA0, iPredB0
         MVN         iPredB1, iPredB1
         UHSUB8      ResultB, iPredA1, iPredB1
         EOR         ResultA, ResultA, r0x80808080
         M_STR       ResultA, [pDstPred], iDstStep
         EOR         ResultB, ResultB, r0x80808080
         M_STR       ResultB, [pDstPred], iDstStep
;// Labels must start in column 1 for armasm to treat them as symbols.
End0
         M_END

         ;// armVCM4P10_Average_4x4_Align2_unsafe
         ;//
         ;// This function calculates the average of a 4x4 block of bytes.
         ;// pPred0 is at alignment offset 2 and pPred1 is 4-byte aligned.
         ;// For each of 4 rows the first operand is spliced together from the
         ;// words at [pPred0] and [pPred0, #4], the second operand is the
         ;// word at pPred1, and the per-byte value (a+b+1)>>1 is stored
         ;// through pDstPred (same register as pPred1, so the second operand
         ;// is overwritten).
         ;//
         ;// In:   r0 = pPred0, r1 = iPredStep0,
         ;//       r2 = pPred1 / pDstPred, r3 = iPredStep1 / iDstStep
         ;// Out:  r0 advanced by 4*iPredStep0, r2 advanced by 4*iDstStep
         ;// Writes r4, r5, r7, r10, r11, r12 and r14.  The file header lists
         ;// r4-r6 and r14 as preserved (presumably by M_START ..., r6 / M_END)
         ;// and r7, r10-r12 as modified.
         ;// NOTE(review): the splice takes the upper 16 bits of the first word
         ;// and the lower 16 bits of the next one; that equals "4 bytes
         ;// starting at offset 2" only for little-endian loads from a
         ;// word-aligned r0 - confirm both with the caller.

         ;// Function header
         M_START armVCM4P10_Average_4x4_Align2_unsafe, r6

         ;// Code start
         ;// 0x80 in every byte lane: used for the "+128" step of the identity below.
         LDR         r0x80808080, =0x80808080

         ;// 1st load (rows 0 and 1).  The word at +4 is read before the
         ;// post-indexed load advances pPred0 to the next row.
         LDR         Temp1, [pPred0, #4]
         M_LDR       iPredA0, [pPred0], iPredStep0
         M_LDR       iPredB0, [pPred1]
         M_LDR       iPredB1, [pPred1, iPredStep1]
         M_LDR       Temp2, [pPred0, #4]
         M_LDR       iPredA1, [pPred0], iPredStep0
         MVN         iPredB0, iPredB0
         MVN         iPredB1, iPredB1
         ;// iPredA = (first word >> 16) | (next word << 16)
         MOV         iPredA0, iPredA0, LSR #16
         ORR         iPredA0, iPredA0, Temp1, LSL #16
         MOV         iPredA1, iPredA1, LSR #16
         ORR         iPredA1, iPredA1, Temp2, LSL #16

         ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
         ;// Temp1/Temp2 (r4/r5) are dead after the ORRs above, so r5/r4 can
         ;// be reused here as ResultA/ResultB.
         UHSUB8      ResultA, iPredA0, iPredB0
         UHSUB8      ResultB, iPredA1, iPredB1
         EOR         ResultA, ResultA, r0x80808080
         M_STR       ResultA, [pDstPred], iDstStep
         EOR         ResultB, ResultB, r0x80808080
         M_STR       ResultB, [pDstPred], iDstStep

         ;// 2nd load (rows 2 and 3).  r2 was advanced by the two stores above.
         LDR         Temp1, [pPred0, #4]
         M_LDR       iPredA0, [pPred0], iPredStep0
         LDR         iPredB0, [pPred1]
         LDR         iPredB1, [pPred1, iPredStep1]
         LDR         Temp2, [pPred0, #4]
         M_LDR       iPredA1, [pPred0], iPredStep0
         MVN         iPredB0, iPredB0
         MVN         iPredB1, iPredB1
         MOV         iPredA0, iPredA0, LSR #16
         ORR         iPredA0, iPredA0, Temp1, LSL #16
         MOV         iPredA1, iPredA1, LSR #16
         ORR         iPredA1, iPredA1, Temp2, LSL #16

         UHSUB8      ResultA, iPredA0, iPredB0
         UHSUB8      ResultB, iPredA1, iPredB1
         EOR         ResultA, ResultA, r0x80808080
         M_STR       ResultA, [pDstPred], iDstStep
         EOR         ResultB, ResultB, r0x80808080
         M_STR       ResultB, [pDstPred], iDstStep
;// Labels must start in column 1 for armasm to treat them as symbols.
End2
         M_END


         ;// armVCM4P10_Average_4x4_Align3_unsafe
         ;//
         ;// This function calculates the average of a 4x4 block of bytes.
         ;// pPred0 is at alignment offset 3 and pPred1 is 4-byte aligned.
         ;// For each of 4 rows the first operand is spliced together from the
         ;// words at [pPred0] and [pPred0, #4], the second operand is the
         ;// word at pPred1, and the per-byte value (a+b+1)>>1 is stored
         ;// through pDstPred (same register as pPred1, so the second operand
         ;// is overwritten).
         ;//
         ;// In:   r0 = pPred0, r1 = iPredStep0,
         ;//       r2 = pPred1 / pDstPred, r3 = iPredStep1 / iDstStep
         ;// Out:  r0 advanced by 4*iPredStep0, r2 advanced by 4*iDstStep
         ;// Writes r4, r5, r7, r10, r11, r12 and r14.  The file header lists
         ;// r4-r6 and r14 as preserved (presumably by M_START ..., r6 / M_END)
         ;// and r7, r10-r12 as modified.
         ;// NOTE(review): the splice takes the top 8 bits of the first word
         ;// and the low 24 bits of the next one; that equals "4 bytes
         ;// starting at offset 3" only for little-endian loads from a
         ;// word-aligned r0 - confirm both with the caller.

         ;// Function header
         M_START armVCM4P10_Average_4x4_Align3_unsafe, r6

         ;// Code start
         ;// 0x80 in every byte lane: used for the "+128" step of the identity below.
         LDR         r0x80808080, =0x80808080

         ;// 1st load (rows 0 and 1).  The word at +4 is read before the
         ;// post-indexed load advances pPred0 to the next row.
         LDR         Temp1, [pPred0, #4]
         M_LDR       iPredA0, [pPred0], iPredStep0
         LDR         iPredB0, [pPred1]
         LDR         iPredB1, [pPred1, iPredStep1]
         LDR         Temp2, [pPred0, #4]
         M_LDR       iPredA1, [pPred0], iPredStep0

         MVN         iPredB0, iPredB0
         MVN         iPredB1, iPredB1
         ;// iPredA = (first word >> 24) | (next word << 8)
         MOV         iPredA0, iPredA0, LSR #24
         ORR         iPredA0, iPredA0, Temp1, LSL #8
         MOV         iPredA1, iPredA1, LSR #24
         ORR         iPredA1, iPredA1, Temp2, LSL #8
         ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
         ;// Temp1/Temp2 (r4/r5) are dead after the ORRs above, so r5/r4 can
         ;// be reused here as ResultA/ResultB.
         UHSUB8      ResultA, iPredA0, iPredB0
         UHSUB8      ResultB, iPredA1, iPredB1
         EOR         ResultA, ResultA, r0x80808080
         M_STR       ResultA, [pDstPred], iDstStep
         EOR         ResultB, ResultB, r0x80808080
         M_STR       ResultB, [pDstPred], iDstStep

         ;// 2nd load (rows 2 and 3).  r2 was advanced by the two stores above.
         LDR         Temp1, [pPred0, #4]
         M_LDR       iPredA0, [pPred0], iPredStep0
         LDR         iPredB0, [pPred1]
         LDR         iPredB1, [pPred1, iPredStep1]
         LDR         Temp2, [pPred0, #4]
         M_LDR       iPredA1, [pPred0], iPredStep0

         MVN         iPredB0, iPredB0
         MVN         iPredB1, iPredB1
         MOV         iPredA0, iPredA0, LSR #24
         ORR         iPredA0, iPredA0, Temp1, LSL #8
         MOV         iPredA1, iPredA1, LSR #24
         ORR         iPredA1, iPredA1, Temp2, LSL #8

         UHSUB8      ResultA, iPredA0, iPredB0
         UHSUB8      ResultB, iPredA1, iPredB1
         EOR         ResultA, ResultA, r0x80808080
         M_STR       ResultA, [pDstPred], iDstStep
         EOR         ResultB, ResultB, r0x80808080
         M_STR       ResultB, [pDstPred], iDstStep
;// Labels must start in column 1 for armasm to treat them as symbols.
End3
         M_END

     ENDIF

     END
	;//
	;// Copyright (C) 2007-2008 ARM Limited
	;//
	;// Licensed under the Apache License, Version 2.0 (the "License");
	;// you may not use this file except in compliance with the License.
	;// You may obtain a copy of the License at
	;//
	;// http://www.apache.org/licenses/LICENSE-2.0
	;//
	;// Unless required by applicable law or agreed to in writing, software
	;// distributed under the License is distributed on an "AS IS" BASIS,
	;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	;// See the License for the specific language governing permissions and
	;// limitations under the License.
	;//
	;//
	;//
	;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s
	;// OpenMAX DL: v1.0.2
	;// Revision: 12290
	;// Date: Wednesday, April 9, 2008
	;//
	;//
	;//
	;//


	;// Functions:
	;// armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
	;//
	;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
	;// First operand will be at offset ALIGNMENT from aligned address
	;// Second operand will be at aligned location and will be used as output.
	;// destination pointed by (pDst) for vertical interpolation.
	;// This function needs to copy 4 bytes in horizontal direction
	;//
	;// Registers used as input for this function
	;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
	;//
	;// Registers preserved for top level function
	;// r4,r5,r6,r8,r9,r14
	;//
	;// Registers modified by the function
	;// r7,r10,r11,r12
	;//
	;// Output registers
	;// r2 - pointer to the aligned location
	;// r3 - step size to this aligned location

	INCLUDE omxtypes_s.h
	INCLUDE armCOMM_s.h

	;// NOTE(review): this section comes after an END directive earlier in
	;// the file and repeats declarations already made before it; presumably
	;// armasm never reads it - confirm and consider dropping the duplicate.
	;//
	;// M_VARIANTS (a macro not defined in this file) is given ARM1136JS,
	;// the same symbol that the IF wrapping the code further down tests.
	M_VARIANTS ARM1136JS

	;// Exported entry points, one per handled pPred0 alignment offset
	;// (0, 2 and 3).
	EXPORT armVCM4P10_Average_4x4_Align0_unsafe
	EXPORT armVCM4P10_Average_4x4_Align2_unsafe
	EXPORT armVCM4P10_Average_4x4_Align3_unsafe

;// armasm only recognises a symbol when it starts in column 1, so DEBUG_ON
;// and every RN alias below is written flush-left.
;// DEBUG_ON is not read anywhere in this file; presumably the included
;// armCOMM_s.h declares and consumes it - TODO confirm.
DEBUG_ON SETL {FALSE}

;// Declare input registers
;// r2 and r3 each carry two names: the second operand is read through
;// pPred1/iPredStep1 and the result is written through pDstPred/iDstStep,
;// i.e. the output overwrites the second operand in place.
pPred0 RN 0
iPredStep0 RN 1
pPred1 RN 2
iPredStep1 RN 3
pDstPred RN 2
iDstStep RN 3

;// Declare other intermediate registers
;// iPredB1 is r14 (lr).
;// Temp1/ResultB share r4 and Temp2/ResultA share r5: a Temp value must be
;// fully consumed before the Result of the same register is produced.
;// r0x80808080 (r7) holds the constant 0x80808080 loaded by each function.
iPredA0 RN 10
iPredA1 RN 11
iPredB0 RN 12
iPredB1 RN 14
Temp1 RN 4
Temp2 RN 5
ResultA RN 5
ResultB RN 4
r0x80808080 RN 7

	IF ARM1136JS

	;// armVCM4P10_Average_4x4_Align0_unsafe
	;//
	;// This function calculates the average of a 4x4 block of bytes.
	;// For each of 4 rows one word is read through pPred0 and one through
	;// pPred1, and the per-byte value (a+b+1)>>1 is stored through
	;// pDstPred.  pPred1 and pDstPred are both r2, so the second operand
	;// is overwritten by the result.
	;// pPred0 is at alignment offset 0 and pPred1 is 4-byte aligned.
	;//
	;// In:   r0 = pPred0, r1 = iPredStep0,
	;//       r2 = pPred1 / pDstPred, r3 = iPredStep1 / iDstStep
	;// Out:  r0 advanced by 4*iPredStep0, r2 advanced by 4*iDstStep
	;// Writes r4, r5, r7, r10, r11, r12 and r14.  The file header lists
	;// r4-r6 and r14 as preserved (presumably by M_START ..., r6 / M_END)
	;// and r7, r10-r12 as modified.
	;// NOTE(review): this definition comes after an END directive earlier
	;// in the file and repeats a function of the same name defined before
	;// it; presumably armasm never reads it - confirm.

	;// Function header
	M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

	;// Code start
	;// 0x80 in every byte lane: used for the "+128" step of the identity below.
	LDR r0x80808080, =0x80808080

	;// 1st load (rows 0 and 1).  The row-1 word of the second operand is
	;// fetched at [pPred1, iPredStep1] before the first store
	;// post-increments r2.
	M_LDR iPredB0, [pPred1]
	M_LDR iPredA0, [pPred0], iPredStep0
	M_LDR iPredB1, [pPred1, iPredStep1]
	M_LDR iPredA1, [pPred0], iPredStep0

	;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
	;// MVN forms 255-b in every byte, UHSUB8 is the per-byte halving
	;// subtract, and EOR with 0x80 adds the 128 modulo 256.
	MVN iPredB0, iPredB0
	MVN iPredB1, iPredB1
	UHSUB8 ResultA, iPredA0, iPredB0
	UHSUB8 ResultB, iPredA1, iPredB1
	EOR ResultA, ResultA, r0x80808080
	M_STR ResultA, [pDstPred], iDstStep
	EOR ResultB, ResultB, r0x80808080
	M_STR ResultB, [pDstPred], iDstStep

	;// 2nd load (rows 2 and 3).  r2 was advanced by the two stores above,
	;// so [pPred1] now addresses row 2.
	M_LDR iPredA0, [pPred0], iPredStep0
	M_LDR iPredB0, [pPred1]
	M_LDR iPredA1, [pPred0], iPredStep0
	M_LDR iPredB1, [pPred1, iPredStep1]

	MVN iPredB0, iPredB0
	UHSUB8 ResultA, iPredA0, iPredB0
	MVN iPredB1, iPredB1
	UHSUB8 ResultB, iPredA1, iPredB1
	EOR ResultA, ResultA, r0x80808080
	M_STR ResultA, [pDstPred], iDstStep
	EOR ResultB, ResultB, r0x80808080
	M_STR ResultB, [pDstPred], iDstStep
;// Labels must start in column 1 for armasm to treat them as symbols.
End0
	M_END

	;// armVCM4P10_Average_4x4_Align2_unsafe
	;//
	;// This function calculates the average of a 4x4 block of bytes.
	;// pPred0 is at alignment offset 2 and pPred1 is 4-byte aligned.
	;// For each of 4 rows the first operand is spliced together from the
	;// words at [pPred0] and [pPred0, #4], the second operand is the
	;// word at pPred1, and the per-byte value (a+b+1)>>1 is stored
	;// through pDstPred (same register as pPred1, so the second operand
	;// is overwritten).
	;//
	;// In:   r0 = pPred0, r1 = iPredStep0,
	;//       r2 = pPred1 / pDstPred, r3 = iPredStep1 / iDstStep
	;// Out:  r0 advanced by 4*iPredStep0, r2 advanced by 4*iDstStep
	;// Writes r4, r5, r7, r10, r11, r12 and r14.  The file header lists
	;// r4-r6 and r14 as preserved (presumably by M_START ..., r6 / M_END)
	;// and r7, r10-r12 as modified.
	;// NOTE(review): the splice takes the upper 16 bits of the first word
	;// and the lower 16 bits of the next one; that equals "4 bytes
	;// starting at offset 2" only for little-endian loads from a
	;// word-aligned r0 - confirm both with the caller.
	;// NOTE(review): this definition comes after an END directive earlier
	;// in the file and repeats a function of the same name defined before
	;// it; presumably armasm never reads it - confirm.

	;// Function header
	M_START armVCM4P10_Average_4x4_Align2_unsafe, r6

	;// Code start
	;// 0x80 in every byte lane: used for the "+128" step of the identity below.
	LDR r0x80808080, =0x80808080

	;// 1st load (rows 0 and 1).  The word at +4 is read before the
	;// post-indexed load advances pPred0 to the next row.
	LDR Temp1, [pPred0, #4]
	M_LDR iPredA0, [pPred0], iPredStep0
	M_LDR iPredB0, [pPred1]
	M_LDR iPredB1, [pPred1, iPredStep1]
	M_LDR Temp2, [pPred0, #4]
	M_LDR iPredA1, [pPred0], iPredStep0
	MVN iPredB0, iPredB0
	MVN iPredB1, iPredB1
	;// iPredA = (first word >> 16) | (next word << 16)
	MOV iPredA0, iPredA0, LSR #16
	ORR iPredA0, iPredA0, Temp1, LSL #16
	MOV iPredA1, iPredA1, LSR #16
	ORR iPredA1, iPredA1, Temp2, LSL #16

	;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
	;// Temp1/Temp2 (r4/r5) are dead after the ORRs above, so r5/r4 can
	;// be reused here as ResultA/ResultB.
	UHSUB8 ResultA, iPredA0, iPredB0
	UHSUB8 ResultB, iPredA1, iPredB1
	EOR ResultA, ResultA, r0x80808080
	M_STR ResultA, [pDstPred], iDstStep
	EOR ResultB, ResultB, r0x80808080
	M_STR ResultB, [pDstPred], iDstStep

	;// 2nd load (rows 2 and 3).  r2 was advanced by the two stores above.
	LDR Temp1, [pPred0, #4]
	M_LDR iPredA0, [pPred0], iPredStep0
	LDR iPredB0, [pPred1]
	LDR iPredB1, [pPred1, iPredStep1]
	LDR Temp2, [pPred0, #4]
	M_LDR iPredA1, [pPred0], iPredStep0
	MVN iPredB0, iPredB0
	MVN iPredB1, iPredB1
	MOV iPredA0, iPredA0, LSR #16
	ORR iPredA0, iPredA0, Temp1, LSL #16
	MOV iPredA1, iPredA1, LSR #16
	ORR iPredA1, iPredA1, Temp2, LSL #16

	UHSUB8 ResultA, iPredA0, iPredB0
	UHSUB8 ResultB, iPredA1, iPredB1
	EOR ResultA, ResultA, r0x80808080
	M_STR ResultA, [pDstPred], iDstStep
	EOR ResultB, ResultB, r0x80808080
	M_STR ResultB, [pDstPred], iDstStep
;// Labels must start in column 1 for armasm to treat them as symbols.
End2
	M_END


	;// armVCM4P10_Average_4x4_Align3_unsafe
	;//
	;// This function calculates the average of a 4x4 block of bytes.
	;// pPred0 is at alignment offset 3 and pPred1 is 4-byte aligned.
	;// For each of 4 rows the first operand is spliced together from the
	;// words at [pPred0] and [pPred0, #4], the second operand is the
	;// word at pPred1, and the per-byte value (a+b+1)>>1 is stored
	;// through pDstPred (same register as pPred1, so the second operand
	;// is overwritten).
	;//
	;// In:   r0 = pPred0, r1 = iPredStep0,
	;//       r2 = pPred1 / pDstPred, r3 = iPredStep1 / iDstStep
	;// Out:  r0 advanced by 4*iPredStep0, r2 advanced by 4*iDstStep
	;// Writes r4, r5, r7, r10, r11, r12 and r14.  The file header lists
	;// r4-r6 and r14 as preserved (presumably by M_START ..., r6 / M_END)
	;// and r7, r10-r12 as modified.
	;// NOTE(review): the splice takes the top 8 bits of the first word
	;// and the low 24 bits of the next one; that equals "4 bytes
	;// starting at offset 3" only for little-endian loads from a
	;// word-aligned r0 - confirm both with the caller.
	;// NOTE(review): this definition comes after an END directive earlier
	;// in the file and repeats a function of the same name defined before
	;// it; presumably armasm never reads it - confirm.

	;// Function header
	M_START armVCM4P10_Average_4x4_Align3_unsafe, r6

	;// Code start
	;// 0x80 in every byte lane: used for the "+128" step of the identity below.
	LDR r0x80808080, =0x80808080

	;// 1st load (rows 0 and 1).  The word at +4 is read before the
	;// post-indexed load advances pPred0 to the next row.
	LDR Temp1, [pPred0, #4]
	M_LDR iPredA0, [pPred0], iPredStep0
	LDR iPredB0, [pPred1]
	LDR iPredB1, [pPred1, iPredStep1]
	LDR Temp2, [pPred0, #4]
	M_LDR iPredA1, [pPred0], iPredStep0

	MVN iPredB0, iPredB0
	MVN iPredB1, iPredB1
	;// iPredA = (first word >> 24) | (next word << 8)
	MOV iPredA0, iPredA0, LSR #24
	ORR iPredA0, iPredA0, Temp1, LSL #8
	MOV iPredA1, iPredA1, LSR #24
	ORR iPredA1, iPredA1, Temp2, LSL #8
	;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
	;// Temp1/Temp2 (r4/r5) are dead after the ORRs above, so r5/r4 can
	;// be reused here as ResultA/ResultB.
	UHSUB8 ResultA, iPredA0, iPredB0
	UHSUB8 ResultB, iPredA1, iPredB1
	EOR ResultA, ResultA, r0x80808080
	M_STR ResultA, [pDstPred], iDstStep
	EOR ResultB, ResultB, r0x80808080
	M_STR ResultB, [pDstPred], iDstStep

	;// 2nd load (rows 2 and 3).  r2 was advanced by the two stores above.
	LDR Temp1, [pPred0, #4]
	M_LDR iPredA0, [pPred0], iPredStep0
	LDR iPredB0, [pPred1]
	LDR iPredB1, [pPred1, iPredStep1]
	LDR Temp2, [pPred0, #4]
	M_LDR iPredA1, [pPred0], iPredStep0

	MVN iPredB0, iPredB0
	MVN iPredB1, iPredB1
	MOV iPredA0, iPredA0, LSR #24
	ORR iPredA0, iPredA0, Temp1, LSL #8
	MOV iPredA1, iPredA1, LSR #24
	ORR iPredA1, iPredA1, Temp2, LSL #8

	UHSUB8 ResultA, iPredA0, iPredB0
	UHSUB8 ResultB, iPredA1, iPredB1
	EOR ResultA, ResultA, r0x80808080
	M_STR ResultA, [pDstPred], iDstStep
	EOR ResultB, ResultB, r0x80808080
	M_STR ResultB, [pDstPred], iDstStep
;// Labels must start in column 1 for armasm to treat them as symbols.
End3
	M_END

	ENDIF

	END