;// blob: 5f3eb9b5151b24a331d774f871a110e429b4c35f [file] [log] [blame]
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;// http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 12290
;// Date: Wednesday, April 9, 2008
;//
;//
;//
;//
;// Functions:
;// armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
;//
;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
;// First operand will be at offset ALIGNMENT from aligned address
;// Second operand will be at aligned location and will be used as output.
;// destination pointed by (pDst) for vertical interpolation.
;// This function needs to copy 4 bytes in horizontal direction
;//
;// Registers used as input for this function
;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
;//
;// Registers preserved for top level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12 (r0 and r2 are also advanced by the post-indexed loads/stores)
;//
;// Output registers
;// r2 - pointer to the aligned location
;// r3 - step size to this aligned location
;// Shared assembler headers. M_VARIANTS, M_START, M_END, M_LDR and M_STR
;// used below are not defined in this file; presumably they are macros
;// provided by armCOMM_s.h -- confirm against that header.
INCLUDE omxtypes_s.h
INCLUDE armCOMM_s.h
;// CPU variant this file is written for; the code below is only assembled
;// inside the "IF ARM1136JS" section.
M_VARIANTS ARM1136JS
;// Entry points, one per handled alignment offset of the first operand.
;// NOTE(review): no Align1 variant is exported or defined in this file.
EXPORT armVCM4P10_Average_4x4_Align0_unsafe
EXPORT armVCM4P10_Average_4x4_Align2_unsafe
EXPORT armVCM4P10_Average_4x4_Align3_unsafe
DEBUG_ON SETL {FALSE}
;// Declare input registers
;// pPred0     - pointer to the first operand; post-incremented by
;//              iPredStep0 after each row is loaded
;// iPredStep0 - value added to pPred0 after each row load
pPred0 RN 0
iPredStep0 RN 1
;// pPred1 and pDstPred are two names for the same register (r2): the second
;// operand is read from the rows that the result is then written to.
;// iPredStep1 and iDstStep likewise both name r3.
pPred1 RN 2
iPredStep1 RN 3
pDstPred RN 2
iDstStep RN 3
;// Declare other intermediate registers
;// iPredA0/iPredA1 - two rows of the first operand
;// iPredB0/iPredB1 - two rows of the second operand
iPredA0 RN 10
iPredA1 RN 11
iPredB0 RN 12
;// r14 is the link register; it is overwritten as scratch here.
;// Presumably M_START/M_END save and restore it -- confirm in armCOMM_s.h.
iPredB1 RN 14
;// Temp1/ResultB share r4 and Temp2/ResultA share r5. The Temp values are
;// consumed (merged into iPredA0/iPredA1) before the Result values are
;// written, so the sharing does not lose data.
Temp1 RN 4
Temp2 RN 5
ResultA RN 5
ResultB RN 4
;// Holds the constant 0x80808080 (loaded at the start of each function).
r0x80808080 RN 7
;// Everything up to the matching ENDIF is assembled only for ARM1136JS.
IF ARM1136JS
;// armVCM4P10_Average_4x4_Align0_unsafe
;//
;// Calculates the rounded byte-wise average, c = (a+b+1)>>1, of a 4x4 block:
;// four rows, one 32-bit word (four bytes) per row.
;// pPred0 is at alignment offset 0 and pPred1 is 4-byte aligned, so each row
;// of the first operand is fetched with a single word load.
;//
;// On entry:
;//   pPred0 (r0)            - first operand
;//   iPredStep0 (r1)        - added to pPred0 after each row
;//   pPred1/pDstPred (r2)   - second operand, overwritten with the result
;//   iPredStep1/iDstStep (r3) - row step for the second operand / result
;// On exit:
;//   pPred0 has been advanced by 4*iPredStep0 and r2 by 4*iDstStep
;//   (four post-indexed loads and four post-indexed stores respectively).
;//
;// Function header
;// NOTE(review): the "r6" argument presumably asks M_START to save r4-r6 and
;// lr, which this routine uses as scratch -- confirm in armCOMM_s.h.
M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
;// Code start
;// Per-byte constant 0x80, used below to add 128 to each byte lane.
LDR r0x80808080, =0x80808080
;// 1st load: rows 0 and 1 of both operands.
;// The loads of operand B do not move pPred1; only the stores through
;// pDstPred (the same register) advance it. Both B rows are therefore read
;// before the first store overwrites them.
M_LDR iPredB0, [pPred1]
M_LDR iPredA0, [pPred0], iPredStep0
M_LDR iPredB1, [pPred1, iPredStep1]
M_LDR iPredA1, [pPred0], iPredStep0
;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
;// MVN forms 255-b in every byte lane.
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
;// UHSUB8 forms (a-(255-b))/2 in every byte lane.
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
;// Toggling bit 7 of each byte adds the remaining 128 (modulo 256).
EOR ResultA, ResultA, r0x80808080
;// Store row 0 and step r2 on to row 1.
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
;// Store row 1 and step r2 on to row 2.
M_STR ResultB, [pDstPred], iDstStep
;// 2nd load: rows 2 and 3. r2 now addresses row 2, so the same addressing
;// forms as in the 1st load pick up the next two rows of operand B.
M_LDR iPredA0, [pPred0], iPredStep0
M_LDR iPredB0, [pPred1]
M_LDR iPredA1, [pPred0], iPredStep0
M_LDR iPredB1, [pPred1, iPredStep1]
;// Same average computation as above, with the two rows interleaved.
MVN iPredB0, iPredB0
UHSUB8 ResultA, iPredA0, iPredB0
MVN iPredB1, iPredB1
UHSUB8 ResultB, iPredA1, iPredB1
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
;// End0 is not referenced anywhere in the visible source.
End0
;// Function end. Presumably restores the registers saved by M_START and
;// returns -- confirm in armCOMM_s.h.
M_END
;// armVCM4P10_Average_4x4_Align2_unsafe
;//
;// Calculates the rounded byte-wise average, c = (a+b+1)>>1, of a 4x4 block:
;// four rows, one 32-bit word (four bytes) per row.
;// pPred0 is at alignment offset 2 and pPred1 is 4-byte aligned.
;// For each row of the first operand two words are loaded, from [pPred0] and
;// [pPred0, #4], and merged as (word0 >> 16) | (word1 << 16).
;// NOTE(review): on a little-endian load this selects the four bytes that
;// start at byte offset 2 -- assumes little-endian data; confirm.
;//
;// On entry:
;//   pPred0 (r0)            - first operand; words at +0 and +4 of each row
;//                            are read
;//   iPredStep0 (r1)        - added to pPred0 after each row
;//   pPred1/pDstPred (r2)   - second operand, overwritten with the result
;//   iPredStep1/iDstStep (r3) - row step for the second operand / result
;// On exit:
;//   pPred0 has been advanced by 4*iPredStep0 and r2 by 4*iDstStep.
;//
;// Function header
;// NOTE(review): the "r6" argument presumably asks M_START to save r4-r6 and
;// lr, which this routine uses as scratch -- confirm in armCOMM_s.h.
M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
;// Code start
;// Per-byte constant 0x80, used below to add 128 to each byte lane.
LDR r0x80808080, =0x80808080
;// 1st load: rows 0 and 1.
;// The second word of each row (offset #4) is read before the post-indexed
;// load of the first word moves pPred0 on to the next row.
LDR Temp1, [pPred0, #4]
M_LDR iPredA0, [pPred0], iPredStep0
;// Both rows of operand B are read before any store, because the stores go
;// through the same register (pDstPred == pPred1) and overwrite these rows.
M_LDR iPredB0, [pPred1]
M_LDR iPredB1, [pPred1, iPredStep1]
M_LDR Temp2, [pPred0, #4]
M_LDR iPredA1, [pPred0], iPredStep0
;// MVN forms 255-b in every byte lane.
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
;// Merge the two loaded words of each row: the upper halfword of the first
;// word becomes the lower halfword, and the lower halfword of the second
;// word becomes the upper halfword.
MOV iPredA0, iPredA0, LSR #16
ORR iPredA0, iPredA0, Temp1, LSL #16
MOV iPredA1, iPredA1, LSR #16
ORR iPredA1, iPredA1, Temp2, LSL #16
;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
;// UHSUB8 forms (a-(255-b))/2 per byte; ResultA/ResultB reuse the registers
;// of Temp2/Temp1, which are no longer needed at this point.
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
;// Toggling bit 7 of each byte adds the remaining 128 (modulo 256).
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
;// 2nd load: rows 2 and 3, same sequence as above. r2 now addresses row 2.
;// NOTE(review): this half uses plain LDR where the 1st load uses M_LDR for
;// the same accesses; presumably equivalent here -- confirm in armCOMM_s.h.
LDR Temp1, [pPred0, #4]
M_LDR iPredA0, [pPred0], iPredStep0
LDR iPredB0, [pPred1]
LDR iPredB1, [pPred1, iPredStep1]
LDR Temp2, [pPred0, #4]
M_LDR iPredA1, [pPred0], iPredStep0
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
MOV iPredA0, iPredA0, LSR #16
ORR iPredA0, iPredA0, Temp1, LSL #16
MOV iPredA1, iPredA1, LSR #16
ORR iPredA1, iPredA1, Temp2, LSL #16
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
;// End2 is not referenced anywhere in the visible source.
End2
;// Function end. Presumably restores the registers saved by M_START and
;// returns -- confirm in armCOMM_s.h.
M_END
;// armVCM4P10_Average_4x4_Align3_unsafe
;//
;// Calculates the rounded byte-wise average, c = (a+b+1)>>1, of a 4x4 block:
;// four rows, one 32-bit word (four bytes) per row.
;// pPred0 is at alignment offset 3 and pPred1 is 4-byte aligned.
;// For each row of the first operand two words are loaded, from [pPred0] and
;// [pPred0, #4], and merged as (word0 >> 24) | (word1 << 8).
;// NOTE(review): on a little-endian load this selects the four bytes that
;// start at byte offset 3 -- assumes little-endian data; confirm.
;//
;// On entry:
;//   pPred0 (r0)            - first operand; words at +0 and +4 of each row
;//                            are read
;//   iPredStep0 (r1)        - added to pPred0 after each row
;//   pPred1/pDstPred (r2)   - second operand, overwritten with the result
;//   iPredStep1/iDstStep (r3) - row step for the second operand / result
;// On exit:
;//   pPred0 has been advanced by 4*iPredStep0 and r2 by 4*iDstStep.
;//
;// Function header
;// NOTE(review): the "r6" argument presumably asks M_START to save r4-r6 and
;// lr, which this routine uses as scratch -- confirm in armCOMM_s.h.
M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
;// Code start
;// Per-byte constant 0x80, used below to add 128 to each byte lane.
LDR r0x80808080, =0x80808080
;// 1st load: rows 0 and 1.
;// The second word of each row (offset #4) is read before the post-indexed
;// load of the first word moves pPred0 on to the next row.
LDR Temp1, [pPred0, #4]
M_LDR iPredA0, [pPred0], iPredStep0
;// Both rows of operand B are read before any store, because the stores go
;// through the same register (pDstPred == pPred1) and overwrite these rows.
LDR iPredB0, [pPred1]
LDR iPredB1, [pPred1, iPredStep1]
LDR Temp2, [pPred0, #4]
M_LDR iPredA1, [pPred0], iPredStep0
;// MVN forms 255-b in every byte lane.
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
;// Merge the two loaded words of each row: the top byte of the first word
;// becomes the lowest byte, and the low three bytes of the second word
;// occupy the upper three bytes.
MOV iPredA0, iPredA0, LSR #24
ORR iPredA0, iPredA0, Temp1, LSL #8
MOV iPredA1, iPredA1, LSR #24
ORR iPredA1, iPredA1, Temp2, LSL #8
;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
;// UHSUB8 forms (a-(255-b))/2 per byte; ResultA/ResultB reuse the registers
;// of Temp2/Temp1, which are no longer needed at this point.
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
;// Toggling bit 7 of each byte adds the remaining 128 (modulo 256).
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
;// 2nd load: rows 2 and 3, same sequence as above. r2 now addresses row 2.
LDR Temp1, [pPred0, #4]
M_LDR iPredA0, [pPred0], iPredStep0
LDR iPredB0, [pPred1]
LDR iPredB1, [pPred1, iPredStep1]
LDR Temp2, [pPred0, #4]
M_LDR iPredA1, [pPred0], iPredStep0
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
MOV iPredA0, iPredA0, LSR #24
ORR iPredA0, iPredA0, Temp1, LSL #8
MOV iPredA1, iPredA1, LSR #24
ORR iPredA1, iPredA1, Temp2, LSL #8
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
;// End3 is not referenced anywhere in the visible source.
End3
;// Function end. Presumably restores the registers saved by M_START and
;// returns -- confirm in armCOMM_s.h.
M_END
;// Closes the "IF ARM1136JS" section opened before the first function.
ENDIF
END