blob: f5a7326a377451134ace51869a4056eb8c590091 [file] [log] [blame]
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;// http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 12290
;// Date: Wednesday, April 9, 2008
;//
;//
;//
;//
INCLUDE omxtypes_s.h
INCLUDE armCOMM_s.h
M_VARIANTS ARM1136JS
EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
;// Functions:
;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
;//
;// Implements re-arrangement of data from temporary buffer to a buffer pointed by pBuf.
;// This will do the conversion of data from 16 bit to 8 bit and it also
;// removes the offset and checks for saturation.
;//
;// Registers used as input for this function
;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
;//
;// Registers preserved for top level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the destination location
;// r1 - step size to this destination location
DEBUG_ON        SETL    {FALSE}

;// Mask is used to implement (a+b+1)/2.
;// It is not referenced by any instruction in this file.
MASK            EQU     0x80808080

;// Register aliases, listed by register number.

;// Inputs (both are overwritten with the function outputs on return)
pSrc0           RN      0       ;// source pointer
srcStep0        RN      1       ;// source step

;// Scratch registers and constants
Temp1           RN      4
Temp2           RN      5
r0x0fe00fe0     RN      6       ;// holds the constant 0x0fe00fe0
pBuf            RN      7       ;// destination pointer
Temp3           RN      10
ValueA0         RN      10      ;// same register as Temp3
Temp4           RN      11
ValueA1         RN      11      ;// same register as Temp4
r0x00ff00ff     RN      12      ;// holds the constant 0x00ff00ff
Count           RN      14      ;// loop counter; r14 is also the link register, so the
                                ;// functions presumably rely on M_START/M_END to
                                ;// save and restore it - confirm in armCOMM_s.h
IF ARM1136JS
;//-----------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
;//
;// Converts four rows of 16-bit samples to 8-bit samples and stores them
;// back to back at pBuf, 8 bytes per row.
;//
;// In:   pSrc0    (r0)  first source row; 16 bytes are read from each row
;//       srcStep0 (r1)  step applied to pSrc0 after each row (through M_LDR)
;//       pBuf     (r7)  destination; 32 bytes are written
;// Out:  pSrc0    (r0)  address of the first byte written (pBuf on entry)
;//       srcStep0 (r1)  8, the distance in bytes between stored rows
;// Also changed: pBuf (advanced by 32), r10, r11, r12 and the flags.
;// r4, r5, r6 and r14 are used as scratch; they are expected to be saved
;// and restored by M_START/M_END - confirm against armCOMM_s.h.
;//-----------------------------------------------------------------------
HorDiagRows     EQU     4       ;// rows converted per call
HorDiagRowBytes EQU     8       ;// bytes stored per row (one STRD)

        M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6

        LDR         r0x0fe00fe0, =0x0fe00fe0
        LDR         r0x00ff00ff, =0x00ff00ff
        MOV         Count, #HorDiagRows

LoopStart1
        ;// Fetch the four words of this row. The fixed-offset loads must
        ;// come first: M_LDR also moves pSrc0 on by srcStep0 (presumably a
        ;// post-indexed LDR - see armCOMM_s.h).
        LDR         Temp4, [pSrc0, #12]
        LDR         Temp3, [pSrc0, #8]
        LDR         Temp2, [pSrc0, #4]
        M_LDR       Temp1, [pSrc0], srcStep0

        ;// Subtract 0x0fe0 from each unsigned 16-bit lane, stopping at 0.
        UQSUB16     Temp4, Temp4, r0x0fe00fe0
        UQSUB16     Temp3, Temp3, r0x0fe00fe0
        UQSUB16     Temp2, Temp2, r0x0fe00fe0
        UQSUB16     Temp1, Temp1, r0x0fe00fe0

        ;// Clamp each lane (read as signed) to the range 0..0x1fff.
        USAT16      Temp4, #13, Temp4
        USAT16      Temp3, #13, Temp3
        USAT16      Temp2, #13, Temp2
        USAT16      Temp1, #13, Temp1

        ;// Scale each lane down by 32. The shift acts on the whole word, so
        ;// the mask removes the bits that cross from the upper lane into
        ;// the lower one, leaving one byte per lane.
        AND         Temp4, r0x00ff00ff, Temp4, LSR #5
        AND         Temp3, r0x00ff00ff, Temp3, LSR #5
        AND         Temp2, r0x00ff00ff, Temp2, LSR #5
        AND         Temp1, r0x00ff00ff, Temp1, LSR #5

        ;// Interleave the bytes of two words into one. ValueA1 has to be
        ;// built first: ValueA0 is the same register as Temp3, so writing
        ;// it earlier would destroy an operand that ValueA1 still needs.
        ORR         ValueA1, Temp3, Temp4, LSL #8
        ORR         ValueA0, Temp1, Temp2, LSL #8

        SUBS        Count, Count, #1
        STRD        ValueA0, [pBuf], #HorDiagRowBytes   ;// stores ValueA0 and ValueA1 (r10, r11)
        BGT         LoopStart1
End1
        ;// Return the start of the block just written and its row stride.
        SUB         pSrc0, pBuf, #HorDiagRows*HorDiagRowBytes
        MOV         srcStep0, #HorDiagRowBytes

        M_END
;//-----------------------------------------------------------------------
;// M_VERDIAG_ROW_TO_BYTES
;//
;// Helper for armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe.
;// Reads the four words at pSrc0, moves pSrc0 on by srcStep0, converts
;// every 16-bit lane to 8 bits and leaves the row packed in two registers:
;//       ValueA0 = [b2 a2 b0 a0]     ValueA1 = [d2 c2 d0 c0]
;// where a, b, c, d are the words at offsets 0, 4, 8, 12 and the digit
;// 0 or 2 selects the low or high 16-bit lane of that word.
;// Changes Temp1-Temp4 (ValueA0/ValueA1 share r10/r11 with Temp3/Temp4).
;// Expects r0x0fe00fe0 and r0x00ff00ff to hold their constants.
;//-----------------------------------------------------------------------
        MACRO
        M_VERDIAG_ROW_TO_BYTES
        ;// Fixed-offset loads first; M_LDR also advances pSrc0 (presumably
        ;// a post-indexed LDR - see armCOMM_s.h).
        LDR         Temp4, [pSrc0, #12]
        LDR         Temp3, [pSrc0, #8]
        LDR         Temp2, [pSrc0, #4]
        M_LDR       Temp1, [pSrc0], srcStep0
        ;// Subtract 0x0fe0 from each unsigned lane, stopping at 0.
        UQSUB16     Temp4, Temp4, r0x0fe00fe0
        UQSUB16     Temp3, Temp3, r0x0fe00fe0
        UQSUB16     Temp2, Temp2, r0x0fe00fe0
        UQSUB16     Temp1, Temp1, r0x0fe00fe0
        ;// Clamp each lane (read as signed) to 0..0x1fff.
        USAT16      Temp4, #13, Temp4
        USAT16      Temp3, #13, Temp3
        USAT16      Temp2, #13, Temp2
        USAT16      Temp1, #13, Temp1
        ;// Divide by 32 and keep one byte per lane.
        AND         Temp4, r0x00ff00ff, Temp4, LSR #5
        AND         Temp3, r0x00ff00ff, Temp3, LSR #5
        AND         Temp2, r0x00ff00ff, Temp2, LSR #5
        AND         Temp1, r0x00ff00ff, Temp1, LSR #5
        ;// ValueA1 first: ValueA0 overwrites Temp3, which ValueA1 reads.
        ORR         ValueA1, Temp3, Temp4, LSL #8       ;// [d2 c2 d0 c0]
        ORR         ValueA0, Temp1, Temp2, LSL #8       ;// [b2 a2 b0 a0]
        MEND

;//-----------------------------------------------------------------------
;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
;//
;// Converts four rows of 16-bit samples to 8-bit samples. Each row yields
;// two words, [d0 c0 b0 a0] and [d2 c2 b2 a2], which are stored 8 bytes
;// apart; rows are handled in pairs, the second row of a pair landing
;// 4 bytes after the first.
;//
;// In:   pSrc0    (r0)  first source row; 16 bytes are read from each row
;//       srcStep0 (r1)  step applied to pSrc0 after each row (through M_LDR)
;//       pBuf     (r7)  destination; 32 bytes are written
;// Out:  pSrc0    (r0)  pBuf on entry + 8
;//       srcStep0 (r1)  4
;// Also changed: pBuf (advanced by 32), r10, r11, r12 and the flags.
;// r4, r5, r6 and r14 are used as scratch; they are expected to be saved
;// and restored by M_START/M_END - confirm against armCOMM_s.h.
;//-----------------------------------------------------------------------
        M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6

        LDR         r0x0fe00fe0, =0x0fe00fe0
        LDR         r0x00ff00ff, =0x00ff00ff
        MOV         Count, #2                           ;// two passes, two rows per pass

LoopStart
        ;// First row of the pair: words go to pBuf+0 and pBuf+8,
        ;// leaving pBuf at +4 for the second row.
        M_VERDIAG_ROW_TO_BYTES
        PKHBT       Temp1, ValueA0, ValueA1, LSL #16    ;// [d0 c0 b0 a0]
        STR         Temp1, [pBuf], #8
        PKHTB       Temp2, ValueA1, ValueA0, ASR #16    ;// [d2 c2 b2 a2]
        STR         Temp2, [pBuf], #-4

        ;// Second row of the pair: words go to pBuf+4 and pBuf+12 (relative
        ;// to the start of this pass), leaving pBuf at +16 for the next pass.
        M_VERDIAG_ROW_TO_BYTES
        PKHBT       Temp1, ValueA0, ValueA1, LSL #16    ;// [d0 c0 b0 a0]
        SUBS        Count, Count, #1
        STR         Temp1, [pBuf], #8
        PKHTB       Temp2, ValueA1, ValueA0, ASR #16    ;// [d2 c2 b2 a2]
        STR         Temp2, [pBuf], #4

        BGT         LoopStart
End2
        ;// pBuf has advanced by 32; hand back entry pBuf + 8 and a step of 4.
        SUB         pSrc0, pBuf, #32-8
        MOV         srcStep0, #4

        M_END
ENDIF
END