;// Source blob: ffe123df3b0cdf551ee8671c0fdfdce9b7ed32ea
;// Copyright (C) 2007-2008 ARM Limited
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//     http://www.apache.org/licenses/LICENSE-2.0
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 9641
;// Date: Thursday, February 7, 2008
INCLUDE omxtypes_s.h
EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;// Declare input registers
;// Symbolic names for the registers both alignment routines take as input:
;//   pSrc    (r0) - source pointer; it is masked down to a word boundary
;//                  (BIC #3) before any load, so any byte alignment is accepted
;//   srcStep (r1) - value added to pSrc after each source row is read
;//   pDst    (r8) - destination pointer; advanced as each row is stored
;//   iHeight (r9) - row counter; decremented once per row copied
pSrc RN 0
srcStep RN 1
pDst RN 8
iHeight RN 9
;// Declare inner loop registers
;// x and x0 intentionally name the same register (r7): x receives
;// (pSrc AND 3) immediately before the M_CASE dispatch list and is not read
;// again, after which r7 is reused as the first data word x0.
;//   x1, x2  - second and third data words of the row being realigned
;//   Scratch - holds a copy of the initial pDst in the HorAlign9x routine
x RN 7
x0 RN 7
x1 RN 10
x2 RN 11
Scratch RN 12
;// Function:
;//   armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Purpose:
;//   Copies iHeight rows from an arbitrarily aligned source (pSrc) into the
;//   buffer at pDst for horizontal interpolation. For every row, three words
;//   are loaded from the word boundary at or below pSrc, shifted right by
;//   (pSrc AND 3) bytes across the three words, and stored as three
;//   consecutive words (12 bytes) at pDst. After the largest shift (3 bytes)
;//   9 of the 12 loaded bytes remain, which is the 9 bytes per row the
;//   horizontal filter needs.
;//   NOTE(review): the LSR/LSL pairing places the byte at the original pSrc
;//   in the low byte of the first stored word, i.e. it assumes little-endian
;//   byte order within a word - confirm for the target configuration.
;//
;// Registers used as input for this function
;//   r0 (pSrc)    - source pointer, any byte alignment
;//   r1 (srcStep) - added to the (word-aligned) source pointer after each row
;//   r8 (pDst)    - destination buffer; written with LDM/STM-style word
;//                  stores, so it is expected to be 4-byte aligned
;//   r9 (iHeight) - number of rows to copy. The loops test the counter after
;//                  the first row has been copied, so one row is always
;//                  copied even if iHeight <= 0 on entry.
;//
;// Registers preserved for top level function
;//   r2,r3,r4,r5,r6 (never referenced by this routine)
;//
;// Registers modified by the function
;//   r0,r1 (outputs), r7,r8,r9,r10,r11,r12 and the condition flags
;//
;// Output registers
;//   r0 - start of the aligned copy (the value pDst had on entry); to be used
;//        as the new pSrc
;//   r1 - step between rows of the aligned copy (12 bytes)
;//
;// The M_START / M_SWITCH / M_CASE / M_ENDSWITCH / M_END macros are project
;// macros that are not defined in this file.

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

        ;// Keep the start of the destination buffer; it is returned in r0
        MOV     Scratch, pDst

        AND     x, pSrc, #3                     ;// x = byte offset of pSrc within its word (0..3)
        BIC     pSrc, pSrc, #3                  ;// Round pSrc down to a word boundary

        ;// Select the copy loop that matches the byte offset.
        ;// x shares r7 with x0, so it is dead once the dispatch has happened.
        M_SWITCH x
        M_CASE  Copy0toAligned
        M_CASE  Copy1toAligned
        M_CASE  Copy2toAligned
        M_CASE  Copy3toAligned
        M_ENDSWITCH

        ;// Offset 0: source already word aligned, plain three-word copy
Copy0toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1            ;// Flags from here feed the BGT below
        ADD     pSrc, pSrc, srcStep             ;// Advance to the next source row

        ;// One cycle stall

        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy0toAligned
        B       CopyEnd

        ;// Offset 1: drop the low byte of each word and pull in the low
        ;// byte of the following word
Copy1toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #8
        ORR     x0, x0, x1, LSL #24
        MOV     x1, x1, LSR #8
        ORR     x1, x1, x2, LSL #24
        MOV     x2, x2, LSR #8                  ;// Top byte of x2 is zero-filled
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy1toAligned
        B       CopyEnd

        ;// Offset 2: shift by two bytes across the three words
Copy2toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #16
        ORR     x0, x0, x1, LSL #16
        MOV     x1, x1, LSR #16
        ORR     x1, x1, x2, LSL #16
        MOV     x2, x2, LSR #16                 ;// Top two bytes of x2 are zero-filled
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy2toAligned
        B       CopyEnd

        ;// Offset 3: shift by three bytes across the three words
Copy3toAligned
        LDM     pSrc, {x0, x1, x2}
        SUBS    iHeight, iHeight, #1
        ADD     pSrc, pSrc, srcStep

        ;// One cycle stall

        MOV     x0, x0, LSR #24
        ORR     x0, x0, x1, LSL #8
        MOV     x1, x1, LSR #24
        ORR     x1, x1, x2, LSL #8
        MOV     x2, x2, LSR #24                 ;// Only the low byte of x2 is valid
        STM     pDst!, {x0, x1, x2}             ;// Store aligned output row
        BGT     Copy3toAligned
        ;// Last case falls through to CopyEnd

CopyEnd
        ;// Hand the aligned copy back to the caller as the new source
        MOV     pSrc, Scratch                   ;// r0 = start of the aligned buffer
        MOV     srcStep, #12                    ;// r1 = 12 bytes (three words) per row

        M_END
;// Function:
;//   armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Purpose:
;//   Copies iHeight rows from an arbitrarily aligned source (pSrc) into the
;//   buffer at pDst for vertical interpolation. For every row, the word at
;//   the word boundary at or below pSrc (and, for non-zero offsets, the word
;//   after it) is loaded, the two are combined so that the 4 bytes starting
;//   at the original pSrc form one word, and that word is stored at pDst.
;//   NOTE(review): the LSR/LSL pairing assumes little-endian byte order
;//   within a word - confirm for the target configuration.
;//
;// Registers used as input for this function
;//   r0 (pSrc)    - source pointer, any byte alignment
;//   r1 (srcStep) - post-increment applied to the (word-aligned) source
;//                  pointer after each row
;//   r8 (pDst)    - destination buffer; written with word stores, so it is
;//                  expected to be 4-byte aligned
;//   r9 (iHeight) - number of rows to copy. The loops test the counter after
;//                  the first row has been copied, so one row is always
;//                  copied even if iHeight <= 0 on entry.
;//
;// Registers preserved for top level function
;//   r2,r3,r4,r5,r6 (never referenced by this routine)
;//
;// Registers modified by the function
;//   r0,r1 (outputs), r7,r8,r9,r10 and the condition flags are written by the
;//   code below. r11 and r12 were listed as modified in the original header;
;//   callers should keep treating them as clobbered.
;//
;// Output registers
;//   r0 - final pDst minus 28 bytes; to be used as the new pSrc. With 4 bytes
;//        stored per row this is 7 rows before the end of the copied data.
;//        NOTE(review): this constant is independent of iHeight, so the
;//        result only points inside the buffer when iHeight >= 7 (row 2 when
;//        iHeight is 9) - confirm the row count callers pass.
;//   r1 - step between rows of the aligned copy (4 bytes)
;//
;// The M_START / M_SWITCH / M_CASE / M_ENDSWITCH / M_LDR / M_END macros are
;// project macros that are not defined in this file; M_LDR is used here in
;// the form of a post-indexed word load.

        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

        AND     x, pSrc, #3                     ;// x = byte offset of pSrc within its word (0..3)
        BIC     pSrc, pSrc, #3                  ;// Round pSrc down to a word boundary

        ;// Select the copy loop that matches the byte offset.
        ;// x shares r7 with x0, so it is dead once the dispatch has happened.
        M_SWITCH x
        M_CASE  Copy0toVAligned
        M_CASE  Copy1toVAligned
        M_CASE  Copy2toVAligned
        M_CASE  Copy3toVAligned
        M_ENDSWITCH

        ;// Offset 0: source already word aligned, plain one-word copy
Copy0toVAligned
        M_LDR   x0, [pSrc], srcStep             ;// Load row word, step to next row
        SUBS    iHeight, iHeight, #1            ;// Flags from here feed the BGT below

        ;// One cycle stall

        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy0toVAligned
        B       CopyVEnd

        ;// Offset 1: upper three bytes of word 0 plus low byte of word 1.
        ;// The [pSrc, #4] load must precede M_LDR, which moves pSrc on.
Copy1toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #24
        ORR     x0, x1, x0, LSR #8
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy1toVAligned
        B       CopyVEnd

        ;// Offset 2: upper two bytes of word 0 plus low two bytes of word 1
Copy2toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #16
        ORR     x0, x1, x0, LSR #16
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy2toVAligned
        B       CopyVEnd

        ;// Offset 3: top byte of word 0 plus low three bytes of word 1
Copy3toVAligned
        LDR     x1, [pSrc, #4]
        M_LDR   x0, [pSrc], srcStep
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #8
        ORR     x0, x1, x0, LSR #24
        STR     x0, [pDst], #4                  ;// Store aligned output row
        BGT     Copy3toVAligned
        ;// Last case falls through to CopyVEnd

CopyVEnd
        ;// Hand the aligned copy back to the caller as the new source
        SUB     pSrc, pDst, #28                 ;// r0 = 28 bytes (7 rows) before end of copy
        MOV     srcStep, #4                     ;// r1 = 4 bytes (one word) per row

        M_END