| ;// |
| ;// Copyright (C) 2007-2008 ARM Limited |
| ;// |
| ;// Licensed under the Apache License, Version 2.0 (the "License"); |
| ;// you may not use this file except in compliance with the License. |
| ;// You may obtain a copy of the License at |
| ;// |
| ;// http://www.apache.org/licenses/LICENSE-2.0 |
| ;// |
| ;// Unless required by applicable law or agreed to in writing, software |
| ;// distributed under the License is distributed on an "AS IS" BASIS, |
| ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| ;// See the License for the specific language governing permissions and |
| ;// limitations under the License. |
| ;// |
| ;// |
| ;// |
| ;// File Name: omxVCM4P10_InterpolateLuma_s.s |
| ;// OpenMAX DL: v1.0.2 |
| ;// Revision: 9641 |
| ;// Date: Thursday, February 7, 2008 |
| ;// |
| ;// |
| ;// |
| ;// |
| |
| ;// Function: |
| ;// omxVCM4P10_InterpolateLuma |
| ;// |
| ;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly. |
| ;// Performs quarter pel interpolation of inter luma MB. |
| ;// It's assumed that the frame is already padded when calling this function. |
| ;// Parameters: |
| ;// [in] pSrc Pointer to the source reference frame buffer |
| ;// [in] srcStep Reference frame step in byte |
| ;// [in] dstStep Destination frame step in byte. Must be multiple of roi.width |
| ;// [in] dx Fractional part of horizontal motion vector |
| ;// component in 1/4 pixel unit; valid in the range [0,3] |
| ;// [in] dy Fractional part of vertical motion vector |
| ;// component in 1/4 pixel unit; valid in the range [0,3] |
| ;// [in] roi Dimension of the interpolation region; roi.width and roi.height must each |
| ;// be equal to 4, 8, or 16. |
| ;// [out] pDst Pointer to the destination frame buffer. |
| ;// if roi.width==4, 4-byte alignment required |
| ;// if roi.width==8, 8-byte alignment required |
| ;// if roi.width==16, 16-byte alignment required |
| ;// |
| ;// Return Value: |
| ;// If the function runs without error, it returns OMX_Sts_NoErr. |
| ;// It is assumed that the following conditions are satisfied before calling this function: |
| ;// pSrc and pDst are not NULL. |
| ;// srcStep and dstStep are >= roi.width. |
| ;// dx and dy are in the range [0,3]. |
| ;// roi.width and roi.height are each one of {4, 8, 16}. |
| ;// If roi.width is equal to 4, pDst is 4 byte aligned. |
| ;// If roi.width is equal to 8, pDst is 8 byte aligned. |
| ;// If roi.width is equal to 16, pDst is 16 byte aligned. |
| ;// srcStep and dstStep are multiples of 8. |
| ;// |
| ;// |
| |
| |
| INCLUDE omxtypes_s.h |
| INCLUDE armCOMM_s.h |
| |
| M_VARIANTS ARM1136JS |
| |
| EXPORT omxVCM4P10_InterpolateLuma |
| |
| IF ARM1136JS |
| IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe |
| IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe |
| IMPORT armVCM4P10_Average_4x4_Align0_unsafe |
| IMPORT armVCM4P10_Average_4x4_Align2_unsafe |
| IMPORT armVCM4P10_Average_4x4_Align3_unsafe |
| IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe |
| IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe |
| ENDIF |
| |
| IF ARM1136JS |
| IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe |
| IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe |
| IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe |
| IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe |
| ENDIF |
| |
| |
| |
| ;// Declare input registers (r0-r3 hold pSrc, srcStep, pDst, dstStep; r4/r5 are loaded from the stack arguments in the function body) |
| pSrc RN 0 |
| srcStep RN 1 |
| pDst RN 2 |
| dstStep RN 3 |
| iHeight RN 4 |
| iWidth RN 5 |
| |
| ;// Declare other intermediate registers (idx and index both alias r6; the function body repeats these five declarations) |
| idx RN 6 |
| idy RN 7 |
| index RN 6 |
| Temp RN 12 |
| pArgs RN 11 |
| |
| |
| ;// End of CortexA8 - NOTE(review): no CortexA8 section is visible in this file (M_VARIANTS lists only ARM1136JS); marker looks stale, confirm |
| |
| ;//------------------------------------------------------------------------------------------------------------------------- |
| ;//------------------------------------------------------------------------------------------------------------------------- |
| IF ARM1136JS |
| |
| |
| M_ALLOC4 ppDst, 8 |
| M_ALLOC4 ppSrc, 8 |
| M_ALLOC4 ppArgs, 16 |
| M_ALLOC4 pBuffer, 120 ;// 120 = 12x10; its address is loaded into pBuf (r8) at the top of every block iteration |
| M_ALLOC8 pInterBuf, 120 ;// 120 = 12*5*2; loaded into pBuf before the HalfDiag* helper calls |
| M_ALLOC8 pTempBuf, 32 ;// 32 = 8*4; scratch block used as a temporary pDst/pSrc (step 4) or passed in r7 |
| |
| ;// Function header: omxVCM4P10_InterpolateLuma; returns 0 in r0 (MOV r0, #0 just before M_END) |
| ;// Interpolation of luma is implemented by processing blocks of pixels, size 4x4 at a time; r0-r3 hold pSrc, srcStep, pDst, dstStep. |
| ;// Depending on the motion vector fractional parts (idx, idy), index = idx + 4*idy selects one of the 16 cases of the switch below. |
| ;// Registers r4, r5, r6 must be preserved by the internal unsafe functions, because the loops below rely on them across the BL calls: |
| ;// r4 - iHeight (rows still to process; decremented by 4 after each band of 4x4 blocks) |
| ;// r5 - iWidth (columns still to process in the current band; decremented by 4 after each block) |
| ;// r6 - index (case selector, computed once before the loops) |
| M_START omxVCM4P10_InterpolateLuma, r11 |
| |
| ;// Declare other intermediate registers (the first five repeat the file-level RN declarations; r6 is shared by idx/index, r9 by Height/bufStep) |
| idx RN 6 |
| idy RN 7 |
| index RN 6 |
| Temp RN 12 |
| pArgs RN 11 |
| |
| pBuf RN 8 |
| Height RN 9 |
| bufStep RN 9 |
| |
| ;// Define stack arguments (four entries declared with M_ARG, in this order) |
| M_ARG ptridx, 4 |
| M_ARG ptridy, 4 |
| M_ARG ptrWidth, 4 |
| M_ARG ptrHeight, 4 |
| |
| ;// Load the stack arguments: idx and idy, then the two structure elements of roi (iWidth, iHeight) |
| M_LDR idx, ptridx |
| M_LDR idy, ptridy |
| M_LDR iWidth, ptrWidth |
| M_LDR iHeight, ptrHeight |
| |
| M_PRINTF "roi.width %d\n", iWidth |
| M_PRINTF "roi.height %d\n", iHeight |
| |
| ADD index, idx, idy, LSL #2 ;// index = idx + 4*idy, i.e. [index] = [idy][idx]; idx and index share r6, idy (r7) is not read again |
| M_ADR pArgs, ppArgs |
| |
| InterpolateLuma |
| Block4x4WidthLoop |
| Block4x4HeightLoop |
| |
| STM pArgs, {pSrc,srcStep,pDst,dstStep} |
| M_ADR pBuf, pBuffer |
| |
| ;// switch table using index (idx + 4*idy, built from the motion vector fractions); every case ends at Block4x4LoopEnd |
| M_SWITCH index, L |
| M_CASE Case_0 |
| M_CASE Case_1 |
| M_CASE Case_2 |
| M_CASE Case_3 |
| M_CASE Case_4 |
| M_CASE Case_5 |
| M_CASE Case_6 |
| M_CASE Case_7 |
| M_CASE Case_8 |
| M_CASE Case_9 |
| M_CASE Case_a |
| M_CASE Case_b |
| M_CASE Case_c |
| M_CASE Case_d |
| M_CASE Case_e |
| M_CASE Case_f |
| M_ENDSWITCH |
| |
| Case_0 |
| ;// Case G (index 0: idx=0, idy=0): single call to the Copy4x4 helper |
| M_PRINTF "Case 0 \n" |
| |
| BL armVCM4P10_InterpolateLuma_Copy4x4_unsafe |
| B Block4x4LoopEnd |
| |
| Case_1 |
| ;// Case a (index 1: idx=1, idy=0): pSrc moved 2 bytes back, Height = 4, HorAlign9x + HalfHor4x4, then Average_4x4_Align2 |
| M_PRINTF "Case 1 \n" |
| |
| SUB pSrc, pSrc, #2 |
| MOV Height, #4 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe |
| BL armVCM4P10_Average_4x4_Align2_unsafe |
| B Block4x4LoopEnd |
| Case_2 |
| ;// Case b (index 2: idx=2, idy=0): same HorAlign9x + HalfHor4x4 sequence as case a, without the averaging call |
| M_PRINTF "Case 2 \n" |
| |
| SUB pSrc, pSrc, #2 |
| MOV Height, #4 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe |
| B Block4x4LoopEnd |
| Case_3 |
| ;// Case c (index 3: idx=3, idy=0): as case a, but the final call is Average_4x4_Align3 |
| M_PRINTF "Case 3 \n" |
| |
| SUB pSrc, pSrc, #2 |
| MOV Height, #4 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe |
| BL armVCM4P10_Average_4x4_Align3_unsafe |
| B Block4x4LoopEnd |
| Case_4 |
| ;// Case d (index 4: idx=0, idy=1): pSrc moved 2 rows up, Height = 9, VerAlign4x + HalfVer4x4, then Average_4x4_Align0 |
| M_PRINTF "Case 4 \n" |
| |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe |
| BL armVCM4P10_Average_4x4_Align0_unsafe |
| |
| B Block4x4LoopEnd |
| Case_5 |
| ;// Case e (index 5: idx=1, idy=1): HalfHor pass with pDst = pTempBuf (step 4), arguments reloaded from ppArgs, HalfVer pass, then Average_4x4_Align0 with pSrc = pTempBuf |
| M_PRINTF "Case 5 \n" |
| |
| SUB pSrc, pSrc, #2 |
| MOV Height, #4 |
| M_ADR pDst, pTempBuf |
| MOV dstStep, #4 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe |
| M_ADR pArgs, ppArgs |
| LDM pArgs, {pSrc, srcStep, pDst, dstStep} |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| M_ADR pBuf, pBuffer |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe |
| M_ADR pSrc, pTempBuf |
| MOV srcStep, #4 |
| BL armVCM4P10_Average_4x4_Align0_unsafe |
| |
| |
| B Block4x4LoopEnd |
| Case_6 |
| ;// Case f (index 6: idx=2, idy=1): pSrc moved 2 bytes back and 2 rows up, HorAlign9x, HalfDiagHorVer4x4 with pBuf = pInterBuf, VerDiagCopy with r7 (idy) reused to carry pTempBuf, then Average_4x4_Align0 |
| M_PRINTF "Case 6 \n" |
| |
| SUB pSrc, pSrc, #2 |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| M_ADR pBuf, pInterBuf |
| BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe |
| M_ADR idy, pTempBuf |
| BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe |
| BL armVCM4P10_Average_4x4_Align0_unsafe |
| B Block4x4LoopEnd |
| Case_7 |
| ;// Case g (index 7: idx=3, idy=1): as case e, but pSrc is advanced by 1 byte before the vertical pass |
| M_PRINTF "Case 7 \n" |
| |
| SUB pSrc, pSrc, #2 |
| MOV Height, #4 |
| M_ADR pDst, pTempBuf |
| MOV dstStep, #4 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe |
| M_ADR pArgs, ppArgs |
| LDM pArgs, {pSrc, srcStep, pDst, dstStep} |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| ADD pSrc, pSrc, #1 |
| M_ADR pBuf, pBuffer |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe |
| M_ADR pSrc, pTempBuf |
| MOV srcStep, #4 |
| BL armVCM4P10_Average_4x4_Align0_unsafe |
| |
| B Block4x4LoopEnd |
| Case_8 |
| ;// Case h (index 8: idx=0, idy=2): VerAlign4x + HalfVer4x4 only, without the averaging call |
| M_PRINTF "Case 8 \n" |
| |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe |
| B Block4x4LoopEnd |
| Case_9 |
| ;// Case i (index 9: idx=1, idy=2): HorAlign9x from 2 bytes back / 2 rows up, pSrc += 2*srcStep, HalfDiagVerHor4x4 with pBuf = pInterBuf, HorDiagCopy with r7 = pTempBuf, then Average_4x4_Align2 |
| M_PRINTF "Case 9 \n" |
| |
| SUB pSrc, pSrc, #2 |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| ADD pSrc, pSrc, srcStep, LSL #1 |
| M_ADR pBuf, pInterBuf |
| BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe |
| M_ADR idy, pTempBuf |
| BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe |
| BL armVCM4P10_Average_4x4_Align2_unsafe |
| B Block4x4LoopEnd |
| Case_a |
| ;// Case j (index 10: idx=2, idy=2): same as case i up to HalfDiagVerHor4x4, with no copy or averaging calls |
| M_PRINTF "Case a \n" |
| |
| SUB pSrc, pSrc, #2 |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| ADD pSrc, pSrc, srcStep, LSL #1 |
| M_ADR pBuf, pInterBuf |
| BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe |
| B Block4x4LoopEnd |
| Case_b |
| ;// Case k (index 11: idx=3, idy=2): as case i, but the final call is Average_4x4_Align3 |
| M_PRINTF "Case b \n" |
| SUB pSrc, pSrc, #2 |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| ADD pSrc, pSrc, srcStep, LSL #1 |
| M_ADR pBuf, pInterBuf |
| BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe |
| M_ADR idy, pTempBuf |
| BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe |
| BL armVCM4P10_Average_4x4_Align3_unsafe |
| B Block4x4LoopEnd |
| Case_c |
| ;// Case n (index 12: idx=0, idy=3): as case d, with pSrc advanced by one row before Average_4x4_Align0 |
| M_PRINTF "Case c \n" |
| |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe |
| ADD pSrc, pSrc, srcStep ;// Update pSrc to one row down before the averaging call |
| BL armVCM4P10_Average_4x4_Align0_unsafe |
| B Block4x4LoopEnd |
| Case_d |
| ;// Case p (index 13: idx=1, idy=3): as case e, but the horizontal pass starts one row down (pSrc += srcStep) |
| M_PRINTF "Case d \n" |
| SUB pSrc, pSrc, #2 |
| ADD pSrc, pSrc, srcStep |
| MOV Height, #4 |
| M_ADR pDst, pTempBuf |
| MOV dstStep, #4 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe |
| M_ADR pArgs, ppArgs |
| LDM pArgs, {pSrc, srcStep, pDst, dstStep} |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| M_ADR pBuf, pBuffer |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe |
| M_ADR pSrc, pTempBuf |
| MOV srcStep, #4 |
| BL armVCM4P10_Average_4x4_Align0_unsafe |
| B Block4x4LoopEnd |
| Case_e |
| ;// Case q (index 14: idx=2, idy=3): as case f, with pSrc advanced by 4 before Average_4x4_Align0 |
| M_PRINTF "Case e \n" |
| |
| SUB pSrc, pSrc, #2 |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| M_ADR pBuf, pInterBuf |
| BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe |
| M_ADR idy, pTempBuf |
| BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe |
| ADD pSrc, pSrc, #4 |
| BL armVCM4P10_Average_4x4_Align0_unsafe |
| |
| B Block4x4LoopEnd |
| Case_f |
| ;// Case r (index 15: idx=3, idy=3): as case p, but pSrc is advanced by 1 byte before the vertical pass; falls through to Block4x4LoopEnd |
| M_PRINTF "Case f \n" |
| SUB pSrc, pSrc, #2 |
| ADD pSrc, pSrc, srcStep |
| MOV Height, #4 |
| M_ADR pDst, pTempBuf |
| MOV dstStep, #4 |
| BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe |
| M_ADR pArgs, ppArgs |
| LDM pArgs, {pSrc, srcStep, pDst, dstStep} |
| SUB pSrc, pSrc, srcStep, LSL #1 |
| ADD pSrc, pSrc, #1 |
| M_ADR pBuf, pBuffer |
| MOV Height, #9 |
| BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe |
| BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe |
| M_ADR pSrc, pTempBuf |
| MOV srcStep, #4 |
| BL armVCM4P10_Average_4x4_Align0_unsafe |
| |
| Block4x4LoopEnd |
| |
| ;// Width Loop: step to the next 4x4 block on the right; BGT tests the flags from SUBS (the instructions in between are expected not to alter them) |
| SUBS iWidth, iWidth, #4 |
| M_ADR pArgs, ppArgs |
| LDM pArgs, {pSrc,srcStep,pDst,dstStep} ;// Reload the arguments saved by the STM at the loop top (the cases modify them) |
| ADD pSrc, pSrc, #4 |
| ADD pDst, pDst, #4 |
| BGT Block4x4WidthLoop |
| |
| ;// Height Loop: iWidth is reloaded from the stack; pSrc/pDst move down 4 rows and back by iWidth bytes to the start of the next band |
| SUBS iHeight, iHeight, #4 |
| M_LDR iWidth, ptrWidth |
| M_ADR pArgs, ppArgs |
| ADD pSrc, pSrc, srcStep, LSL #2 |
| ADD pDst, pDst, dstStep, LSL #2 |
| SUB pSrc, pSrc, iWidth |
| SUB pDst, pDst, iWidth |
| BGT Block4x4HeightLoop |
| |
| EndOfInterpolation |
| MOV r0, #0 |
| M_END |
| |
| ENDIF |
| |
| |
| END |
| |