blob: e3813d381942a0c26ff9df7e647bac6f83fea164 [file] [log] [blame]
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;// http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name: armVCM4P10_DeblockingChroma_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 12290
;// Date: Wednesday, April 9, 2008
;//
;//
;//
;//
INCLUDE omxtypes_s.h
INCLUDE armCOMM_s.h
M_VARIANTS CortexA8
IF CortexA8
;// ---------------------------------------------------------------------
;// Register aliases shared by the two chroma deblocking helpers below.
;// RN names an ARM core register, DN a NEON D register (with lane type),
;// QN a NEON Q register (with lane type).
;// Several aliases deliberately name the same physical register; the
;// sharing is called out next to each group so that lifetimes can be
;// checked when the code is modified.
;// ---------------------------------------------------------------------
;// Core registers. pBS and bS3210 are declared but not referenced by any
;// instruction in this file.
pAlpha RN 2
pBeta RN 3
pThresholds RN 5
pBS RN 4
bS3210 RN 6
;// Pixels
;// p0-p3 live in D4-D7 and q0-q3 in D8-D11. The routines in this file
;// only read p0, p1, q0 and q1.
dP_0 DN D4.U8
dP_1 DN D5.U8
dP_2 DN D6.U8
dP_3 DN D7.U8
dQ_0 DN D8.U8
dQ_1 DN D9.U8
dQ_2 DN D10.U8
dQ_3 DN D11.U8
;// Filtering Decision
;// dAlpha/dBeta are (re)loaded by both routines; dFilt is read by the
;// bS<4 routine. dAqflg, dApflg and dAp0q0 are not referenced by any
;// instruction in this file.
dAlpha DN D0.U8
dBeta DN D2.U8
dFilt DN D16.U8
dAqflg DN D12.U8
dApflg DN D17.U8
dAp0q0 DN D13.U8
;// bSLT4
;// dTCs and dTC are signed/unsigned views of the same register (D31).
;// dMask_0 and dMask_1 are read but never written in this file.
;// dMask_4 and dDummy are not referenced by any instruction in this file;
;// dDummy names the same register as dApflg (D17).
dTC3210 DN D18.U8
dTCs DN D31.S8
dTC DN D31.U8
dMask_0 DN D14.U8
dMask_1 DN D15.U8
dMask_4 DN D26.U16
dTemp DN D28.U8
dDummy DN D17.U8
;// Computing P0,Q0
;// Q10 is D20:D21, so dDelta (D20) overlays the low half of qDelta/qDq0p0.
;// Q11 is D22:D23.
qDq0p0 QN Q10.S16
qDp1q1 QN Q11.S16
qDelta QN Q10.S16 ; reuse qDq0p0
dDelta DN D20.S8
;// Computing P1,Q1
;// Q14 is D28:D29: dP_0n (D29) overlays the high half of qP_0n, and dTemp
;// (D28) overlays its low half.
;// Q12 is D24:D25: dQ_0n (D24) overlays the low half of qQ_0n.
qP_0n QN Q14.S16
qQ_0n QN Q12.S16
dQ_0n DN D24.U8
dP_0n DN D29.U8
;// bSGE4
;// dHSp0q1/dP_0t share D13 (also named dAp0q0 above); dHSq0p1/dQ_0t share
;// D31 (also named dTC/dTCs above); dBS3210 shares D28 with dTemp.
;// dBS3210 is not referenced by any instruction in this file.
dHSp0q1 DN D13.U8
dHSq0p1 DN D31.U8
dBS3210 DN D28.U16
dP_0t DN D13.U8 ;dHSp0q1
dQ_0t DN D31.U8 ;Temp1
;// NOTE(review): dP_0n and dQ_0n are declared a second time here with the
;// same bindings as in the "Computing P1,Q1" group above.
dP_0n DN D29.U8
dQ_0n DN D24.U8 ;Temp2
;// Register usage for - armVCM4P10_DeblockingChromabSLT4_unsafe
;//
;// Computes the filtered chroma p0/q0 samples for the bS < 4 case:
;//   delta = clamp((((q0 - p0) << 2) + (p1 - q1) + 4) >> 3, -tC, tC)
;//   P0    = clip(0, 255, p0 + delta)
;//   Q0    = clip(0, 255, q0 - delta)
;// The results are left in registers; nothing is stored to memory here.
;//
;// Inputs - Pixels - p0-p1: D4-D5, q0-q1: D8-D9
;// - Filter masks - filt: D16, dMask_0: D14, dMask_1: D15
;// - Additional Params - pThresholds: r5, pAlpha: r2, pBeta: r3
;//
;// Outputs - Pixels - P0: D29, Q0: D24
;// - Additional Params - pThresholds: r5 (post-incremented by the 32-bit load)
;// - alpha: D0, beta: D2 (one byte each, replicated to all lanes)
;// Registers written by this routine - D0, D2, D18, D20-D25, D28-D29, D31
;//
;// NOTE(review): dMask_0 and dMask_1 are prepared by the caller and their
;// contents are not visible in this file - presumably an all-0 and an
;// all-1 byte vector; confirm against the caller.
M_START armVCM4P10_DeblockingChromabSLT4_unsafe
;dTC3210 -18
;dTemp-28
;// Load four threshold bytes into the low 32 bits of D18 and advance
;// pThresholds past them. The upper 32 bits of D18 are not loaded.
VLD1 d18.U32[0], [pThresholds]! ;writeback: pThresholds moves past the loaded word
;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
;// dDelta = (qDp1q1 >> 2 + qDq0p0 + 1)>> 1
;// qDp1q1-11
;// qDq0p0-10
;// The threshold expansion (VMOV/VZIP) is interleaved with the delta
;// arithmetic (VSUBL/VSHR); the two chains use disjoint registers.
VSUBL qDp1q1, dP_1, dQ_1
VMOV dTemp, dTC3210
VSUBL qDq0p0, dQ_0, dP_0
VSHR qDp1q1, qDp1q1, #2
;// Zipping D18 with a copy of itself duplicates each of the four loaded
;// threshold bytes into two adjacent lanes of dTC3210. dTemp receives the
;// duplicated upper (not loaded) bytes and is not read afterwards.
VZIP.8 dTC3210, dTemp
;// qDelta-qDq0p0-10
;// dTC = (dFilt ? dTC3210 : dMask_0) + dMask_1
;// NOTE(review): the previous comment here gave the luma form
;// "dTC01 + (dApflg & 1) + (dAqflg & 1)"; this routine does not read
;// dApflg or dAqflg.
;// dTC3210-18
;// dTemp-28
;// dTC-31
;// VBIF replaces the bits of dTC3210 with those of dMask_0 wherever the
;// corresponding bit of dFilt is clear.
VBIF dTC3210, dMask_0, dFilt
VRHADD qDelta, qDp1q1, qDq0p0
VADD dTC, dTC3210, dMask_1
;// Saturating narrow of the 16-bit delta into signed 8-bit lanes (D20).
VQMOVN dDelta, qDelta
;// dDelta-d20
;// dDelta = min(dDelta, tC); then dTCs = -tC; then dDelta = max(dDelta, -tC)
;// i.e. delta is clamped to [-tC, tC] using signed 8-bit compares.
;// The alpha/beta loads are interleaved with the clamp. Neither D0 nor D2
;// is read again inside this routine - presumably they are refreshed for
;// the caller; confirm.
VLD1 {dAlpha[]}, [pAlpha]
VMIN dDelta, dDelta, dTCs
VNEG dTCs, dTCs
VLD1 {dBeta[]}, [pBeta]
;1
VMAX dDelta, dDelta, dTCs
;// dP_0n - 29
;// dQ_0n - 24
;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta);
;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta);
;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);
;// qP_0n - 14
;// qQ_0n - 12
;// Widen p0 and q0 to 16-bit lanes so that adding/subtracting the signed
;// delta cannot wrap before the final saturation.
VMOVL qP_0n, dP_0
VMOVL qQ_0n, dQ_0
;1
VADDW qP_0n, qP_0n, dDelta
VSUBW qQ_0n, qQ_0n, dDelta
;// Saturating narrow from signed 16-bit to unsigned 8-bit performs the
;// 0..255 clip. dP_0n (D29) is the high half of qP_0n (Q14), so the
;// narrowed result overwrites part of its own source register.
VQMOVUN dP_0n, qP_0n
VQMOVUN dQ_0n, qQ_0n
M_END
;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe()
;//
;// Computes the filtered chroma p0/q0 samples for the bS >= 4 case using
;// two chained halving adds per output:
;//   P0 = (((p0 + q1) >> 1) + p1 + 1) >> 1
;//   Q0 = (((q0 + p1) >> 1) + q1 + 1) >> 1
;// which is arithmetically the same as (2*p1 + p0 + q1 + 2) >> 2 and
;// (2*q1 + q0 + p1 + 2) >> 2 respectively.
;// The results are left in registers; nothing is stored to memory here.
;//
;// Inputs - Pixels - p0-p1: D4-D5, q0-q1: D8-D9
;// - Additional Params - pThresholds: r5, pAlpha: r2, pBeta: r3
;//
;// Outputs - Pixels - P0: D13 (dP_0t), Q0: D31 (dQ_0t)
;// - Additional Params - pThresholds: r5 (advanced by 4)
;// - alpha: D0, beta: D2 (one byte each, replicated to all lanes)
;// Registers written by this routine - D0, D2, D13, D31
;//
;// NOTE(review): no filter mask is applied here; selecting between the
;// filtered and unfiltered samples is presumably done by the caller -
;// confirm.
M_START armVCM4P10_DeblockingChromabSGE4_unsafe
;dHSq0p1 - 31
;dHSp0q1 - 13
;// First stage: truncating halving adds, (p0 + q1) >> 1 and (q0 + p1) >> 1.
VHADD dHSp0q1, dP_0, dQ_1
VHADD dHSq0p1, dQ_0, dP_1
;// Prepare the bS mask
;// dHSp0q1-13
;// dP_0t-dHSp0q1-13
;// dHSq0p1-31
;// dQ_0t-Temp1-31
;// The alpha/beta reloads and the pThresholds bump are placed between the
;// two filter stages. Neither D0 nor D2 is read again inside this routine.
VLD1 {dAlpha[]}, [pAlpha]
;// This routine reads no thresholds; the pointer is still advanced by 4,
;// the same amount as the 32-bit post-incremented load in the bS<4 routine.
ADD pThresholds, pThresholds, #4
VLD1 {dBeta[]}, [pBeta]
;// Second stage: rounding halving adds. dP_0t/dQ_0t name the same
;// registers as dHSp0q1/dHSq0p1, so each result overwrites its first-stage
;// partial sum.
VRHADD dP_0t, dHSp0q1, dP_1
VRHADD dQ_0t, dHSq0p1, dQ_1
M_END
ENDIF
END