;// blob: e4fbfa44e69b36dd733f905f49f27fb7778a78a9 [file] [log] [blame]
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;// http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 9641
;// Date: Thursday, February 7, 2008
;//
;//
;//
;//
INCLUDE omxtypes_s.h
INCLUDE armCOMM_s.h
M_VARIANTS ARM1136JS
IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
IF ARM1136JS
;// Byte-lane masks and the combined loop counter used by this file.
;//
;// MASK_0: all-zero word. Loaded into m00 and used as the value SEL picks
;// for byte lanes whose difference is >= the threshold being tested.
MASK_0 EQU 0x00000000
;// MASK_1: 0x01 in every byte. Multiplying an 8-bit alpha/beta value by it
;// replicates that value into all four byte lanes; it is also loaded into
;// m01 as the initial per-lane "filter this lane" flag pattern.
MASK_1 EQU 0x01010101
;// MASK_2: not referenced anywhere in this file.
MASK_2 EQU 0xff00ff00
;// LOOP_COUNT: shift-register style counter. It is doubled with ADDS once
;// per LoopX pass: a carry-out is produced on every 4th doubling (ends
;// LoopX) and the value becomes zero on the 16th doubling (ends LoopY),
;// giving 4 x 4 passes in total.
LOOP_COUNT EQU 0x11110000
;// Register aliases (RN) used throughout this file.
;// Many names below share one physical register, so a name is only valid
;// while no other alias of the same register is live. The comments note the
;// overlaps that the code depends on.
;//
;// Declare input registers
;// The first four arguments are read directly from r0-r3 at function entry.
;// pSrcDst is never referenced by name; the code uses pQ0 (also r0).
pSrcDst RN 0
srcdstStep RN 1
pAlphaArg RN 2
pBetaArg RN 3
;// pThresholds lives in r14, the ARM link register, so it is overwritten by
;// every BL and is kept in the ppThresholds stack slot between uses.
;// NOTE(review): this relies on M_START saving LR - confirm in armCOMM_s.h.
pThresholds RN 14
;// pBS shares r9 with q_1, beta1 and Q0b.
pBS RN 9
;// pQ0 shares r0 with the scratch register "a" and with apflg/apqflg.
pQ0 RN 0
;// bS shares r2 with p_3 and filt.
bS RN 2
;// alpha/beta are the values used for the current pass (r6/r7).
;// alpha0/beta0 are the same registers; alpha1/beta1 (r8/r9) only hold the
;// second set during setup, before XY and pBS take over r8/r9.
alpha RN 6
alpha0 RN 6
alpha1 RN 8
beta RN 7
beta0 RN 7
beta1 RN 9
;// Declare Local/Temporary variables
;// Pixels
;// Each register holds one 32-bit word, treated as four byte lanes by
;// USUB8/SEL. p_3..p_0 are the four rows loaded first, q_0..q_3 the next four.
p_0 RN 3
p_1 RN 5
p_2 RN 4
p_3 RN 2
q_0 RN 8
q_1 RN 9
q_2 RN 10
q_3 RN 12
;// Filtering
;// All five difference temporaries are the same register (r12, also q_3);
;// each is consumed by the SEL that immediately follows its USUB8.
dp0q0 RN 12
dp1p0 RN 12
dq1q0 RN 12
dp2p0 RN 12
dq2q0 RN 12
;// ap0q0 overwrites srcdstStep (r1); the step is reloaded from the stack later.
ap0q0 RN 1
;// filt: per-lane flag word, 0x01 in lanes that pass the threshold tests.
filt RN 2
;// m00/m01 hold MASK_0/MASK_1 during the filtering decision.
m00 RN 14
m01 RN 11
;// apflg and apqflg are the same register (r0); aqflg is r6.
apflg RN 0
aqflg RN 6
apqflg RN 0
;//Declarations for bSLT4 kernel
;// tC0 is loaded with the threshold byte just before the call.
;// P0a/P1a/Q0a/Q1a are the registers this file stores to memory after the
;// call returns. maska is not referenced in this file.
;// NOTE(review): the kernel's full register contract is defined elsewhere.
tC0 RN 7
ptC0 RN 1
pQ0a RN 0
Stepa RN 1
maska RN 14
P0a RN 1
P1a RN 8
Q0a RN 7
Q1a RN 11
;//Declarations for bSGE4 kernel
;// P0b/P1b/P2b and Q0b/Q1b/Q2b are the registers this file stores to memory
;// after the call returns. maskb, P3b and Q3b are not referenced in this file.
;// NOTE(review): the kernel's full register contract is defined elsewhere.
pQ0b RN 0
Stepb RN 1
maskb RN 14
P0b RN 6
P1b RN 7
P2b RN 1
P3b RN 3
Q0b RN 9
Q1b RN 0
Q2b RN 2
Q3b RN 3
;// Miscellaneous
;// XY is the LOOP_COUNT shift-register counter (r8, shared with q_0).
;// t0, t5 and t8 are not referenced in this file.
XY RN 8
t0 RN 3
t1 RN 12
t2 RN 14
t7 RN 7
t4 RN 4
t5 RN 1
t8 RN 6
;// a: general scratch register (r0, shared with pQ0).
a RN 0
;//--------------------------------------------------------------------
;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I (ARM1136JS variant)
;//
;// In-place deblocking of luma pixels across horizontal edges. Each pass
;// handles four adjacent columns at once using the byte-parallel
;// USUB8/SEL instructions, and calls one of two imported kernels to do the
;// actual filtering.
;//
;// Inputs, as read by the code below:
;//   r0 pSrcDst    - address of the row loaded into q_0 for the first edge;
;//                   the four rows above it are loaded as p_3..p_0
;//   r1 srcdstStep - byte distance between consecutive rows
;//   r2 pAlphaArg  - two bytes: [0] is used for the first edge row,
;//                   [1] for the remaining three
;//   r3 pBetaArg   - two bytes, used the same way as pAlphaArg
;//   stack ppThresholdsArg - pointer advanced by one byte per pass; the
;//                   byte is only read on the bS < 4 path
;//   stack ppBSArg - pointer to one boundary-strength byte per pass
;// Output:
;//   r0 = OMX_Sts_NoErr (no argument validation is performed here)
;//
;// Iteration: 4 edge rows (LoopY) x 4 column groups of 4 bytes (LoopX).
;//
;// NOTE(review): M_ALLOC*, M_START, M_ARG, M_LDR, M_STR, M_LDRD, M_STRD and
;// M_END are macros from armCOMM_s.h; their exact expansion is not visible
;// in this file. Comments below assume they behave like the corresponding
;// plain load/store instructions and do not alter the condition flags.
;//--------------------------------------------------------------------
;// Allocate stack memory
;// NOTE(review): the order of these allocations appears to be significant.
;//  - pXYBS is followed directly by ppBS: the code saves the advanced pBS
;//    with "M_STR pBS, ppBS" but reloads it with "M_LDRD XY, pBS, pXYBS".
;//  - ppQ0Step is followed directly by pStep: pQ0 and srcdstStep are saved
;//    separately but reloaded together with "M_LDRD ..., ppQ0Step".
;//  - pP_3 and pQ_3 are written but never read in this file; presumably the
;//    imported kernels read them - confirm before changing the layout.
M_ALLOC4 ppThresholds,4
M_ALLOC4 pQ_3,4
M_ALLOC4 pP_3,4
M_ALLOC8 pAlphaBeta0,8
M_ALLOC8 pAlphaBeta1,8
M_ALLOC8 pXYBS,4
M_ALLOC4 ppBS,4
M_ALLOC8 ppQ0Step,4
M_ALLOC4 pStep,4
;// Function header
M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11
;//Input arguments on the stack
M_ARG ppThresholdsArg, 4
M_ARG ppBSArg, 4
;// Replicate each 8-bit alpha/beta value into all four byte lanes by
;// multiplying with 0x01010101, then park both sets on the stack.
LDR t4,=MASK_1
LDRB alpha0, [pAlphaArg]
LDRB beta0, [pBetaArg]
LDRB alpha1, [pAlphaArg,#1]
LDRB beta1, [pBetaArg,#1]
MUL alpha0, alpha0, t4
MUL beta0, beta0, t4
MUL alpha1, alpha1, t4
MUL beta1, beta1, t4
M_STRD alpha0, beta0, pAlphaBeta0
M_STRD alpha1, beta1, pAlphaBeta1
;// r8/r9 (alpha1/beta1) are free again: load the loop counter and pBS.
LDR XY,=LOOP_COUNT
M_LDR pBS, ppBSArg
M_LDR pThresholds, ppThresholdsArg
M_STR srcdstStep, pStep
M_STRD XY, pBS, pXYBS
;// Step back four rows so pQ0 addresses the row loaded into p_3.
SUB pQ0, pQ0, srcdstStep, LSL #2
M_STR pThresholds, ppThresholds
;// LoopY and LoopX share one entry point; the inner/outer distinction is
;// made only by the carry and zero flags produced when XY is doubled.
LoopY
LoopX
;//---------------Load Pixels-------------------
;// Save pQ0 first: r0 is reused as scratch ("a") during the decision.
M_STR pQ0, ppQ0Step
;// Load eight consecutive rows, one word each, post-incrementing pQ0 by
;// srcdstStep. p_3 shares r2 with bS, so it is stored to pP_3 before bS is
;// loaded; pBS is stored to ppBS before q_1 overwrites r9.
M_LDR p_3, [pQ0], srcdstStep
M_LDR p_2, [pQ0], srcdstStep
M_STR p_3, pP_3
LDRB bS, [pBS], #1
M_STR pBS, ppBS
M_LDR p_1, [pQ0], srcdstStep
;// Flags from this compare are consumed by BEQ NoFilterBS0 below.
CMP bS, #0
M_LDR p_0, [pQ0], srcdstStep
M_LDR q_0, [pQ0], srcdstStep
M_LDR q_1, [pQ0], srcdstStep
M_LDR q_2, [pQ0], srcdstStep
M_LDR q_3, [pQ0], srcdstStep
BEQ NoFilterBS0
;// Flags from this compare must survive until "BLT bSLT4" at the end of the
;// filtering decision. USUB8 writes only the GE flags and SEL reads only the
;// GE flags, so N/Z/C/V are left untouched in between.
CMP bS, #4
M_STR q_3, pQ_3
;//--------------Filtering Decision -------------------
;// Pattern used for every test below:
;//   USUB8 x, A, B ; USUB8 a, B, A ; SEL r, a, x   -> r = |A-B| per lane
;//   USUB8 a, r, T                                 -> GE set where |A-B| >= T
;//   SEL   f, m00, f                               -> clear flag where GE set
LDR m01, =MASK_1 ;// 01010101 mask
MOV m00, #MASK_0 ;// 00000000 mask
;// Check |p0-q0|<Alpha
;// ap0q0 overwrites srcdstStep (r1); filt overwrites bS (r2).
USUB8 dp0q0, p_0, q_0
USUB8 a, q_0, p_0
SEL ap0q0, a, dp0q0
USUB8 a, ap0q0, alpha
SEL filt, m00, m01
;// Check |p1-p0|<Beta
USUB8 dp1p0, p_1, p_0
USUB8 a, p_0, p_1
SEL a, a, dp1p0
USUB8 a, a, beta
SEL filt, m00, filt
;// Check |q1-q0|<Beta
USUB8 dq1q0, q_1, q_0
USUB8 a, q_0, q_1
SEL a, a, dq1q0
USUB8 a, a, beta
SEL filt, m00, filt
;// Check ap<Beta
USUB8 dp2p0, p_2, p_0
USUB8 a, p_0, p_2
SEL a, a, dp2p0
USUB8 a, a, beta
SEL apflg, m00, filt ;// apflg = filt && (ap<beta)
;// Check aq<Beta
;// t2 is r14, so m00 is overwritten here. The GE flags left by the last
;// USUB8 are consumed by the first SEL of whichever branch is taken, and t7
;// (r7, zeroed below, replacing beta) stands in for m00 there.
USUB8 dq2q0, q_2, q_0
USUB8 t2, q_0, q_2
SEL t2, t2, dq2q0
USUB8 t2, t2, beta
MOV t7,#0
;// Uses the flags from "CMP bS, #4" above: taken when bS < 4.
BLT bSLT4
;//-------------------Filter--------------------
bSGE4
;//---------bSGE4 Execution---------------
SEL t1, t7, filt ;// aqflg = filt && (aq<beta)
CMP filt, #0
;// Pack both flag sets into one word: ap flag in bit 0 and aq flag in bit 1
;// of each byte lane.
ORR apqflg, apflg, t1, LSL #1
;// If no lane needs filtering, restore pQ0/srcdstStep and skip the kernel.
M_LDRD pQ0, srcdstStep, ppQ0Step, EQ
BEQ NoFilterFilt0
BL armVCM4P10_DeblockingLumabSGE4_unsafe
;//---------Store result---------------
;// r14 was overwritten by the BL, so pThresholds is reloaded from the stack.
M_LDR pThresholds, ppThresholds
;// Q1b (r0) and P2b (r1) are moved aside because r0/r1 are about to be
;// reloaded with the saved pixel pointer and step.
MOV p_2, Q1b
MOV p_1, P2b
M_LDRD pQ0b, Stepb, ppQ0Step
ADD pThresholds, #1
M_STR pThresholds, ppThresholds
;// pQ0b starts at the row that was loaded into p_3. Each pre-indexed store
;// with writeback moves down one row: the rows loaded as p_2, p_1, p_0, q_0.
M_STR p_1, [pQ0b, Stepb]!
M_STR P1b, [pQ0b, Stepb]!
M_STR P0b, [pQ0b, Stepb]!
M_STR Q0b, [pQ0b, Stepb]!
;// No writeback for the last two stores: rows q_1 and q_2 relative to q_0.
STR p_2, [pQ0b, Stepb]
STR Q2b, [pQ0b, Stepb, LSL #1]
M_LDRD XY, pBS, pXYBS
;// pQ0b is four rows below the start row; go back up and move 4 bytes right.
SUB pQ0, pQ0b, Stepb, LSL #2
ADD pQ0, pQ0, #4
M_LDRD alpha, beta, pAlphaBeta0
;// Advance the shift-register counter: carry clear -> more column groups in
;// this row. The Z flag set here is what "BNE LoopY" tests at ExitLoopY.
ADDS XY, XY, XY
M_STR XY, pXYBS
BCC LoopX
B ExitLoopY
;//---------- Exit of LoopX --------------
;//---- for the case of no filtering -----
NoFilterBS0
;// bS == 0: pQ0 was post-incremented by eight rows during the loads; undo it.
;// srcdstStep (r1) is still intact on this path.
SUB pQ0, pQ0, srcdstStep, LSL #3
NoFilterFilt0
;// Reached with pQ0 at the start row of this pass: move 4 bytes right.
ADD pQ0, pQ0, #4
;// Load counter for LoopX
M_LDRD XY, pBS, pXYBS
M_LDR pThresholds, ppThresholds
M_LDRD alpha, beta, pAlphaBeta0
;// Align the pointers
;// The threshold pointer is advanced even though no threshold was read, so
;// it stays in step with pBS.
ADDS XY, XY, XY
ADD pThresholds, pThresholds, #1
M_STR pThresholds, ppThresholds
M_STR XY, pXYBS
BCC LoopX
B ExitLoopY
bSLT4
;//---------bSLT4 Execution---------------
SEL aqflg, t7, filt ;// aqflg = filt && (aq<beta)
M_LDR ptC0, ppThresholds
CMP filt, #0
;// If no lane needs filtering, restore pQ0/srcdstStep (this also discards
;// ptC0, which shares r1) and take the common no-filter exit.
M_LDRD pQ0, srcdstStep, ppQ0Step, EQ
BEQ NoFilterFilt0
;// Read this pass's threshold byte and save the advanced pointer.
LDRB tC0, [ptC0], #1
M_STR ptC0, ppThresholds
BL armVCM4P10_DeblockingLumabSLT4_unsafe
;//---------Store result---------------
;// P0a (r1) is moved aside because r1 is about to be reloaded with the step.
MOV p_2, P0a
M_LDRD pQ0a, Stepa, ppQ0Step
;// Skip two rows from the start row, then store the rows loaded as
;// p_1, p_0 and q_0 with writeback, and q_1 without writeback.
M_STR P1a, [pQ0a, Stepa, LSL #1]!
M_STR p_2, [pQ0a, Stepa]!
M_STR Q0a, [pQ0a, Stepa]!
STR Q1a, [pQ0a, Stepa]
;// Load counter
M_LDRD XY, pBS, pXYBS
M_LDRD alpha, beta, pAlphaBeta0
;// pQ0a is four rows below the start row; go back up and move 4 bytes right.
SUB pQ0, pQ0a, Stepa, LSL #2
ADD pQ0, pQ0, #4
ADDS XY, XY, XY
M_STR XY, pXYBS
BCC LoopX
;//-------- Common Exit of LoopY -----------------
;// Align the pointers
ExitLoopY
;// Copy the second alpha/beta set into the "current" slot so that every
;// edge row after the first uses alpha[1]/beta[1].
M_LDRD alpha, beta, pAlphaBeta1
;// Rewind the 4 x 4 bytes walked by LoopX and move down four rows.
SUB pQ0, pQ0, #16
ADD pQ0, pQ0, srcdstStep, LSL #2
M_STRD alpha, beta, pAlphaBeta0
;// Tests the Z flag left by the last "ADDS XY, XY, XY": XY reaches zero
;// only after the fourth edge row.
BNE LoopY
MOV r0, #OMX_Sts_NoErr
;//-----------------End Filter--------------------
M_END
ENDIF
END