blob: 0044636c35fbae9456d000bedb362e90a14f5e01 [file] [log] [blame]
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;// http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
/*
*
*/
;// EABI build attributes 24 and 25 (Tag_ABI_align_needed and
;// Tag_ABI_align_preserved in the ARM EABI attribute numbering), both set
;// to 1: this object needs, and keeps, 8-byte stack alignment.
.eabi_attribute 24, 1
.eabi_attribute 25, 1
;// Assemble as ARM-state code; the (P0+8) offsets stored in the dispatch
;// table below rely on ARM-state pc read-ahead.
.arm
;// Enable the Advanced SIMD (NEON) instruction set for this file.
.fpu neon
;// The constant tables below are emitted into .text next to the code so
;// that ADR can address them pc-relative.
.text
;// For ARM targets GNU as reads the .align operand as a power of two,
;// so this requests a 16-byte boundary.
.align 4
;//-------------------------------------------------------
;// This table implements a C-style switch statement in asm
;// using two levels of indexing.
;//-------------------------------------------------------
armVCM4P10_pIndexTable16x16:
    ;// One 32-bit entry per prediction mode, in mode order 0..3.
    ;// Each entry is the distance from (P0 + 8) to the handler, so the
    ;// "ADD pc,r9" at label P0 (where pc reads as P0 + 8) lands on it.
    .word   OMX_VC_16X16_VERT  - (P0 + 8)       ;// mode 0
    .word   OMX_VC_16X16_HOR   - (P0 + 8)       ;// mode 1
    .word   OMX_VC_16X16_DC    - (P0 + 8)       ;// mode 2
    .word   OMX_VC_16X16_PLANE - (P0 + 8)       ;// mode 3

armVCM4P10_MultiplierTable16x16:
    ;// Three rows of eight 16-bit constants; the plane handler loads them
    ;// one row at a time with VLD1.16 through r9.
    ;// Row 0: weights for the eight neighbour differences (gradient sums).
    .hword  7,  6,  5,  4,  3,  2,  1,  8
    ;// Row 1: column numbers 0..7, scaled by the broadcast slope in q6.
    .hword  0,  1,  2,  3,  4,  5,  6,  7
    ;// Row 2: column numbers 8..15, scaled by the same slope.
    .hword  8,  9, 10, 11, 12, 13, 14, 15
;//-----------------------------------------------------------------------
;// omxVCM4P10_PredictIntra_16x16
;//
;// Fills a 16x16 block of bytes at the destination with one of four
;// predictors selected by the mode argument:
;//   0 = vertical, 1 = horizontal, 2 = DC, 3 = plane.
;//
;// ABI:  ARM state, AAPCS; arguments 5..8 are read from the caller stack.
;// In:   r0        left-neighbour column, 16 bytes spaced leftStep apart
;//       r1        above-neighbour row, 16 contiguous bytes
;//       r2        above-left neighbour byte (read in plane mode only)
;//       r3        destination, rows spaced dstStep apart
;//       [sp,#0]   leftStep  (bytes)
;//       [sp,#4]   dstStep   (bytes)
;//       [sp,#8]   prediction mode, 0..3
;//       [sp,#12]  availability flags, tested in DC mode only:
;//                 bit 0 = above row usable, bit 1 = left column usable
;// Out:  r0 = 0 on success,
;//       r0 = -5 when the mode is outside 0..3 (presumably
;//            OMX_Sts_BadArgErr - confirm against omxtypes.h).
;//       No other argument is validated.
;// Saved and restored: r4-r12, lr, d8-d15.
;// Clobbers: r3, flags, d0-d7, d16-d17, d20-d27.
;//
;// Changes relative to the previous revision:
;//   * the mode is range-checked before it indexes the jump table; an
;//     out-of-range value used to add an arbitrary word to pc;
;//   * the symbol is typed as a function and sized, so the linker can
;//     interwork calls from Thumb code.
;//-----------------------------------------------------------------------
    .global  omxVCM4P10_PredictIntra_16x16
    .type    omxVCM4P10_PredictIntra_16x16, %function
omxVCM4P10_PredictIntra_16x16:
    PUSH     {r4-r12,lr}                    ;// 10 words  = 0x28 bytes
    VPUSH    {d8-d15}                       ;// 8 dwords  = 0x40 bytes; stack args now at sp+0x68
    ADR      r9, armVCM4P10_pIndexTable16x16
    LDR      r6,[sp,#0x70]                  ;// r6  = prediction mode
    LDR      r4,[sp,#0x68]                  ;// r4  = leftStep
    LDR      r5,[sp,#0x6c]                  ;// r5  = dstStep
    LDR      r7,[sp,#0x74]                  ;// r7  = availability flags
    MOV      r12,#0x10                      ;// r12 = row counter for the HOR and PLANE loops
    CMP      r6,#3                          ;// unsigned test: also rejects negative modes
    BHI      OMX_VC_16X16_BADMODE
    LDR      r9,[r9,r6,LSL #2]              ;// r9  = handler - (P0 + 8)
P0: ADD      pc,r9                          ;// pc reads as P0 + 8 here, so this jumps to the handler

;// ---- mode 0: every row is a copy of the 16 above bytes ----------------
OMX_VC_16X16_VERT:
    VLD1.8   {d0,d1},[r1]                   ;// q0 = above[0..15]
    ADD      r8,r3,r5                       ;// r8  -> odd rows, r3 -> even rows
    ADD      r10,r5,r5                      ;// r10 = 2 * dstStep
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3],r10
    VST1.8   {d0,d1},[r8],r10
    VST1.8   {d0,d1},[r3]                   ;// rows 14 and 15: no write-back needed
    VST1.8   {d0,d1},[r8]
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

;// ---- mode 1: every row is its left-neighbour byte replicated 16x -----
OMX_VC_16X16_HOR:
    ADD      r8,r0,r4                       ;// r8  -> odd left samples, r0 -> even ones
    ADD      r4,r4,r4                       ;// r4  = 2 * leftStep
    ADD      r11,r3,r5                      ;// r11 -> odd destination rows
    ADD      r5,r5,r5                       ;// r5  = 2 * dstStep
L0x8c:                                      ;// each pass writes 8 rows; r12 goes 16 -> 8 -> 0
    VLD1.8   {d2[],d3[]},[r0],r4            ;// load one byte and replicate it across q1
    VLD1.8   {d0[],d1[]},[r8],r4            ;// same for the odd row, into q0
    SUBS     r12,r12,#8                     ;// flags stay live until BNE (nothing below sets them)
    VST1.8   {d2,d3},[r3],r5
    VST1.8   {d0,d1},[r11],r5
    VLD1.8   {d2[],d3[]},[r0],r4
    VLD1.8   {d0[],d1[]},[r8],r4
    VST1.8   {d2,d3},[r3],r5
    VST1.8   {d0,d1},[r11],r5
    VLD1.8   {d2[],d3[]},[r0],r4
    VLD1.8   {d0[],d1[]},[r8],r4
    VST1.8   {d2,d3},[r3],r5
    VST1.8   {d0,d1},[r11],r5
    VLD1.8   {d2[],d3[]},[r0],r4
    VLD1.8   {d0[],d1[]},[r8],r4
    VST1.8   {d2,d3},[r3],r5
    VST1.8   {d0,d1},[r11],r5
    BNE      L0x8c
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

;// ---- mode 2: whole block is one value ---------------------------------
;// r11 counts how many neighbour groups were summed:
;//   0 -> 0x80;  1 -> (sum + 8) >> 4;  2 -> (sumLeft + sumAbove + 16) >> 5
OMX_VC_16X16_DC:
    MOV      r11,#0
    TST      r7,#2                          ;// left column usable?
    BEQ      L0x14c
    ADD      r8,r0,r4                       ;// r8  -> odd left samples
    ADD      r10,r4,r4                      ;// r10 = 2 * leftStep
    VLD1.8   {d2[0]},[r0],r10               ;// gather the 16 left bytes into q1, lane by lane
    VLD1.8   {d2[1]},[r8],r10
    VLD1.8   {d2[2]},[r0],r10
    VLD1.8   {d2[3]},[r8],r10
    VLD1.8   {d2[4]},[r0],r10
    VLD1.8   {d2[5]},[r8],r10
    VLD1.8   {d2[6]},[r0],r10
    VLD1.8   {d2[7]},[r8],r10
    VLD1.8   {d3[0]},[r0],r10
    VLD1.8   {d3[1]},[r8],r10
    VLD1.8   {d3[2]},[r0],r10
    VLD1.8   {d3[3]},[r8],r10
    VLD1.8   {d3[4]},[r0],r10
    VLD1.8   {d3[5]},[r8],r10
    VLD1.8   {d3[6]},[r0],r10
    VLD1.8   {d3[7]},[r8]
    VPADDL.U8 q0,q1                         ;// pairwise reduce 16 x u8 ...
    ADD      r11,r11,#1
    VPADD.I16 d0,d0,d1
    VPADDL.U16 d0,d0
    VPADDL.U32 d6,d0                        ;// ... down to d6 = sum of the left column
    VRSHR.U64 d8,d6,#4                      ;// d8 = (sumLeft + 8) >> 4
L0x14c:
    TST      r7,#1                          ;// above row usable?
    BEQ      L0x170
    VLD1.8   {d0,d1},[r1]                   ;// q0 = above[0..15]
    ADD      r11,r11,#1
    VPADDL.U8 q0,q0
    VPADD.I16 d0,d0,d1
    VPADDL.U16 d0,d0
    VPADDL.U32 d7,d0                        ;// d7 = sum of the above row
    VRSHR.U64 d8,d7,#4                      ;// d8 = (sumAbove + 8) >> 4
L0x170:
    CMP      r11,#2
    BNE      L0x180
    VADD.I64 d8,d7,d6                       ;// both groups present: average all 32 samples
    VRSHR.U64 d8,d8,#5                      ;// d8 = (sumLeft + sumAbove + 16) >> 5
L0x180:
    VDUP.8   q3,d8[0]                       ;// broadcast; overwritten just below when r11 == 0
    CMP      r11,#0
    ADD      r8,r3,r5                       ;// r8  -> odd destination rows
    ADD      r10,r5,r5                      ;// r10 = 2 * dstStep
    BNE      L0x198
    VMOV.I8  q3,#0x80                       ;// no neighbours summed: fill with 0x80
L0x198:
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    VST1.8   {d6,d7},[r3],r10
    VST1.8   {d6,d7},[r8],r10
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

;// ---- mode 3: plane ----------------------------------------------------
;// With a[i] = above[i], l[i] = left[i], al = above-left byte:
;//   H = sum k*(a[7+k] - a[7-k]), k = 1..8, where a[-1] is al
;//   V = the same sum over l[]
;//   b = (5*H + 32) >> 6,  c = (5*V + 32) >> 6,  a = 16*(a[15] + l[15])
;//   dst[y][x] = saturate_u8((a + b*(x-7) + c*(y-7) + 16) >> 5)
OMX_VC_16X16_PLANE:
    ADR      r9, armVCM4P10_MultiplierTable16x16
    VLD1.8   {d0,d1},[r1]                   ;// q0 = a[0..15]
    VLD1.8   {d4[0]},[r2]                   ;// d4 lane 0 = al; other d4 lanes stay undefined
    ADD      r8,r0,r4                       ;// r8  -> odd left samples
    ADD      r10,r4,r4                      ;// r10 = 2 * leftStep
    VLD1.8   {d2[0]},[r0],r10               ;// gather l[0..15] into q1, lane by lane
    VLD1.8   {d2[1]},[r8],r10
    VLD1.8   {d2[2]},[r0],r10
    VLD1.8   {d2[3]},[r8],r10
    VLD1.8   {d2[4]},[r0],r10
    VLD1.8   {d2[5]},[r8],r10
    VLD1.8   {d2[6]},[r0],r10
    VLD1.8   {d2[7]},[r8],r10
    VLD1.8   {d3[0]},[r0],r10
    VLD1.8   {d3[1]},[r8],r10
    VLD1.8   {d3[2]},[r0],r10
    VLD1.8   {d3[3]},[r8],r10
    VLD1.8   {d3[4]},[r0],r10
    VLD1.8   {d3[5]},[r8],r10
    VLD1.8   {d3[6]},[r0],r10
    VLD1.8   {d3[7]},[r8]
    VREV64.8 d5,d1                          ;// d5 = a[15], a[14], ..., a[8]
    VSUBL.U8 q3,d5,d4                       ;// only lane 0 is used: a[15] - al
    VSHR.U64 d5,d5,#8                       ;// d5 = a[14], ..., a[8], 0
    VSUBL.U8 q4,d5,d0                       ;// q4 = a[14]-a[0], ..., a[8]-a[6], -a[7]
    VSHL.I64 d9,d9,#16                      ;// drop the -a[7] lane ...
    VEXT.8   d9,d9,d6,#2                    ;// ... d9 = a[10]-a[4], a[9]-a[5], a[8]-a[6], a[15]-al
    VREV64.8 d12,d3                         ;// same construction for the left column
    VSUBL.U8 q7,d12,d4                      ;// lane 0 = l[15] - al
    VSHR.U64 d12,d12,#8
    VSUBL.U8 q8,d12,d2
    VLD1.16  {d20,d21},[r9]!                ;// q10 = weights 7,6,5,4,3,2,1,8
    VSHL.I64 d17,d17,#16
    VEXT.8   d17,d17,d14,#2                 ;// d17 = l[10]-l[4], l[9]-l[5], l[8]-l[6], l[15]-al
    VMULL.S16 q11,d8,d20                    ;// weighted differences, above row
    VMULL.S16 q12,d16,d20                   ;// weighted differences, left column
    VMLAL.S16 q11,d9,d21
    VMLAL.S16 q12,d17,d21
    VPADD.I32 d22,d23,d22                   ;// reduce q11 -> d22
    VPADD.I32 d23,d25,d24                   ;// reduce q12 -> d23
    VPADDL.S32 q11,q11                      ;// d22 = H, d23 = V (64-bit)
    VSHL.I64 q12,q11,#2
    VADD.I64 q11,q11,q12                    ;// 5*H, 5*V
    VRSHR.S64 q11,q11,#6                    ;// d22 = b, d23 = c
    VSHL.I64 q12,q11,#3
    VSUB.I64 q12,q12,q11                    ;// d24 = 7*b, d25 = 7*c
    VLD1.16  {d20,d21},[r9]!                ;// q10 = 0..7
    VDUP.16  q6,d22[0]                      ;// q6 = b in every lane
    VDUP.16  q7,d23[0]                      ;// q7 = c in every lane (per-row increment)
    VADDL.U8 q11,d1,d3                      ;// lane 7 = a[15] + l[15]
    VSHL.I16 q11,q11,#4
    VDUP.16  q11,d23[3]                     ;// q11 = a = 16*(a[15] + l[15]) in every lane
    VADD.I64 d1,d24,d25                     ;// 7*(b + c)
    VLD1.16  {d24,d25},[r9]                 ;// q12 = 8..15
    VDUP.16  q13,d1[0]
    VSUB.I16 q13,q11,q13                    ;// q13 = a - 7*(b + c)
    VMUL.I16 q5,q6,q10                      ;// b * x, x = 0..7
    VMUL.I16 q6,q6,q12                      ;// b * x, x = 8..15
    VADD.I16 q0,q5,q13                      ;// row 0, columns 0..7  (before rounding shift)
    VADD.I16 q1,q6,q13                      ;// row 0, columns 8..15
L0x2d4:                                     ;// one row per pass, r12 counts 16 -> 0
    VQRSHRUN.S16 d6,q0,#5                   ;// (v + 16) >> 5, saturated to u8
    VQRSHRUN.S16 d7,q1,#5
    SUBS     r12,r12,#1
    VST1.8   {d6,d7},[r3],r5
    VADD.I16 q0,q0,q7                       ;// next row: add c
    VADD.I16 q1,q1,q7
    BNE      L0x2d4
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}

;// ---- mode outside 0..3: write nothing, report failure ----------------
OMX_VC_16X16_BADMODE:
    MVN      r0,#4                          ;// r0 = -5
    VPOP     {d8-d15}
    POP      {r4-r12,pc}
    .size    omxVCM4P10_PredictIntra_16x16, . - omxVCM4P10_PredictIntra_16x16
.end