; blob: 38a078132ea07b5f2b6e568e8c92bf41ba50d697 [file] [log] [blame]
;
; Copyright (C) 2009 The Android Open Source Project
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
; http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
REQUIRE8
PRESERVE8
AREA |.text|, CODE
EXPORT h264bsdWriteMacroblock
; Input / output registers
image RN 0
data RN 1
width RN 2
luma RN 3
cb RN 4
cr RN 5
cwidth RN 6
; -- NEON registers --
qRow0 QN Q0.U8
qRow1 QN Q1.U8
qRow2 QN Q2.U8
qRow3 QN Q3.U8
qRow4 QN Q4.U8
qRow5 QN Q5.U8
qRow6 QN Q6.U8
qRow7 QN Q7.U8
qRow8 QN Q8.U8
qRow9 QN Q9.U8
qRow10 QN Q10.U8
qRow11 QN Q11.U8
qRow12 QN Q12.U8
qRow13 QN Q13.U8
qRow14 QN Q14.U8
qRow15 QN Q15.U8
dRow0 DN D0.U8
dRow1 DN D1.U8
dRow2 DN D2.U8
dRow3 DN D3.U8
dRow4 DN D4.U8
dRow5 DN D5.U8
dRow6 DN D6.U8
dRow7 DN D7.U8
dRow8 DN D8.U8
dRow9 DN D9.U8
dRow10 DN D10.U8
dRow11 DN D11.U8
dRow12 DN D12.U8
dRow13 DN D13.U8
dRow14 DN D14.U8
dRow15 DN D15.U8
;/*------------------------------------------------------------------------------
;
; Function: h264bsdWriteMacroblock
;
; Functional description:
; Write one macroblock into the image. Both luma and chroma
; components will be written at the same time.
;
; Inputs:
; data pointer to macroblock data to be written, 256 values for
; luma followed by 64 values for both chroma components
;
; Outputs:
; image pointer to the image where the macroblock will be written
;
; Returns:
; none
;
;------------------------------------------------------------------------------*/
h264bsdWriteMacroblock
PUSH {r4-r6,lr}
VPUSH {q4-q7}
LDR width, [image, #4]
LDR luma, [image, #0xC]
LDR cb, [image, #0x10]
LDR cr, [image, #0x14]
; Write luma
VLD1 {qRow0, qRow1}, [data]!
LSL width, width, #4
VLD1 {qRow2, qRow3}, [data]!
LSR cwidth, width, #1
VST1 {qRow0}, [luma@128], width
VLD1 {qRow4, qRow5}, [data]!
VST1 {qRow1}, [luma@128], width
VLD1 {qRow6, qRow7}, [data]!
VST1 {qRow2}, [luma@128], width
VLD1 {qRow8, qRow9}, [data]!
VST1 {qRow3}, [luma@128], width
VLD1 {qRow10, qRow11}, [data]!
VST1 {qRow4}, [luma@128], width
VLD1 {qRow12, qRow13}, [data]!
VST1 {qRow5}, [luma@128], width
VLD1 {qRow14, qRow15}, [data]!
VST1 {qRow6}, [luma@128], width
VLD1 {qRow0, qRow1}, [data]! ;cb rows 0,1,2,3
VST1 {qRow7}, [luma@128], width
VLD1 {qRow2, qRow3}, [data]! ;cb rows 4,5,6,7
VST1 {qRow8}, [luma@128], width
VLD1 {qRow4, qRow5}, [data]! ;cr rows 0,1,2,3
VST1 {qRow9}, [luma@128], width
VLD1 {qRow6, qRow7}, [data]! ;cr rows 4,5,6,7
VST1 {qRow10}, [luma@128], width
VST1 {dRow0}, [cb@64], cwidth
VST1 {dRow8}, [cr@64], cwidth
VST1 {qRow11}, [luma@128], width
VST1 {dRow1}, [cb@64], cwidth
VST1 {dRow9}, [cr@64], cwidth
VST1 {qRow12}, [luma@128], width
VST1 {dRow2}, [cb@64], cwidth
VST1 {dRow10}, [cr@64], cwidth
VST1 {qRow13}, [luma@128], width
VST1 {dRow3}, [cb@64], cwidth
VST1 {dRow11}, [cr@64], cwidth
VST1 {qRow14}, [luma@128], width
VST1 {dRow4}, [cb@64], cwidth
VST1 {dRow12}, [cr@64], cwidth
VST1 {qRow15}, [luma]
VST1 {dRow5}, [cb@64], cwidth
VST1 {dRow13}, [cr@64], cwidth
VST1 {dRow6}, [cb@64], cwidth
VST1 {dRow14}, [cr@64], cwidth
VST1 {dRow7}, [cb@64]
VST1 {dRow15}, [cr@64]
VPOP {q4-q7}
POP {r4-r6,pc}
END