| @ |
| @ Copyright (C) 2009 The Android Open Source Project |
| @ |
| @ Licensed under the Apache License, Version 2.0 (the "License"); |
| @ you may not use this file except in compliance with the License. |
| @ You may obtain a copy of the License at |
| @ |
| @ http://www.apache.org/licenses/LICENSE-2.0 |
| @ |
| @ Unless required by applicable law or agreed to in writing, software |
| @ distributed under the License is distributed on an "AS IS" BASIS, |
| @ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @ See the License for the specific language governing permissions and |
| @ limitations under the License. |
| @ |
| |
| #include "asm_common.S" |
| |
| REQUIRE8 /* not defined in this file; presumably a macro from asm_common.S declaring that 8-byte stack alignment is required -- TODO confirm */ |
| PRESERVE8 /* not defined in this file; presumably a macro from asm_common.S declaring that 8-byte stack alignment is preserved -- TODO confirm */ |
| |
| .arm /* assemble the following code as ARM (A32) instructions rather than Thumb */ |
| .fpu neon /* allow NEON (Advanced SIMD) mnemonics such as VLD1/VST1 */ |
| .text /* place the following code in the text section */ |
| |
| /* Input / output registers: core-register aliases used by h264bsdWriteMacroblock */ |
| #define image r0 /* argument: base address from which width and the luma/cb/cr pointers are loaded */ |
| #define data r1 /* argument: source pointer, post-incremented by every VLD1 in the function */ |
| #define width r2 /* loaded from [image, #4], then shifted left by 4 to serve as the luma row stride */ |
| #define luma r3 /* loaded from [image, #0xC]; luma destination, advanced by width after each row */ |
| #define cb r4 /* loaded from [image, #0x10]; Cb destination, advanced by cwidth after each row */ |
| #define cr r5 /* loaded from [image, #0x14]; Cr destination, advanced by cwidth after each row */ |
| #define cwidth r6 /* the shifted width, shifted right by 1: chroma row stride */ |
| |
| /* -- NEON registers: qRowN names the 128-bit register QN, dRowN the 64-bit register DN -- */ |
| |
| #define qRow0 Q0 /* qRow0-qRow15 each carry one 16-byte luma row; qRow0-qRow7 are reloaded with chroma once stored */ |
| #define qRow1 Q1 |
| #define qRow2 Q2 |
| #define qRow3 Q3 |
| #define qRow4 Q4 /* Q4-Q7 overlap D8-D15, which h264bsdWriteMacroblock saves with VPUSH and restores with VPOP */ |
| #define qRow5 Q5 |
| #define qRow6 Q6 |
| #define qRow7 Q7 |
| #define qRow8 Q8 |
| #define qRow9 Q9 |
| #define qRow10 Q10 |
| #define qRow11 Q11 |
| #define qRow12 Q12 |
| #define qRow13 Q13 |
| #define qRow14 Q14 |
| #define qRow15 Q15 |
| |
| #define dRow0 D0 /* dRow0-dRow7 are the halves of Q0-Q3: the eight 8-byte Cb rows after the chroma reload */ |
| #define dRow1 D1 |
| #define dRow2 D2 |
| #define dRow3 D3 |
| #define dRow4 D4 |
| #define dRow5 D5 |
| #define dRow6 D6 |
| #define dRow7 D7 |
| #define dRow8 D8 /* dRow8-dRow15 are the halves of Q4-Q7: the eight 8-byte Cr rows after the chroma reload */ |
| #define dRow9 D9 |
| #define dRow10 D10 |
| #define dRow11 D11 |
| #define dRow12 D12 |
| #define dRow13 D13 |
| #define dRow14 D14 |
| #define dRow15 D15 |
| |
| /*------------------------------------------------------------------------------ |
| |
|     Function: h264bsdWriteMacroblock |
| |
|     Functional description: |
|         Write one macroblock into the image. Both luma and chroma |
|         components will be written at the same time. Loads from the |
|         macroblock buffer are interleaved with stores to the image; the |
|         instruction order is deliberate and should be kept. |
| |
|     Inputs: |
|         data (r1)   pointer to macroblock data to be written: 256 bytes of |
|                     luma (16 rows of 16) followed by 64 bytes of Cb and |
|                     then 64 bytes of Cr (8 rows of 8 each), 384 bytes in |
|                     total |
|         image (r0)  pointer to the image description; the fields read are |
|                     named by the IMAGE_OFFSET_* constants below |
| |
|     Outputs: |
|         image       the memory addressed by the luma, cb and cr pointers |
|                     read from the image description receives the |
|                     macroblock; the description itself is not modified |
| |
|     Requirements: |
|         The luma pointer must be 16-byte aligned and the cb / cr pointers |
|         8-byte aligned: every store carries a :128 / :64 alignment |
|         qualifier. The width field is multiplied by 16 to form the luma |
|         row stride (presumably it counts macroblocks -- confirm with the |
|         caller); the chroma row stride is half the luma stride. |
| |
|     Registers: |
|         r4-r6 and q4-q7 are saved on entry and restored on exit. |
|         r1-r3, q0-q3 and q8-q15 are modified and not restored. |
| |
|     Returns: |
|         none |
| |
| ------------------------------------------------------------------------------*/ |
| |
| /* Byte offsets of the image description fields read by this function */ |
| #define IMAGE_OFFSET_WIDTH 0x4 |
| #define IMAGE_OFFSET_LUMA 0xC |
| #define IMAGE_OFFSET_CB 0x10 |
| #define IMAGE_OFFSET_CR 0x14 |
| |
| function h264bsdWriteMacroblock, export=1 |
|     PUSH    {r4-r6,lr}                          @ r4-r6 are used as cb / cr / cwidth |
|     VPUSH   {q4-q7}                             @ d8-d15 are callee-saved |
| |
|     LDR     width, [image, #IMAGE_OFFSET_WIDTH] |
|     LDR     luma, [image, #IMAGE_OFFSET_LUMA] |
|     LDR     cb, [image, #IMAGE_OFFSET_CB] |
|     LDR     cr, [image, #IMAGE_OFFSET_CR] |
| |
| |
|     @ Write luma: all 16 rows are loaded into qRow0-qRow15 while the rows |
|     @ already loaded are stored, one per image line |
|     VLD1.8  {qRow0, qRow1}, [data]!             @ luma rows 0,1 |
|     LSL     width, width, #4                    @ width * 16 = luma row stride in bytes |
|     VLD1.8  {qRow2, qRow3}, [data]!             @ luma rows 2,3 |
|     LSR     cwidth, width, #1                   @ chroma row stride = luma stride / 2 |
|     VST1.8  {qRow0}, [luma,:128], width |
|     VLD1.8  {qRow4, qRow5}, [data]!             @ luma rows 4,5 |
|     VST1.8  {qRow1}, [luma,:128], width |
|     VLD1.8  {qRow6, qRow7}, [data]!             @ luma rows 6,7 |
|     VST1.8  {qRow2}, [luma,:128], width |
|     VLD1.8  {qRow8, qRow9}, [data]!             @ luma rows 8,9 |
|     VST1.8  {qRow3}, [luma,:128], width |
|     VLD1.8  {qRow10, qRow11}, [data]!           @ luma rows 10,11 |
|     VST1.8  {qRow4}, [luma,:128], width |
|     VLD1.8  {qRow12, qRow13}, [data]!           @ luma rows 12,13 |
|     VST1.8  {qRow5}, [luma,:128], width |
|     VLD1.8  {qRow14, qRow15}, [data]!           @ luma rows 14,15 |
|     VST1.8  {qRow6}, [luma,:128], width |
| |
|     @ qRow0-qRow7 are reloaded with chroma; each one is overwritten only |
|     @ after its luma row has been stored |
|     VLD1.8  {qRow0, qRow1}, [data]!             @ cb rows 0,1,2,3 |
|     VST1.8  {qRow7}, [luma,:128], width |
|     VLD1.8  {qRow2, qRow3}, [data]!             @ cb rows 4,5,6,7 |
|     VST1.8  {qRow8}, [luma,:128], width |
|     VLD1.8  {qRow4, qRow5}, [data]!             @ cr rows 0,1,2,3 |
|     VST1.8  {qRow9}, [luma,:128], width |
|     VLD1.8  {qRow6, qRow7}, [data]!             @ cr rows 4,5,6,7 |
|     VST1.8  {qRow10}, [luma,:128], width |
| |
|     @ Remaining luma rows interleaved with the chroma rows: |
|     @ dRow0-dRow7 hold Cb rows 0-7, dRow8-dRow15 hold Cr rows 0-7 |
|     VST1.8  {dRow0}, [cb,:64], cwidth |
|     VST1.8  {dRow8}, [cr,:64], cwidth |
|     VST1.8  {qRow11}, [luma,:128], width |
|     VST1.8  {dRow1}, [cb,:64], cwidth |
|     VST1.8  {dRow9}, [cr,:64], cwidth |
|     VST1.8  {qRow12}, [luma,:128], width |
|     VST1.8  {dRow2}, [cb,:64], cwidth |
|     VST1.8  {dRow10}, [cr,:64], cwidth |
|     VST1.8  {qRow13}, [luma,:128], width |
|     VST1.8  {dRow3}, [cb,:64], cwidth |
|     VST1.8  {dRow11}, [cr,:64], cwidth |
|     VST1.8  {qRow14}, [luma,:128], width |
|     VST1.8  {dRow4}, [cb,:64], cwidth |
|     VST1.8  {dRow12}, [cr,:64], cwidth |
|     VST1.8  {qRow15}, [luma,:128]               @ last luma row: no post-increment needed |
|     VST1.8  {dRow5}, [cb,:64], cwidth |
|     VST1.8  {dRow13}, [cr,:64], cwidth |
|     VST1.8  {dRow6}, [cb,:64], cwidth |
|     VST1.8  {dRow14}, [cr,:64], cwidth |
|     VST1.8  {dRow7}, [cb,:64]                   @ last chroma rows: no post-increment needed |
|     VST1.8  {dRow15}, [cr,:64] |
| |
|     VPOP    {q4-q7} |
|     POP     {r4-r6,pc}                          @ loading pc returns to the caller |
| @ BX lr |
| |
| |
| |
| |