;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;// 
;// File Name:  armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;// 
;// 
;// 
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h
        
        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe

;// Functions: 
;//     armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
;//     armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe 
;//
;// Implements re-arrangement of data from a temporary buffer to the buffer pointed to by pBuf.
;// This converts the data from 16 bit to 8 bit; it also
;// removes the offset and checks for saturation.
;//
;// Registers used as input for this function
;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
;//
;// Registers preserved for top level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the destination location
;// r1 - step size to this destination location


;// Build-time logical flag, set to false here (presumably read by the
;// debug helpers of armCOMM_s.h - confirm against that header).
DEBUG_ON        SETL {FALSE}

;// 0x80 replicated in every byte. The original note says it is used to
;// implement the rounding average (a+b+1)/2; no instruction in this file
;// refers to it.
MASK            EQU 0x80808080

;// ---- Argument / result registers -------------------------------------
pSrc0           RN 0        ;// source pointer on entry, result pointer on exit
srcStep0        RN 1        ;// source step on entry, result step on exit

;// ---- Output cursor ---------------------------------------------------
pBuf            RN 7

;// ---- Packed halfword constants (register named after its contents) ---
r0x0fe00fe0     RN 6
r0x00ff00ff     RN 12

;// ---- Scratch words holding the four loaded source words --------------
Temp1           RN 4
Temp2           RN 5
Temp3           RN 10
Temp4           RN 11

;// ---- Packed 8-bit results. These are the SAME registers as Temp3 and
;// ---- Temp4 (r10/r11), so writing ValueA0 destroys Temp3 and writing
;// ---- ValueA1 destroys Temp4.
ValueA0         RN 10
ValueA1         RN 11

;// ---- Loop counter, kept in r14 ---------------------------------------
Count           RN 14

    IF ARM1136JS


        ;// ------------------------------------------------------------------
        ;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        ;//
        ;// Runs 4 iterations. Each iteration reads four 32-bit words (eight
        ;// 16-bit values) at pSrc0+0/+4/+8/+12, advances pSrc0 by srcStep0,
        ;// reduces every 16-bit value to 8 bits and stores the resulting
        ;// 8 bytes at pBuf (post-incremented by 8).
        ;//
        ;// Per 16-bit lane:
        ;//   1. UQSUB16 : subtract 0x0fe0, clamping at 0 (offset removal)
        ;//   2. USAT16  : clamp to the unsigned 13-bit range 0..0x1fff
        ;//   3. LSR #5 + AND 0x00ff : keep bits [12:5] as the 8-bit result
        ;//
        ;// In : pSrc0 (r0) source pointer, srcStep0 (r1) source step,
        ;//      pBuf (r7) output pointer
        ;// Out: pSrc0 = pBuf - 32 (start of the 32 bytes just written),
        ;//      srcStep0 = 8
        ;// Writes r10, r11, r12 and advances pBuf without restoring them.
        ;// r4, r5, r6 and r14 are also written; this relies on
        ;// M_START/M_END (armCOMM_s.h, not visible here) to save and restore
        ;// r4-r6 and the return address - TODO confirm against the macro.
        ;// ------------------------------------------------------------------
        ;// Function header
        M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6

        ;// Code start     
        MOV         Count, #4                       ;// 4 source rows to convert
        LDR         r0x0fe00fe0, =0x0fe00fe0        ;// per-halfword offset to subtract
        LDR         r0x00ff00ff, =0x00ff00ff        ;// keeps the low byte of each halfword
LoopStart1
        ;// Load the row. The three fixed-offset loads must precede M_LDR,
        ;// which (presumably a post-indexed load, see armCOMM_s.h) moves
        ;// pSrc0 on by srcStep0 to the next row.
        LDR         Temp4, [pSrc0, #12]
        LDR         Temp3, [pSrc0, #8]        
        LDR         Temp2, [pSrc0, #4]
        M_LDR       Temp1, [pSrc0], srcStep0              
        ;// Remove the offset from both halfwords of each word (unsigned
        ;// saturating, so results never go below 0).
        UQSUB16     Temp4, Temp4, r0x0fe00fe0        
        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
        ;// Clamp every halfword to 0..0x1fff.
        ;// NOTE(review): USAT16 interprets its input as signed, so a halfword
        ;// >= 0x8000 at this point would clamp to 0 rather than 0x1fff;
        ;// presumably the producer of the buffer never yields such values.
        USAT16      Temp4, #13, Temp4
        USAT16      Temp3, #13, Temp3                          
        USAT16      Temp2, #13, Temp2
        USAT16      Temp1, #13, Temp1                                  
        ;// Scale down by 32 and isolate the 8-bit result in bits [7:0] and
        ;// [23:16] of each word.
        AND         Temp4, r0x00ff00ff, Temp4, LSR #5         
        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
        ;// Interleave pairs of words into 4 bytes each. ValueA1/ValueA0 are
        ;// r11/r10, i.e. Temp4/Temp3: ValueA1 must be formed first because
        ;// it still needs Temp3 (r10), which the next ORR overwrites.
        ORR         ValueA1, Temp3, Temp4, LSL #8             ;// bytes LSB->MSB: T3.lo T4.lo T3.hi T4.hi
        ORR         ValueA0, Temp1, Temp2, LSL #8             ;// bytes LSB->MSB: T1.lo T2.lo T1.hi T2.hi
        SUBS        Count, Count, #1                          ;// flags consumed by BGT; STRD leaves them intact
        STRD        ValueA0, [pBuf], #8                       ;// store r10,r11 (ValueA0,ValueA1); pBuf += 8
        BGT         LoopStart1
End1
        ;// 4 iterations x 8 bytes were written: rewind to the first byte and
        ;// report an 8-byte row step to the caller.
        SUB        pSrc0, pBuf, #32
        MOV        srcStep0, #8

        M_END


        ;// ------------------------------------------------------------------
        ;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        ;//
        ;// Runs 2 iterations, each handling two source rows (the row code is
        ;// written out twice). For every source row it reads four 32-bit
        ;// words at pSrc0+0/+4/+8/+12, advances pSrc0 by srcStep0, reduces
        ;// each 16-bit value to 8 bits (subtract 0x0fe0 with clamp at 0,
        ;// clamp to 0..0x1fff, keep bits [12:5]) and emits two 32-bit words:
        ;// one built from the low halfwords of the four source words and one
        ;// built from their high halfwords.
        ;//
        ;// Store layout per iteration, relative to pBuf at iteration start:
        ;//   first  row : low-halfword word at +0,  high-halfword word at +8
        ;//   second row : low-halfword word at +4,  high-halfword word at +12
        ;// pBuf advances by 16 per iteration, 32 bytes in total.
        ;//
        ;// In : pSrc0 (r0) source pointer, srcStep0 (r1) source step,
        ;//      pBuf (r7) output pointer
        ;// Out: pSrc0 = pBuf - 24, i.e. 8 bytes past the first byte written,
        ;//      srcStep0 = 4
        ;// Writes r10, r11, r12 and advances pBuf without restoring them.
        ;// r4, r5, r6 and r14 are also written; this relies on
        ;// M_START/M_END (armCOMM_s.h, not visible here) to save and restore
        ;// r4-r6 and the return address - TODO confirm against the macro.
        ;//
        ;// Bracket notation below lists bytes MSB first; a/b/c/d denote the
        ;// results taken from Temp1/Temp2/Temp3/Temp4, suffix 0 the low
        ;// halfword of the source word and suffix 2 its high halfword.
        ;// ------------------------------------------------------------------
        ;// Function header
        M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6
        
        ;// Code start        
        LDR         r0x0fe00fe0, =0x0fe00fe0        ;// per-halfword offset to subtract
        LDR         r0x00ff00ff, =0x00ff00ff        ;// keeps the low byte of each halfword
        MOV         Count, #2                       ;// 2 iterations x 2 source rows

LoopStart    
        ;// ---- First source row of this iteration ----
        ;// Fixed-offset loads come before M_LDR, which (presumably a
        ;// post-indexed load, see armCOMM_s.h) steps pSrc0 to the next row.
        LDR         Temp4, [pSrc0, #12]
        LDR         Temp3, [pSrc0, #8]        
        LDR         Temp2, [pSrc0, #4]
        M_LDR       Temp1, [pSrc0], srcStep0
        
        ;// Remove the offset from each halfword, clamping at 0.
        UQSUB16     Temp4, Temp4, r0x0fe00fe0        
        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
        
        ;// Clamp each halfword to 0..0x1fff.
        ;// NOTE(review): USAT16 interprets its input as signed, so a halfword
        ;// >= 0x8000 here would clamp to 0; presumably never produced upstream.
        USAT16      Temp4, #13, Temp4
        USAT16      Temp3, #13, Temp3                          
        USAT16      Temp2, #13, Temp2
        USAT16      Temp1, #13, Temp1
                                  
        ;// Scale down by 32 and keep one byte per halfword.
        AND         Temp4, r0x00ff00ff, Temp4, LSR #5         
        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
        ;// ValueA1/ValueA0 are r11/r10 (= Temp4/Temp3): ValueA1 is built
        ;// first because it reads Temp3 (r10) before ValueA0 overwrites it.
        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]             
        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]         
                    
        ;// Low halfword of ValueA0 below, low halfword of ValueA1 on top.
        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]

        STR         Temp1, [pBuf], #8                    ;// written at +0, pBuf -> +8
        ;// High halfword of ValueA1 on top, high halfword of ValueA0 below.
        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
        STR         Temp2, [pBuf], #-4                   ;// written at +8, pBuf -> +4

        ;// ---- Second source row of this iteration (same processing) ----
        LDR         Temp4, [pSrc0, #12]
        LDR         Temp3, [pSrc0, #8]        
        LDR         Temp2, [pSrc0, #4]
        M_LDR       Temp1, [pSrc0], srcStep0
        
        UQSUB16     Temp4, Temp4, r0x0fe00fe0        
        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
        
        USAT16      Temp4, #13, Temp4
        USAT16      Temp3, #13, Temp3                          
        USAT16      Temp2, #13, Temp2
        USAT16      Temp1, #13, Temp1
                                  
        AND         Temp4, r0x00ff00ff, Temp4, LSR #5         
        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]             
        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]         
                    
        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
        ;// Flags are set here and consumed by BGT below; the STR and PKHTB
        ;// in between do not modify them.
        SUBS        Count, Count, #1
        STR         Temp1, [pBuf], #8                    ;// written at +4, pBuf -> +12
        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
        STR         Temp2, [pBuf], #4                    ;// written at +12, pBuf -> +16
        
        BGT         LoopStart
End2
        ;// pBuf is now 32 bytes past its entry value. The returned pointer is
        ;// pBuf - 24, i.e. entry value + 8, with a 4-byte step.
        ;// NOTE(review): the 8-byte skip presumably drops the first two
        ;// 4-byte words on purpose for the caller - confirm at the call site.
        SUB         pSrc0, pBuf, #32-8
        MOV         srcStep0, #4

        M_END

    ENDIF
    
    END
    
