blob: fe9e7dc6516856934e17f3f4d1f0714285ee220a [file] [log] [blame]
/* ------------------------------------------------------------------
* Copyright (C) 1998-2009 PacketVideo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
* -------------------------------------------------------------------
*/
/*
------------------------------------------------------------------------------
INPUT AND OUTPUT DEFINITIONS
Inputs:
xpos = x half-pixel of (x,y) coordinates within a VOP; motion
compensated coordinates; native type
ypos = y half-pixel of (x,y) coordinates within a VOP; motion
compensated coordinates; native type
comp = pointer to 8-bit compensated prediction values within a VOP;
computed by this module (i/o); full-pel resolution
c_prev = pointer to previous 8-bit prediction values within a VOP;
values range from (0-255); full-pel resolution
sh_d = pointer to residual values used to compensate the predicted
value; values range from (-512 to 511); full-pel resolution
width = width of the VOP in pixels (x axis); full-pel resolution
rnd1 = rounding value for case when one dimension uses half-pel
resolution
rnd2 = rounding value for the case when both dimensions use half-pel
resolution
CBP = flag indicating whether residual is all zeros
(0 -> all zeros, 1 -> not all zeros)
outside_flag = flag indicating whether motion vector is outside the
VOP (0 -> inside, 1 -> outside)
Outputs:
returns 1
Local Stores/Buffers/Pointers Needed:
None
Global Stores/Buffers/Pointers Needed:
None
Pointers and Buffers Modified:
comp = buffer contains newly computed compensated prediction values
Local Stores Modified:
None
Global Stores Modified:
None
------------------------------------------------------------------------------
FUNCTION DESCRIPTION
Compute pixel values for a block in the current VOP. The prediction
values are generated by averaging pixel values in the previous VOP; the
block position in the previous frame is computed from the current block's
motion vector. The output pixel values are then obtained by adding the
prediction values to the block residual values.
------------------------------------------------------------------------------
*/
/*----------------------------------------------------------------------------
; INCLUDES
----------------------------------------------------------------------------*/
#include "mp4dec_lib.h"
#include "motion_comp.h"
#define OSCL_DISABLE_WARNING_CONV_POSSIBLE_LOSS_OF_DATA
/*
 * GetPredAdvancedBy0x0
 *
 * Full-pel prediction: copies a block that is 8 pixels wide and B_SIZE rows
 * high from `prev` to `pred_block` without any interpolation.
 *
 *   prev            first source pixel of the block; any byte alignment
 *   pred_block      destination; every row is written as two 32-bit stores
 *   width           distance in bytes between two consecutive source rows
 *   pred_width_rnd  (destination row stride in bytes) << 1; bit 0 is ignored
 *                   by this function
 *
 * Always returns 1.
 *
 * The source is only ever read through 32-bit loads from addresses rounded
 * down to a multiple of four.  When `prev` is not word-aligned, three words
 * are loaded per row and the eight wanted bytes are cut out with shifts, so
 * up to three bytes before and after the row are read (but never used).
 * The shift directions treat the lowest-addressed byte as the least
 * significant byte of a word, i.e. a little-endian target is assumed.
 * NOTE(review): `width`, `pred_block` and the destination stride have to keep
 * the 32-bit accesses aligned - presumably guaranteed by the caller.
 */
int GetPredAdvancedBy0x0(
    uint8 *prev,        /* i */
    uint8 *pred_block,  /* o */
    int width,          /* i */
    int pred_width_rnd  /* i */
)
{
    const int dst_stride = pred_width_rnd >> 1;
    const int misalign = (int)((uintptr_t)prev & 0x3);
    int row;

    if (misalign == 0)
    {
        /* Source already word-aligned: plain two-word copy per row. */
        for (row = 0; row < B_SIZE; row++)
        {
            const uint32 *src = (const uint32 *)(prev + row * width);
            uint32 *dst = (uint32 *)(pred_block + row * dst_stride);

            dst[0] = src[0];
            dst[1] = src[1];
        }
    }
    else
    {
        /* Round the source down to a word boundary and drop the `misalign`
         * leading bytes of every row while merging neighbouring words. */
        const uint8 *aligned = prev - misalign;
        const int drop = misalign << 3;     /* 8, 16 or 24 bits */
        const int keep = 32 - drop;         /* 24, 16 or 8 bits */

        for (row = 0; row < B_SIZE; row++)
        {
            const uint32 *src = (const uint32 *)(aligned + row * width);
            uint32 *dst = (uint32 *)(pred_block + row * dst_stride);
            uint32 w0 = src[0];
            uint32 w1 = src[1];
            uint32 w2;

            dst[0] = (w0 >> drop) | (w1 << keep);   /* pixels 0..3 */
            w2 = src[2];
            dst[1] = (w1 >> drop) | (w2 << keep);   /* pixels 4..7 */
        }
    }

    return 1;
}
/**************************************************************************/
int GetPredAdvancedBy0x1(
uint8 *prev, /* i */
uint8 *pred_block, /* i */
int width, /* i */
int pred_width_rnd /* i */
)
{
uint i; /* loop variable */
int offset, offset2;
uint32 word1, word2, word3, word12;
int tmp;
int rnd1;
uint32 mask;
/* initialize offset to adjust pixel counter */
/* the next row; full-pel resolution */
offset = width - B_SIZE; /* offset for prev */
offset2 = (pred_width_rnd >> 1) - 4; /* offset of pred_block */
rnd1 = pred_width_rnd & 1;
/* Branch based on pixel location (half-pel or full-pel) for x and y */
pred_block -= offset2; /* preset */
tmp = (uintptr_t)prev & 3;
mask = 254;
mask |= (mask << 8);
mask |= (mask << 16); /* 0xFEFEFEFE */
if (tmp == 0) /* word-aligned */
{
if (rnd1 == 1)
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)prev); /* b4 b3 b2 b1 */
word2 = *((uint32*)(prev += 4)); /* b8 b7 b6 b5 */
word12 = (word1 >> 8); /* 0 b4 b3 b2 */
word12 |= (word2 << 24); /* b5 b4 b3 b2 */
word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word1 >>= 1;
word1 = word1 + (word12 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
word12 = (word2 >> 8); /* 0 b8 b7 b6 */
word12 |= (word1 << 24); /* b9 b8 b7 b6 */
word3 = word2 | word12;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word2 >>= 1;
word2 = word2 + (word12 >> 1);
word2 += word3;
*((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
prev += offset;
}
return 1;
}
else /* rnd1 == 0 */
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)prev); /* b4 b3 b2 b1 */
word2 = *((uint32*)(prev += 4)); /* b8 b7 b6 b5 */
word12 = (word1 >> 8); /* 0 b4 b3 b2 */
word12 |= (word2 << 24); /* b5 b4 b3 b2 */
word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word1 >>= 1;
word1 = word1 + (word12 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
word1 = *((uint32*)(prev += 4)); /* b12 b11 b10 b9 */
word12 = (word2 >> 8); /* 0 b8 b7 b6 */
word12 |= (word1 << 24); /* b9 b8 b7 b6 */
word3 = word2 & word12;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word2 >>= 1;
word2 = word2 + (word12 >> 1);
word2 += word3;
*((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
prev += offset;
}
return 1;
} /* rnd1 */
}
else if (tmp == 1)
{
prev--; /* word-aligned */
if (rnd1 == 1)
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)prev); /* b3 b2 b1 b0 */
word2 = *((uint32*)(prev += 4)); /* b7 b6 b5 b4 */
word12 = (word1 >> 8); /* 0 b3 b2 b1 */
word1 >>= 16; /* 0 0 b3 b2 */
word12 |= (word2 << 24); /* b4 b3 b2 b1 */
word1 |= (word2 << 16); /* b5 b4 b3 b2 */
word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word1 >>= 1;
word1 = word1 + (word12 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
word1 = *((uint32*)(prev += 4)); /* b11 b10 b9 b8 */
word12 = (word2 >> 8); /* 0 b7 b6 b5 */
word2 >>= 16; /* 0 0 b7 b6 */
word12 |= (word1 << 24); /* b8 b7 b6 b5 */
word2 |= (word1 << 16); /* b9 b8 b7 b6 */
word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word2&word12
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word2 >>= 1;
word2 = word2 + (word12 >> 1);
word2 += word3;
*((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
prev += offset;
}
return 1;
}
else /* rnd1 = 0 */
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)prev); /* b3 b2 b1 b0 */
word2 = *((uint32*)(prev += 4)); /* b7 b6 b5 b4 */
word12 = (word1 >> 8); /* 0 b3 b2 b1 */
word1 >>= 16; /* 0 0 b3 b2 */
word12 |= (word2 << 24); /* b4 b3 b2 b1 */
word1 |= (word2 << 16); /* b5 b4 b3 b2 */
word3 = word1 & word12;
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word1 >>= 1;
word1 = word1 + (word12 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
word1 = *((uint32*)(prev += 4)); /* b11 b10 b9 b8 */
word12 = (word2 >> 8); /* 0 b7 b6 b5 */
word2 >>= 16; /* 0 0 b7 b6 */
word12 |= (word1 << 24); /* b8 b7 b6 b5 */
word2 |= (word1 << 16); /* b9 b8 b7 b6 */
word3 = word2 & word12;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word2 >>= 1;
word2 = word2 + (word12 >> 1);
word2 += word3;
*((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
prev += offset;
}
return 1;
} /* rnd1 */
}
else if (tmp == 2)
{
prev -= 2; /* word-aligned */
if (rnd1 == 1)
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)prev); /* b2 b1 b0 bN1 */
word2 = *((uint32*)(prev += 4)); /* b6 b5 b4 b3 */
word12 = (word1 >> 16); /* 0 0 b2 b1 */
word1 >>= 24; /* 0 0 0 b2 */
word12 |= (word2 << 16); /* b4 b3 b2 b1 */
word1 |= (word2 << 8); /* b5 b4 b3 b2 */
word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word1 >>= 1;
word1 = word1 + (word12 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
word1 = *((uint32*)(prev += 4)); /* b10 b9 b8 b7 */
word12 = (word2 >> 16); /* 0 0 b6 b5 */
word2 >>= 24; /* 0 0 0 b6 */
word12 |= (word1 << 16); /* b8 b7 b6 b5 */
word2 |= (word1 << 8); /* b9 b8 b7 b6 */
word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word2 >>= 1;
word2 = word2 + (word12 >> 1);
word2 += word3;
*((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
prev += offset;
}
return 1;
}
else /* rnd1 == 0 */
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)prev); /* b2 b1 b0 bN1 */
word2 = *((uint32*)(prev += 4)); /* b6 b5 b4 b3 */
word12 = (word1 >> 16); /* 0 0 b2 b1 */
word1 >>= 24; /* 0 0 0 b2 */
word12 |= (word2 << 16); /* b4 b3 b2 b1 */
word1 |= (word2 << 8); /* b5 b4 b3 b2 */
word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word1 >>= 1;
word1 = word1 + (word12 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
word1 = *((uint32*)(prev += 4)); /* b10 b9 b8 b7 */
word12 = (word2 >> 16); /* 0 0 b6 b5 */
word2 >>= 24; /* 0 0 0 b6 */
word12 |= (word1 << 16); /* b8 b7 b6 b5 */
word2 |= (word1 << 8); /* b9 b8 b7 b6 */
word3 = word2 & word12; // rnd1 = 1; otherwise word3 = word1&word12
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word2 >>= 1;
word2 = word2 + (word12 >> 1);
word2 += word3;
*((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
prev += offset;
}
return 1;
}
}
else /* tmp = 3 */
{
prev -= 3; /* word-aligned */
if (rnd1 == 1)
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)prev); /* b1 b0 bN1 bN2 */
word2 = *((uint32*)(prev += 4)); /* b5 b4 b3 b2 */
word12 = (word1 >> 24); /* 0 0 0 b1 */
word12 |= (word2 << 8); /* b4 b3 b2 b1 */
word1 = word2;
word3 = word1 | word12; // rnd1 = 1; otherwise word3 = word1&word12
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word1 >>= 1;
word1 = word1 + (word12 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
word1 = *((uint32*)(prev += 4)); /* b9 b8 b7 b6 */
word12 = (word2 >> 24); /* 0 0 0 b5 */
word12 |= (word1 << 8); /* b8 b7 b6 b5 */
word2 = word1; /* b9 b8 b7 b6 */
word3 = word2 | word12; // rnd1 = 1; otherwise word3 = word1&word12
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word2 >>= 1;
word2 = word2 + (word12 >> 1);
word2 += word3;
*((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
prev += offset;
}
return 1;
}
else
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)prev); /* b1 b0 bN1 bN2 */
word2 = *((uint32*)(prev += 4)); /* b5 b4 b3 b2 */
word12 = (word1 >> 24); /* 0 0 0 b1 */
word12 |= (word2 << 8); /* b4 b3 b2 b1 */
word1 = word2;
word3 = word1 & word12; // rnd1 = 1; otherwise word3 = word1&word12
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word1 >>= 1;
word1 = word1 + (word12 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1; /* write 4 pixels */
word1 = *((uint32*)(prev += 4)); /* b9 b8 b7 b6 */
word12 = (word2 >> 24); /* 0 0 0 b5 */
word12 |= (word1 << 8); /* b8 b7 b6 b5 */
word2 = word1; /* b9 b8 b7 b6 */
word3 = word2 & word12; // rnd1 = 1; otherwise word3 = word1&word12
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 &= mask;
word2 >>= 1;
word2 = word2 + (word12 >> 1);
word2 += word3;
*((uint32*)(pred_block += 4)) = word2; /* write 4 pixels */
prev += offset;
}
return 1;
}
}
}
/**************************************************************************/
int GetPredAdvancedBy1x0(
uint8 *prev, /* i */
uint8 *pred_block, /* i */
int width, /* i */
int pred_width_rnd /* i */
)
{
uint i; /* loop variable */
int offset, offset2;
uint32 word1, word2, word3, word12, word22;
int tmp;
int rnd1;
uint32 mask;
/* initialize offset to adjust pixel counter */
/* the next row; full-pel resolution */
offset = width - B_SIZE; /* offset for prev */
offset2 = (pred_width_rnd >> 1) - 4; /* offset for pred_block */
rnd1 = pred_width_rnd & 1;
/* Branch based on pixel location (half-pel or full-pel) for x and y */
pred_block -= offset2; /* preset */
tmp = (uintptr_t)prev & 3;
mask = 254;
mask |= (mask << 8);
mask |= (mask << 16); /* 0xFEFEFEFE */
if (tmp == 0) /* word-aligned */
{
prev -= 4;
if (rnd1 == 1)
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)(prev += 4));
word2 = *((uint32*)(prev + width));
word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word2 &= mask;
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1;
word1 = *((uint32*)(prev += 4));
word2 = *((uint32*)(prev + width));
word3 = word1 | word2; // rnd1 = 1; otherwise word3 = word1&word2
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word2 &= mask;
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += 4)) = word1;
prev += offset;
}
return 1;
}
else /* rnd1 = 0 */
{
for (i = B_SIZE; i > 0; i--)
{
word1 = *((uint32*)(prev += 4));
word2 = *((uint32*)(prev + width));
word3 = word1 & word2; /* rnd1 = 0; */
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word2 &= mask;
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += offset2)) = word1;
word1 = *((uint32*)(prev += 4));
word2 = *((uint32*)(prev + width));
word3 = word1 & word2; /* rnd1 = 0; */
word1 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word2 &= mask;
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += 4)) = word1;
prev += offset;
}
return 1;
}
}
else if (tmp == 1)
{
prev--; /* word-aligned */
if (rnd1 == 1)
{
for (i = B_SIZE; i > 0; i--)
{
word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
word22 = *((uint32*)(prev + width));
word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
word2 = *((uint32*)(prev + width));
word12 >>= 8; /* 0 b4 b3 b2 */
word22 >>= 8;
word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
word22 = word22 | (word2 << 24);
word3 = word12 | word22;
word12 &= mask;
word22 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 >>= 1;
word12 = word12 + (word22 >> 1);
word12 += word3;
*((uint32*)(pred_block += offset2)) = word12;
word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
word22 = *((uint32*)(prev + width));
word1 >>= 8; /* 0 b8 b7 b6 */
word2 >>= 8;
word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
word2 = word2 | (word22 << 24);
word3 = word1 | word2;
word1 &= mask;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += 4)) = word1;
prev += offset;
}
return 1;
}
else /* rnd1 = 0 */
{
for (i = B_SIZE; i > 0; i--)
{
word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
word22 = *((uint32*)(prev + width));
word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
word2 = *((uint32*)(prev + width));
word12 >>= 8; /* 0 b4 b3 b2 */
word22 >>= 8;
word12 = word12 | (word1 << 24); /* b5 b4 b3 b2 */
word22 = word22 | (word2 << 24);
word3 = word12 & word22;
word12 &= mask;
word22 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 >>= 1;
word12 = word12 + (word22 >> 1);
word12 += word3;
*((uint32*)(pred_block += offset2)) = word12;
word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
word22 = *((uint32*)(prev + width));
word1 >>= 8; /* 0 b8 b7 b6 */
word2 >>= 8;
word1 = word1 | (word12 << 24); /* b9 b8 b7 b6 */
word2 = word2 | (word22 << 24);
word3 = word1 & word2;
word1 &= mask;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += 4)) = word1;
prev += offset;
}
return 1;
}
}
else if (tmp == 2)
{
prev -= 2; /* word-aligned */
if (rnd1 == 1)
{
for (i = B_SIZE; i > 0; i--)
{
word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
word22 = *((uint32*)(prev + width));
word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
word2 = *((uint32*)(prev + width));
word12 >>= 16; /* 0 0 b4 b3 */
word22 >>= 16;
word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
word22 = word22 | (word2 << 16);
word3 = word12 | word22;
word12 &= mask;
word22 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 >>= 1;
word12 = word12 + (word22 >> 1);
word12 += word3;
*((uint32*)(pred_block += offset2)) = word12;
word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
word22 = *((uint32*)(prev + width));
word1 >>= 16; /* 0 0 b8 b7 */
word2 >>= 16;
word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
word2 = word2 | (word22 << 16);
word3 = word1 | word2;
word1 &= mask;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += 4)) = word1;
prev += offset;
}
return 1;
}
else /* rnd1 = 0 */
{
for (i = B_SIZE; i > 0; i--)
{
word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
word22 = *((uint32*)(prev + width));
word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
word2 = *((uint32*)(prev + width));
word12 >>= 16; /* 0 0 b4 b3 */
word22 >>= 16;
word12 = word12 | (word1 << 16); /* b6 b5 b4 b3 */
word22 = word22 | (word2 << 16);
word3 = word12 & word22;
word12 &= mask;
word22 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 >>= 1;
word12 = word12 + (word22 >> 1);
word12 += word3;
*((uint32*)(pred_block += offset2)) = word12;
word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
word22 = *((uint32*)(prev + width));
word1 >>= 16; /* 0 0 b8 b7 */
word2 >>= 16;
word1 = word1 | (word12 << 16); /* b10 b9 b8 b7 */
word2 = word2 | (word22 << 16);
word3 = word1 & word2;
word1 &= mask;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += 4)) = word1;
prev += offset;
}
return 1;
}
}
else /* tmp == 3 */
{
prev -= 3; /* word-aligned */
if (rnd1 == 1)
{
for (i = B_SIZE; i > 0; i--)
{
word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
word22 = *((uint32*)(prev + width));
word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
word2 = *((uint32*)(prev + width));
word12 >>= 24; /* 0 0 0 b4 */
word22 >>= 24;
word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
word22 = word22 | (word2 << 8);
word3 = word12 | word22;
word12 &= mask;
word22 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 >>= 1;
word12 = word12 + (word22 >> 1);
word12 += word3;
*((uint32*)(pred_block += offset2)) = word12;
word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
word22 = *((uint32*)(prev + width));
word1 >>= 24; /* 0 0 0 b8 */
word2 >>= 24;
word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
word2 = word2 | (word22 << 8);
word3 = word1 | word2;
word1 &= mask;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += 4)) = word1;
prev += offset;
}
return 1;
}
else /* rnd1 = 0 */
{
for (i = B_SIZE; i > 0; i--)
{
word12 = *((uint32*)prev); /* read b4 b3 b2 b1 */
word22 = *((uint32*)(prev + width));
word1 = *((uint32*)(prev += 4)); /* read b8 b7 b6 b5 */
word2 = *((uint32*)(prev + width));
word12 >>= 24; /* 0 0 0 b4 */
word22 >>= 24;
word12 = word12 | (word1 << 8); /* b7 b6 b5 b4 */
word22 = word22 | (word2 << 8);
word3 = word12 & word22;
word12 &= mask;
word22 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word12 >>= 1;
word12 = word12 + (word22 >> 1);
word12 += word3;
*((uint32*)(pred_block += offset2)) = word12;
word12 = *((uint32*)(prev += 4)); /* read b12 b11 b10 b9 */
word22 = *((uint32*)(prev + width));
word1 >>= 24; /* 0 0 0 b8 */
word2 >>= 24;
word1 = word1 | (word12 << 8); /* b11 b10 b9 b8 */
word2 = word2 | (word22 << 8);
word3 = word1 & word2;
word1 &= mask;
word2 &= mask;
word3 &= (~mask); /* 0x1010101, check last bit */
word1 >>= 1;
word1 = word1 + (word2 >> 1);
word1 += word3;
*((uint32*)(pred_block += 4)) = word1;
prev += offset;
}
return 1;
} /* rnd */
} /* tmp */
}
/**********************************************************************************/
int GetPredAdvancedBy1x1(
uint8 *prev, /* i */
uint8 *pred_block, /* i */
int width, /* i */
int pred_width_rnd /* i */
)
{
uint i; /* loop variable */
int offset, offset2;
uint32 x1, x2, x1m, x2m, y1, y2, y1m, y2m; /* new way */
int tmp;
int rnd1, rnd2;
uint32 mask;
/* initialize offset to adjust pixel counter */
/* the next row; full-pel resolution */
offset = width - B_SIZE; /* offset for prev */
offset2 = (pred_width_rnd >> 1) - 8; /* offset for pred_block */
rnd1 = pred_width_rnd & 1;
rnd2 = rnd1 + 1;
rnd2 |= (rnd2 << 8);
rnd2 |= (rnd2 << 16);
mask = 0x3F;
mask |= (mask << 8);
mask |= (mask << 16); /* 0x3f3f3f3f */
tmp = (uintptr_t)prev & 3;
pred_block -= 4; /* preset */
if (tmp == 0) /* word-aligned */
{
for (i = B_SIZE; i > 0; i--)
{
x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
x2m = (x2 >> 2) & mask;
x1 = x1 ^(x1m << 2);
x2 = x2 ^(x2m << 2);
x1m += x2m;
x1 += x2;
/* x2m, x2 free */
y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
y2m = (y2 >> 2) & mask;
y1 = y1 ^(y1m << 2);
y2 = y2 ^(y2m << 2);
y1m += y2m;
y1 += y2;
/* y2m, y2 free */
/* x2m, x2 free */
x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
x2m = (x2 >> 2) & mask;
y2m = (y2 >> 2) & mask;
x2 = x2 ^(x2m << 2);
y2 = y2 ^(y2m << 2);
x2m += y2m;
x2 += y2;
/* y2m, y2 free */
/* now operate on x1m, x1, y1m, y1, x2m, x2 */
/* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
/* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
/* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
/* x1, y1, x2 */
y2m = x1m >> 8;
y2 = x1 >> 8;
y2m |= (y1m << 24); /* a4+b4, a3+b3, a2+b2, a1+b1 */
y2 |= (y1 << 24);
x1m += y2m; /* a3+b3+a4+b4, ....., a0+b0+a1+b1 */
x1 += y2;
x1 += rnd2;
x1 &= (mask << 2);
x1m += (x1 >> 2);
*((uint32*)(pred_block += 4)) = x1m; /* save x1m */
y2m = y1m >> 8;
y2 = y1 >> 8;
y2m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
y2 |= (x2 << 24);
y1m += y2m; /* a7+b7+a8+b8, ....., a4+b4+a5+b5 */
y1 += y2;
y1 += rnd2;
y1 &= (mask << 2);
y1m += (y1 >> 2);
*((uint32*)(pred_block += 4)) = y1m; /* save y1m */
pred_block += offset2;
prev += offset;
}
return 1;
}
else if (tmp == 1)
{
prev--; /* to word-aligned */
for (i = B_SIZE; i > 0; i--)
{
x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
x2m = (x2 >> 2) & mask;
x1 = x1 ^(x1m << 2);
x2 = x2 ^(x2m << 2);
x1m += x2m;
x1 += x2;
/* x2m, x2 free */
y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
y2m = (y2 >> 2) & mask;
y1 = y1 ^(y1m << 2);
y2 = y2 ^(y2m << 2);
y1m += y2m;
y1 += y2;
/* y2m, y2 free */
/* x2m, x2 free */
x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
x2m = (x2 >> 2) & mask;
y2m = (y2 >> 2) & mask;
x2 = x2 ^(x2m << 2);
y2 = y2 ^(y2m << 2);
x2m += y2m;
x2 += y2;
/* y2m, y2 free */
/* now operate on x1m, x1, y1m, y1, x2m, x2 */
/* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
/* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
/* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
/* x1, y1, x2 */
x1m >>= 8 ;
x1 >>= 8;
x1m |= (y1m << 24); /* a4+b4, a3+b3, a2+b2, a1+b1 */
x1 |= (y1 << 24);
y2m = (y1m << 16);
y2 = (y1 << 16);
y2m |= (x1m >> 8); /* a5+b5, a4+b4, a3+b3, a2+b2 */
y2 |= (x1 >> 8);
x1 += rnd2;
x1m += y2m; /* a4+b4+a5+b5, ....., a1+b1+a2+b2 */
x1 += y2;
x1 &= (mask << 2);
x1m += (x1 >> 2);
*((uint32*)(pred_block += 4)) = x1m; /* save x1m */
y1m >>= 8;
y1 >>= 8;
y1m |= (x2m << 24); /* a8+b8, a7+b7, a6+b6, a5+b5 */
y1 |= (x2 << 24);
y2m = (x2m << 16);
y2 = (x2 << 16);
y2m |= (y1m >> 8); /* a9+b9, a8+b8, a7+b7, a6+b6,*/
y2 |= (y1 >> 8);
y1 += rnd2;
y1m += y2m; /* a8+b8+a9+b9, ....., a5+b5+a6+b6 */
y1 += y2;
y1 &= (mask << 2);
y1m += (y1 >> 2);
*((uint32*)(pred_block += 4)) = y1m; /* save y1m */
pred_block += offset2;
prev += offset;
}
return 1;
}
else if (tmp == 2)
{
prev -= 2; /* to word-aligned */
for (i = B_SIZE; i > 0; i--)
{
x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
x2m = (x2 >> 2) & mask;
x1 = x1 ^(x1m << 2);
x2 = x2 ^(x2m << 2);
x1m += x2m;
x1 += x2;
/* x2m, x2 free */
y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
y2m = (y2 >> 2) & mask;
y1 = y1 ^(y1m << 2);
y2 = y2 ^(y2m << 2);
y1m += y2m;
y1 += y2;
/* y2m, y2 free */
/* x2m, x2 free */
x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
x2m = (x2 >> 2) & mask;
y2m = (y2 >> 2) & mask;
x2 = x2 ^(x2m << 2);
y2 = y2 ^(y2m << 2);
x2m += y2m;
x2 += y2;
/* y2m, y2 free */
/* now operate on x1m, x1, y1m, y1, x2m, x2 */
/* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
/* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
/* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
/* x1, y1, x2 */
x1m >>= 16 ;
x1 >>= 16;
x1m |= (y1m << 16); /* a5+b5, a4+b4, a3+b3, a2+b2 */
x1 |= (y1 << 16);
y2m = (y1m << 8);
y2 = (y1 << 8);
y2m |= (x1m >> 8); /* a6+b6, a5+b5, a4+b4, a3+b3 */
y2 |= (x1 >> 8);
x1 += rnd2;
x1m += y2m; /* a5+b5+a6+b6, ....., a2+b2+a3+b3 */
x1 += y2;
x1 &= (mask << 2);
x1m += (x1 >> 2);
*((uint32*)(pred_block += 4)) = x1m; /* save x1m */
y1m >>= 16;
y1 >>= 16;
y1m |= (x2m << 16); /* a9+b9, a8+b8, a7+b7, a6+b6 */
y1 |= (x2 << 16);
y2m = (x2m << 8);
y2 = (x2 << 8);
y2m |= (y1m >> 8); /* a10+b10, a9+b9, a8+b8, a7+b7,*/
y2 |= (y1 >> 8);
y1 += rnd2;
y1m += y2m; /* a9+b9+a10+b10, ....., a6+b6+a7+b7 */
y1 += y2;
y1 &= (mask << 2);
y1m += (y1 >> 2);
*((uint32*)(pred_block += 4)) = y1m; /* save y1m */
pred_block += offset2;
prev += offset;
}
return 1;
}
else /* tmp == 3 */
{
prev -= 3; /* to word-aligned */
for (i = B_SIZE; i > 0; i--)
{
x1 = *((uint32*)prev); /* load a3 a2 a1 a0 */
x2 = *((uint32*)(prev + width)); /* load b3 b2 b1 b0, another line */
y1 = *((uint32*)(prev += 4)); /* a7 a6 a5 a4 */
y2 = *((uint32*)(prev + width)); /* b7 b6 b5 b4 */
x1m = (x1 >> 2) & mask; /* zero out last 2 bits */
x2m = (x2 >> 2) & mask;
x1 = x1 ^(x1m << 2);
x2 = x2 ^(x2m << 2);
x1m += x2m;
x1 += x2;
/* x2m, x2 free */
y1m = (y1 >> 2) & mask; /* zero out last 2 bits */
y2m = (y2 >> 2) & mask;
y1 = y1 ^(y1m << 2);
y2 = y2 ^(y2m << 2);
y1m += y2m;
y1 += y2;
/* y2m, y2 free */
/* x2m, x2 free */
x2 = *((uint32*)(prev += 4)); /* a11 a10 a9 a8 */
y2 = *((uint32*)(prev + width)); /* b11 b10 b9 b8 */
x2m = (x2 >> 2) & mask;
y2m = (y2 >> 2) & mask;
x2 = x2 ^(x2m << 2);
y2 = y2 ^(y2m << 2);
x2m += y2m;
x2 += y2;
/* y2m, y2 free */
/* now operate on x1m, x1, y1m, y1, x2m, x2 */
/* x1m = a3+b3, a2+b2, a1+b1, a0+b0 */
/* y1m = a7+b7, a6+b6, a5+b5, a4+b4 */
/* x2m = a11+b11, a10+b10, a9+b9, a8+b8 */
/* x1, y1, x2 */
x1m >>= 24 ;
x1 >>= 24;
x1m |= (y1m << 8); /* a6+b6, a5+b5, a4+b4, a3+b3 */
x1 |= (y1 << 8);
x1m += y1m; /* a6+b6+a7+b7, ....., a3+b3+a4+b4 */
x1 += y1;
x1 += rnd2;
x1 &= (mask << 2);
x1m += (x1 >> 2);
*((uint32*)(pred_block += 4)) = x1m; /* save x1m */
y1m >>= 24;
y1 >>= 24;
y1m |= (x2m << 8); /* a10+b10, a9+b9, a8+b8, a7+b7 */
y1 |= (x2 << 8);
y1m += x2m; /* a10+b10+a11+b11, ....., a7+b7+a8+b8 */
y1 += x2;
y1 += rnd2;
y1 &= (mask << 2);
y1m += (y1 >> 2);
*((uint32*)(pred_block += 4)) = y1m; /* save y1m */
pred_block += offset2;
prev += offset;
}
return 1;
}
}