Commit a60dc9d6 authored by David Barker, committed by Sarah Parker
Browse files

Adjust chroma position in warp filter

When using chroma subsampling, the warp filter currently projects
chroma pixels inconsistently, especially when the subsampling
factors differ between the x and y axes.

For example, when subsampling_x = 1 and subsampling_y = 0, we
calculate the destination coordinates (dx, dy) from the source
coordinates (sx, sy) as:
dx = project(2*sx+0.5, 2*sy+0.5)/2 - 0.25
dy = project(sx, sy)
where project() applies the affine warp model.

This patch changes to a simpler and more consistent model,
where we:
* Project the chroma sample into luma coordinates, taking
  the chroma sample to be co-located with the top-left luma
  sample in its (2x2, or 2x1, or 1x2) subsampling block
  (this is done for simplicity; we don't expect the exact
   position to make much difference to the output quality)
* Apply the transformation in luma coordinates
* Project the resulting luma sample back into chroma coordinates

Change to software speed is in the noise, but this approach
should be simpler in hardware, and should slightly improve
quality for 4:2:2 and 4:4:0 videos.

Change-Id: Idd455fdd3897594ca7d4edff5b85b78961d1638d
parent 7380b25e
......@@ -969,25 +969,21 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
for (i = p_row; i < p_row + p_height; i += 8) {
for (j = p_col; j < p_col + p_width; j += 8) {
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
if (subsampling_y)
y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
ix4 = x4 >> WARPEDMODEL_PREC_BITS;
sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
iy4 = y4 >> WARPEDMODEL_PREC_BITS;
sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Calculate the center of this 8x8 block,
// project to luma coordinates (if in a subsampled chroma plane),
// apply the affine transformation,
// then convert back to the original coordinates (if necessary)
const int32_t src_x = (j + 4) << subsampling_x;
const int32_t src_y = (i + 4) << subsampling_y;
const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
const int32_t x4 = dst_x >> subsampling_x;
const int32_t y4 = dst_y >> subsampling_y;
int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
sx4 += alpha * (-4) + beta * (-4);
sy4 += gamma * (-4) + delta * (-4);
......@@ -1311,25 +1307,21 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
for (i = p_row; i < p_row + p_height; i += 8) {
for (j = p_col; j < p_col + p_width; j += 8) {
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = (mat[2] * 4 * (j + 4) + mat[3] * 4 * (i + 4) + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * (j + 4) + mat[3] * (i + 4) + mat[0];
if (subsampling_y)
y4 = (mat[4] * 4 * (j + 4) + mat[5] * 4 * (i + 4) + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * (j + 4) + mat[5] * (i + 4) + mat[1];
ix4 = x4 >> WARPEDMODEL_PREC_BITS;
sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
iy4 = y4 >> WARPEDMODEL_PREC_BITS;
sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Calculate the center of this 8x8 block,
// project to luma coordinates (if in a subsampled chroma plane),
// apply the affine transformation,
// then convert back to the original coordinates (if necessary)
const int32_t src_x = (j + 4) << subsampling_x;
const int32_t src_y = (i + 4) << subsampling_y;
const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
const int32_t x4 = dst_x >> subsampling_x;
const int32_t y4 = dst_y >> subsampling_y;
int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
sx4 += alpha * (-4) + beta * (-4);
sy4 += gamma * (-4) + delta * (-4);
......
......@@ -57,30 +57,17 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
for (i = 0; i < p_height; i += 8) {
for (j = 0; j < p_width; j += 8) {
// (x, y) coordinates of the center of this block in the destination
// image
const int32_t dst_x = p_col + j + 4;
const int32_t dst_y = p_row + i + 4;
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
if (subsampling_y)
y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];
ix4 = x4 >> WARPEDMODEL_PREC_BITS;
sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
iy4 = y4 >> WARPEDMODEL_PREC_BITS;
sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
const int32_t src_x = (p_col + j + 4) << subsampling_x;
const int32_t src_y = (p_row + i + 4) << subsampling_y;
const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
const int32_t x4 = dst_x >> subsampling_x;
const int32_t y4 = dst_y >> subsampling_y;
int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Add in all the constant terms, including rounding and offset
sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
......
......@@ -53,30 +53,17 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
for (i = 0; i < p_height; i += 8) {
for (j = 0; j < p_width; j += 8) {
// (x, y) coordinates of the center of this block in the destination
// image
const int32_t dst_x = p_col + j + 4;
const int32_t dst_y = p_row + i + 4;
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
if (subsampling_y)
y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];
ix4 = x4 >> WARPEDMODEL_PREC_BITS;
sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
iy4 = y4 >> WARPEDMODEL_PREC_BITS;
sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
const int32_t src_x = (p_col + j + 4) << subsampling_x;
const int32_t src_y = (p_row + i + 4) << subsampling_y;
const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
const int32_t x4 = dst_x >> subsampling_x;
const int32_t y4 = dst_y >> subsampling_y;
int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Add in all the constant terms, including rounding and offset
sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
......
......@@ -240,30 +240,17 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
for (i = 0; i < p_height; i += 8) {
for (j = 0; j < p_width; j += 8) {
// (x, y) coordinates of the center of this block in the destination
// image
const int32_t dst_x = p_col + j + 4;
const int32_t dst_y = p_row + i + 4;
int32_t x4, y4, ix4, sx4, iy4, sy4;
if (subsampling_x)
x4 = (mat[2] * 4 * dst_x + mat[3] * 4 * dst_y + mat[0] * 2 +
(mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
x4 = mat[2] * dst_x + mat[3] * dst_y + mat[0];
if (subsampling_y)
y4 = (mat[4] * 4 * dst_x + mat[5] * 4 * dst_y + mat[1] * 2 +
(mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS))) /
4;
else
y4 = mat[4] * dst_x + mat[5] * dst_y + mat[1];
ix4 = x4 >> WARPEDMODEL_PREC_BITS;
sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
iy4 = y4 >> WARPEDMODEL_PREC_BITS;
sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
const int32_t src_x = (p_col + j + 4) << subsampling_x;
const int32_t src_y = (p_row + i + 4) << subsampling_y;
const int32_t dst_x = mat[2] * src_x + mat[3] * src_y + mat[0];
const int32_t dst_y = mat[4] * src_x + mat[5] * src_y + mat[1];
const int32_t x4 = dst_x >> subsampling_x;
const int32_t y4 = dst_y >> subsampling_y;
int32_t ix4 = x4 >> WARPEDMODEL_PREC_BITS;
int32_t sx4 = x4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
int32_t iy4 = y4 >> WARPEDMODEL_PREC_BITS;
int32_t sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
// Add in all the constant terms, including rounding and offset
sx4 += alpha * (-4) + beta * (-4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment