Commit 5fdd1caa authored by Debargha Mukherjee

Merge and remove WARPEDPIXEL_FILTER_BITS

This macro is the same as FILTER_BITS, so the two can be safely merged.

Change-Id: Idbc7e6492c14070aa6eb054ce3c0a30bc89d41be
parent e82709a5
......@@ -49,18 +49,11 @@ typedef struct mv32 {
#define WARPEDPIXEL_PREC_BITS 6
#define WARPEDPIXEL_PREC_SHIFTS (1 << WARPEDPIXEL_PREC_BITS)
// Taps for ntap filter
#define WARPEDPIXEL_FILTER_TAPS 6
// Precision of filter taps
#define WARPEDPIXEL_FILTER_BITS 7
#define WARP_PARAM_REDUCE_BITS 6
// Precision bits reduction after horizontal shear
#define HORSHEAR_REDUCE_PREC_BITS 3
#define VERSHEAR_REDUCE_PREC_BITS \
(2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)
#define VERSHEAR_REDUCE_PREC_BITS (2 * FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)
#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS)
......
......@@ -426,24 +426,22 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
if (!use_conv_params &&
bd + WARPEDPIXEL_FILTER_BITS + 2 - reduce_bits_horiz > 16)
reduce_bits_horiz += bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert =
use_conv_params ? conv_params->round_1
: 2 * WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz =
use_conv_params ? bd + FILTER_BITS + 1 - conv_params->round_0
: bd + WARPEDPIXEL_FILTER_BITS + 1 - reduce_bits_horiz;
if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz = use_conv_params
? bd + FILTER_BITS + 1 - conv_params->round_0
: bd + FILTER_BITS + 1 - reduce_bits_horiz;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
const int offset_bits_vert =
use_conv_params ? bd + 2 * FILTER_BITS - conv_params->round_0
: bd + 2 * WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
const int offset_bits_vert = use_conv_params
? bd + 2 * FILTER_BITS - conv_params->round_0
: bd + 2 * FILTER_BITS - reduce_bits_horiz;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
(void)max_bits_horiz;
for (int i = p_row; i < p_row + p_height; i += 8) {
......@@ -685,7 +683,7 @@ static INLINE int error_measure(int err) {
This allows the derivation of the appropriate bit widths and offsets for
the various intermediate values: If
F := WARPEDPIXEL_FILTER_BITS = 7 (or else the above ranges need adjusting)
F := FILTER_BITS = 7 (or else the above ranges need adjusting)
So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit
intermediate value.
H := HORSHEAR_REDUCE_PREC_BITS
......@@ -728,23 +726,20 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
const int reduce_bits_vert =
use_conv_params ? conv_params->round_1
: 2 * WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz =
use_conv_params
? bd + FILTER_BITS + 1 - conv_params->round_0
: bd + WARPEDPIXEL_FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
use_conv_params ? bd + FILTER_BITS + 1 - conv_params->round_0
: bd + FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
const int offset_bits_vert =
use_conv_params
? bd + 2 * FILTER_BITS - conv_params->round_0
: bd + 2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
use_conv_params ? bd + 2 * FILTER_BITS - conv_params->round_0
: bd + 2 * FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
(void)max_bits_horiz;
for (int i = p_row; i < p_row + p_height; i += 8) {
......
......@@ -28,18 +28,17 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
if (!use_conv_params &&
bd + WARPEDPIXEL_FILTER_BITS + 2 - reduce_bits_horiz > 16)
reduce_bits_horiz += bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert =
use_conv_params ? conv_params->round_1
: 2 * WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz;
if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
assert(FILTER_BITS == FILTER_BITS);
#if CONFIG_JNT_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
......@@ -94,9 +93,8 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
(1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else if (ix4 >= width + 6) {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......@@ -105,10 +103,10 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
iy = 0;
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
tmp[k + 7] =
_mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......
......@@ -28,18 +28,17 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
if (!use_conv_params &&
bd + WARPEDPIXEL_FILTER_BITS + 2 - reduce_bits_horiz > 16)
reduce_bits_horiz += bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert =
use_conv_params ? conv_params->round_1
: 2 * WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz;
if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
assert(FILTER_BITS == FILTER_BITS);
if (bd == 12 && reduce_bits_horiz < 5) printf("Error\n");
/* Note: For this code to work, the left/right frame borders need to be
......@@ -89,9 +88,8 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
(1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else if (ix4 >= width + 6) {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......@@ -100,10 +98,10 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
iy = 0;
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
tmp[k + 7] =
_mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......
......@@ -28,15 +28,15 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
const int reduce_bits_vert =
use_conv_params ? conv_params->round_1
: 2 * WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
assert(FILTER_BITS == FILTER_BITS);
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
......@@ -85,9 +85,8 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
(1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else if (ix4 >= width + 6) {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......@@ -96,10 +95,10 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
iy = 0;
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
tmp[k + 7] =
_mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......
......@@ -216,11 +216,11 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
const int reduce_bits_vert =
use_conv_params ? conv_params->round_1
: 2 * WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
......@@ -230,7 +230,7 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
const __m128i wt0 = _mm_set1_epi32(w0);
const __m128i wt1 = _mm_set1_epi32(w1);
#endif // CONFIG_JNT_COMP
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
assert(FILTER_BITS == FILTER_BITS);
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
......@@ -279,9 +279,8 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
(1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else if (ix4 >= width + 6) {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......@@ -290,10 +289,10 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
iy = 0;
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
tmp[k + 7] =
_mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......
......@@ -216,14 +216,14 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
const int reduce_bits_horiz =
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + WARPEDPIXEL_FILTER_BITS - 1;
const int reduce_bits_vert =
use_conv_params ? conv_params->round_1
: 2 * WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == WARPEDPIXEL_FILTER_BITS);
assert(FILTER_BITS == FILTER_BITS);
/* Note: For this code to work, the left/right frame borders need to be
extended by at least 13 pixels each. By the time we get here, other
......@@ -272,9 +272,8 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
(1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride] * (1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else if (ix4 >= width + 6) {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......@@ -283,10 +282,10 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
iy = 0;
else if (iy > height - 1)
iy = height - 1;
tmp[k + 7] = _mm_set1_epi16(
(1 << (bd + WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (WARPEDPIXEL_FILTER_BITS - reduce_bits_horiz)));
tmp[k + 7] =
_mm_set1_epi16((1 << (bd + FILTER_BITS - reduce_bits_horiz - 1)) +
ref[iy * stride + (width - 1)] *
(1 << (FILTER_BITS - reduce_bits_horiz)));
}
} else {
for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment