Commit 66b98991 authored by Debargha Mukherjee

Replace HORSHEAR_REDUCE_PREC_BITS with ROUND0_BITS

Replace HORSHEAR_REDUCE_PREC_BITS with ROUND0_BITS for
consistency.
Also includes other clean-ups.

Change-Id: If0794cb16fdc8844859a614ed524c761fb46eddf
parent a458afb5
...@@ -51,10 +51,6 @@ typedef struct mv32 { ...@@ -51,10 +51,6 @@ typedef struct mv32 {
#define WARP_PARAM_REDUCE_BITS 6 #define WARP_PARAM_REDUCE_BITS 6
// Precision bits reduction after horizontal shear
#define HORSHEAR_REDUCE_PREC_BITS 3
#define VERSHEAR_REDUCE_PREC_BITS (2 * FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)
#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS) #define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS)
/* clang-format off */ /* clang-format off */
......
...@@ -424,21 +424,15 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, ...@@ -424,21 +424,15 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
int32_t tmp[15 * 8]; int32_t tmp[15 * 8];
const int use_conv_params = const int use_conv_params =
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst); (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
int reduce_bits_horiz = int reduce_bits_horiz = conv_params->round_0;
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16) if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14; reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert = use_conv_params const int reduce_bits_vert = use_conv_params
? conv_params->round_1 ? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; : 2 * FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz = use_conv_params const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
? bd + FILTER_BITS + 1 - conv_params->round_0 const int offset_bits_horiz = bd + FILTER_BITS - 1;
: bd + FILTER_BITS + 1 - reduce_bits_horiz; const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
const int offset_bits_vert = use_conv_params
? bd + 2 * FILTER_BITS - conv_params->round_0
: bd + 2 * FILTER_BITS - reduce_bits_horiz;
if (use_conv_params) { if (use_conv_params) {
conv_params->do_post_rounding = 1; conv_params->do_post_rounding = 1;
} }
...@@ -511,10 +505,10 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, ...@@ -511,10 +505,10 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
&conv_params &conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride + ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
(j - p_col + l + 4)]; (j - p_col + l + 4)];
sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert) -
(1 << (offset_bits_horiz + FILTER_BITS - (1 << (offset_bits_horiz + FILTER_BITS - reduce_bits_horiz -
conv_params->round_0 - conv_params->round_1)) - reduce_bits_vert)) -
(1 << (offset_bits_vert - conv_params->round_1)); (1 << (offset_bits_vert - reduce_bits_vert));
#if CONFIG_JNT_COMP #if CONFIG_JNT_COMP
if (conv_params->use_jnt_comp_avg) { if (conv_params->use_jnt_comp_avg) {
if (conv_params->do_average) { if (conv_params->do_average) {
...@@ -686,7 +680,7 @@ static INLINE int error_measure(int err) { ...@@ -686,7 +680,7 @@ static INLINE int error_measure(int err) {
F := FILTER_BITS = 7 (or else the above ranges need adjusting) F := FILTER_BITS = 7 (or else the above ranges need adjusting)
So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit So a *single* filter stage maps a k-bit input to a (k + F + 1)-bit
intermediate value. intermediate value.
H := HORSHEAR_REDUCE_PREC_BITS H := ROUND0_BITS
V := VERSHEAR_REDUCE_PREC_BITS V := VERSHEAR_REDUCE_PREC_BITS
(and note that we must have H + V = 2*F for the output to have the same (and note that we must have H + V = 2*F for the output to have the same
scale as the input) scale as the input)
...@@ -724,19 +718,13 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, ...@@ -724,19 +718,13 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
const int bd = 8; const int bd = 8;
const int use_conv_params = const int use_conv_params =
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst); (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz = const int reduce_bits_horiz = conv_params->round_0;
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
const int reduce_bits_vert = use_conv_params const int reduce_bits_vert = use_conv_params
? conv_params->round_1 ? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; : 2 * FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz = const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
use_conv_params ? bd + FILTER_BITS + 1 - conv_params->round_0 const int offset_bits_horiz = bd + FILTER_BITS - 1;
: bd + FILTER_BITS + 1 - HORSHEAR_REDUCE_PREC_BITS; const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz =
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
const int offset_bits_vert =
use_conv_params ? bd + 2 * FILTER_BITS - conv_params->round_0
: bd + 2 * FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS;
if (use_conv_params) { if (use_conv_params) {
conv_params->do_post_rounding = 1; conv_params->do_post_rounding = 1;
} }
...@@ -815,10 +803,10 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, ...@@ -815,10 +803,10 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
&conv_params &conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride + ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
(j - p_col + l + 4)]; (j - p_col + l + 4)];
sum = ROUND_POWER_OF_TWO(sum, conv_params->round_1) - sum = ROUND_POWER_OF_TWO(sum, reduce_bits_vert) -
(1 << (offset_bits_horiz + FILTER_BITS - (1 << (offset_bits_horiz + FILTER_BITS - reduce_bits_horiz -
conv_params->round_0 - conv_params->round_1)) - reduce_bits_vert)) -
(1 << (offset_bits_vert - conv_params->round_1)); (1 << (offset_bits_vert - reduce_bits_vert));
#if CONFIG_JNT_COMP #if CONFIG_JNT_COMP
if (conv_params->use_jnt_comp_avg) { if (conv_params->use_jnt_comp_avg) {
if (conv_params->do_average) { if (conv_params->do_average) {
......
...@@ -26,15 +26,13 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, ...@@ -26,15 +26,13 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
int i, j, k; int i, j, k;
const int use_conv_params = const int use_conv_params =
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst); (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
int reduce_bits_horiz = int reduce_bits_horiz = conv_params->round_0;
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16) if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16)
reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14; reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert = use_conv_params const int reduce_bits_vert = use_conv_params
? conv_params->round_1 ? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; : 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz = const int offset_bits_horiz = bd + FILTER_BITS - 1;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
if (use_conv_params) { if (use_conv_params) {
conv_params->do_post_rounding = 1; conv_params->do_post_rounding = 1;
} }
...@@ -309,11 +307,10 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, ...@@ -309,11 +307,10 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
(__m128i *)&conv_params (__m128i *)&conv_params
->dst[(i + k + 4) * conv_params->dst_stride + j]; ->dst[(i + k + 4) * conv_params->dst_stride + j];
const __m128i round_const = _mm_set1_epi32( const __m128i round_const = _mm_set1_epi32(
-(1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)) + -(1 << (bd + 2 * FILTER_BITS - reduce_bits_horiz - 1)) +
((1 << (conv_params->round_1)) >> 1)); ((1 << (reduce_bits_vert)) >> 1));
res_lo = _mm_add_epi32(res_lo, round_const); res_lo = _mm_add_epi32(res_lo, round_const);
res_lo = res_lo = _mm_sra_epi32(res_lo, _mm_cvtsi32_si128(reduce_bits_vert));
_mm_sra_epi32(res_lo, _mm_cvtsi32_si128(conv_params->round_1));
#if CONFIG_JNT_COMP #if CONFIG_JNT_COMP
if (conv_params->use_jnt_comp_avg) { if (conv_params->use_jnt_comp_avg) {
if (comp_avg) { if (comp_avg) {
...@@ -335,8 +332,7 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, ...@@ -335,8 +332,7 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
if (p_width > 4) { if (p_width > 4) {
res_hi = _mm_add_epi32(res_hi, round_const); res_hi = _mm_add_epi32(res_hi, round_const);
res_hi = res_hi = _mm_sra_epi32(res_hi, _mm_cvtsi32_si128(reduce_bits_vert));
_mm_sra_epi32(res_hi, _mm_cvtsi32_si128(conv_params->round_1));
#if CONFIG_JNT_COMP #if CONFIG_JNT_COMP
if (conv_params->use_jnt_comp_avg) { if (conv_params->use_jnt_comp_avg) {
......
...@@ -214,13 +214,11 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, ...@@ -214,13 +214,11 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
const int bd = 8; const int bd = 8;
const int use_conv_params = const int use_conv_params =
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst); (conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz = const int reduce_bits_horiz = conv_params->round_0;
use_conv_params ? conv_params->round_0 : HORSHEAR_REDUCE_PREC_BITS;
const int reduce_bits_vert = use_conv_params const int reduce_bits_vert = use_conv_params
? conv_params->round_1 ? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; : 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz = const int offset_bits_horiz = bd + FILTER_BITS - 1;
use_conv_params ? bd + FILTER_BITS - 1 : bd + FILTER_BITS - 1;
if (use_conv_params) { if (use_conv_params) {
conv_params->do_post_rounding = 1; conv_params->do_post_rounding = 1;
} }
...@@ -482,11 +480,10 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, ...@@ -482,11 +480,10 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
(__m128i *)&conv_params (__m128i *)&conv_params
->dst[(i + k + 4) * conv_params->dst_stride + j]; ->dst[(i + k + 4) * conv_params->dst_stride + j];
const __m128i round_const = _mm_set1_epi32( const __m128i round_const = _mm_set1_epi32(
-(1 << (bd + 2 * FILTER_BITS - conv_params->round_0 - 1)) + -(1 << (bd + 2 * FILTER_BITS - reduce_bits_horiz - 1)) +
((1 << (conv_params->round_1)) >> 1)); ((1 << (reduce_bits_vert)) >> 1));
res_lo = _mm_add_epi32(res_lo, round_const); res_lo = _mm_add_epi32(res_lo, round_const);
res_lo = res_lo = _mm_sra_epi32(res_lo, _mm_cvtsi32_si128(reduce_bits_vert));
_mm_sra_epi32(res_lo, _mm_cvtsi32_si128(conv_params->round_1));
#if CONFIG_JNT_COMP #if CONFIG_JNT_COMP
if (conv_params->use_jnt_comp_avg) { if (conv_params->use_jnt_comp_avg) {
if (comp_avg) { if (comp_avg) {
...@@ -506,8 +503,7 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, ...@@ -506,8 +503,7 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
#endif #endif
if (p_width > 4) { if (p_width > 4) {
res_hi = _mm_add_epi32(res_hi, round_const); res_hi = _mm_add_epi32(res_hi, round_const);
res_hi = res_hi = _mm_sra_epi32(res_hi, _mm_cvtsi32_si128(reduce_bits_vert));
_mm_sra_epi32(res_hi, _mm_cvtsi32_si128(conv_params->round_1));
#if CONFIG_JNT_COMP #if CONFIG_JNT_COMP
if (conv_params->use_jnt_comp_avg) { if (conv_params->use_jnt_comp_avg) {
if (comp_avg) { if (comp_avg) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment