Commit 7caa7382, authored by Cheng Chen and committed by Jingning Han

JNT_COMP: Round the weighted sum

Previously, the weighted sums in the convolve functions were right-shifted
without rounding. This patch adds a rounding value before the right shifts.

Change-Id: Iea39aca419ac0ca0c32756f345293ce5e28dbd5b
parent ef34fff7
......@@ -476,7 +476,8 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
} else {
dst[y * dst_stride + x] += res * conv_params->bck_offset;
dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1);
dst[y * dst_stride + x] = ROUND_POWER_OF_TWO(dst[y * dst_stride + x],
DIST_PRECISION_BITS - 1);
}
}
}
......@@ -546,7 +547,8 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
} else {
dst[y * dst_stride + x] += res * conv_params->bck_offset;
dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1);
dst[y * dst_stride + x] = ROUND_POWER_OF_TWO(dst[y * dst_stride + x],
DIST_PRECISION_BITS - 1);
}
}
#else
......@@ -676,7 +678,8 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
if (conv_params->do_average) {
dst[y * dst_stride + x] += res * conv_params->bck_offset;
dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1);
dst[y * dst_stride + x] = ROUND_POWER_OF_TWO(dst[y * dst_stride + x],
DIST_PRECISION_BITS - 1);
} else {
dst[y * dst_stride + x] = res * conv_params->fwd_offset;
}
......@@ -752,7 +755,8 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
if (conv_params->do_average) {
dst[y * dst_stride + x] += res * conv_params->bck_offset;
dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1);
dst[y * dst_stride + x] = ROUND_POWER_OF_TWO(dst[y * dst_stride + x],
DIST_PRECISION_BITS - 1);
} else {
dst[y * dst_stride + x] = res * conv_params->fwd_offset;
}
......
......@@ -347,7 +347,8 @@ static void vfilter(const int32_t *src, int src_stride, int32_t *dst,
if (conv_params->do_average) {
dst[y * dst_stride + x] += res * conv_params->bck_offset;
dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1);
dst[y * dst_stride + x] = ROUND_POWER_OF_TWO(dst[y * dst_stride + x],
DIST_PRECISION_BITS - 1);
} else {
dst[y * dst_stride + x] = res * conv_params->fwd_offset;
}
......@@ -465,7 +466,8 @@ static void vfilter8(const int32_t *src, int src_stride, int32_t *dst,
if (conv_params->do_average) {
dst[y * dst_stride + x] += res * conv_params->bck_offset;
dst[y * dst_stride + x] >>= (DIST_PRECISION_BITS - 1);
dst[y * dst_stride + x] = ROUND_POWER_OF_TWO(dst[y * dst_stride + x],
DIST_PRECISION_BITS - 1);
} else {
dst[y * dst_stride + x] = res * conv_params->fwd_offset;
}
......
......@@ -42,6 +42,9 @@ void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set_epi32(w0, w0, w0, w0);
const __m128i wt1 = _mm_set_epi32(w1, w1, w1, w1);
const int jnt_round_const = 1 << (DIST_PRECISION_BITS - 2);
const __m128i jnt_r = _mm_set_epi32(jnt_round_const, jnt_round_const,
jnt_round_const, jnt_round_const);
/* Horizontal filter */
{
......@@ -196,14 +199,18 @@ void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
if (do_average) {
_mm_storeu_si128(
p + 0, _mm_srai_epi32(
_mm_add_epi32(_mm_loadu_si128(p + 0),
_mm_mullo_epi32(res_lo_round, wt1)),
_mm_add_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0),
_mm_mullo_epi32(
res_lo_round, wt1)),
jnt_r),
DIST_PRECISION_BITS - 1));
_mm_storeu_si128(
p + 1, _mm_srai_epi32(
_mm_add_epi32(_mm_loadu_si128(p + 1),
_mm_mullo_epi32(res_hi_round, wt1)),
_mm_add_epi32(_mm_add_epi32(_mm_loadu_si128(p + 1),
_mm_mullo_epi32(
res_hi_round, wt1)),
jnt_r),
DIST_PRECISION_BITS - 1));
} else {
_mm_storeu_si128(p + 0, _mm_mullo_epi32(res_lo_round, wt0));
......@@ -251,6 +258,9 @@ void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
const int w1 = conv_params->bck_offset;
const __m128i wt0 = _mm_set_epi32(w0, w0, w0, w0);
const __m128i wt1 = _mm_set_epi32(w1, w1, w1, w1);
const int jnt_round_const = 1 << (DIST_PRECISION_BITS - 2);
const __m128i jnt_r = _mm_set_epi32(jnt_round_const, jnt_round_const,
jnt_round_const, jnt_round_const);
/* Horizontal filter */
{
......@@ -406,14 +416,18 @@ void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
if (do_average) {
_mm_storeu_si128(
p + 0, _mm_srai_epi32(
_mm_add_epi32(_mm_loadu_si128(p + 0),
_mm_mullo_epi32(res_lo_round, wt1)),
_mm_add_epi32(_mm_add_epi32(_mm_loadu_si128(p + 0),
_mm_mullo_epi32(
res_lo_round, wt1)),
jnt_r),
DIST_PRECISION_BITS - 1));
_mm_storeu_si128(
p + 1, _mm_srai_epi32(
_mm_add_epi32(_mm_loadu_si128(p + 1),
_mm_mullo_epi32(res_hi_round, wt1)),
_mm_add_epi32(_mm_add_epi32(_mm_loadu_si128(p + 1),
_mm_mullo_epi32(
res_hi_round, wt1)),
jnt_r),
DIST_PRECISION_BITS - 1));
} else {
_mm_storeu_si128(p + 0, _mm_mullo_epi32(res_lo_round, wt0));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment