Commit 8ec5c077 authored by Yaowu Xu

Remove two more LPF macros

Change-Id: I60278e399f4f65aa63526e459947e88084f0e889
parent 6d0ed3ed
This diff is collapsed.
......@@ -130,7 +130,6 @@ static INLINE void highbd_flat_mask4(const __m128i *th, const __m128i *p,
flat_mask_internal(th, p, q, bd, 1, 4, flat);
}
#if CONFIG_DEBLOCK_13TAP
// Note:
// access p[6-4], p[0], and q[6-4], q[0]
static INLINE void highbd_flat_mask4_13(const __m128i *th, const __m128i *p,
......@@ -138,7 +137,6 @@ static INLINE void highbd_flat_mask4_13(const __m128i *th, const __m128i *p,
int bd) {
flat_mask_internal(th, p, q, bd, 4, 7, flat);
}
#endif
// Note:
// access p[7-4], p[0], and q[7-4], q[0]
......@@ -224,13 +222,8 @@ static INLINE void highbd_lpf_horz_edge_8_internal(uint16_t *s, int pitch,
__m128i blimit, limit, thresh;
get_limit(blt, lt, thr, bd, &blimit, &limit, &thresh);
#if CONFIG_DEBLOCK_13TAP
__m128i p[7], q[7];
load_highbd_pixel(s, 7, pitch, p, q);
#else
__m128i p[8], q[8];
load_highbd_pixel(s, 8, pitch, p, q);
#endif
__m128i mask;
highbd_filter_mask(p, q, &limit, &blimit, &mask);
......@@ -238,12 +231,7 @@ static INLINE void highbd_lpf_horz_edge_8_internal(uint16_t *s, int pitch,
__m128i flat, flat2;
const __m128i one = _mm_set1_epi16(1);
highbd_flat_mask4(&one, p, q, &flat, bd);
#if CONFIG_DEBLOCK_13TAP
highbd_flat_mask4_13(&one, p, q, &flat2, bd);
#else
highbd_flat_mask5(&one, p, q, &flat2, bd);
#endif
flat = _mm_and_si128(flat, mask);
flat2 = _mm_and_si128(flat2, flat);
......@@ -253,16 +241,11 @@ static INLINE void highbd_lpf_horz_edge_8_internal(uint16_t *s, int pitch,
// flat and wide flat calculations
__m128i flat_p[3], flat_q[3];
#if CONFIG_DEBLOCK_13TAP
__m128i flat2_p[6], flat2_q[6];
#else
__m128i flat2_p[7], flat2_q[7];
#endif
{
const __m128i eight = _mm_set1_epi16(8);
const __m128i four = _mm_set1_epi16(4);
#if CONFIG_DEBLOCK_13TAP
__m128i sum_p = _mm_add_epi16(p[5], _mm_add_epi16(p[4], p[3]));
__m128i sum_q = _mm_add_epi16(q[5], _mm_add_epi16(q[4], q[3]));
......@@ -379,79 +362,6 @@ static INLINE void highbd_lpf_horz_edge_8_internal(uint16_t *s, int pitch,
sum_q, _mm_add_epi16(
sum_q6, _mm_add_epi16(q[5], _mm_add_epi16(q[4], q[6])))),
4);
#else
__m128i sum_p =
_mm_add_epi16(_mm_add_epi16(p[6], p[5]), _mm_add_epi16(p[4], p[3]));
__m128i sum_q =
_mm_add_epi16(_mm_add_epi16(q[6], q[5]), _mm_add_epi16(q[4], q[3]));
__m128i sum_lp = _mm_add_epi16(p[0], _mm_add_epi16(p[2], p[1]));
sum_p = _mm_add_epi16(sum_p, sum_lp);
__m128i sum_lq = _mm_add_epi16(q[0], _mm_add_epi16(q[2], q[1]));
sum_q = _mm_add_epi16(sum_q, sum_lq);
sum_p = _mm_add_epi16(eight, _mm_add_epi16(sum_p, sum_q));
sum_lp = _mm_add_epi16(four, _mm_add_epi16(sum_lp, sum_lq));
flat2_p[0] =
_mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(p[7], p[0])), 4);
flat2_q[0] =
_mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(q[7], q[0])), 4);
flat_p[0] =
_mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(p[3], p[0])), 3);
flat_q[0] =
_mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(q[3], q[0])), 3);
__m128i sum_p7 = _mm_add_epi16(p[7], p[7]);
__m128i sum_q7 = _mm_add_epi16(q[7], q[7]);
__m128i sum_p3 = _mm_add_epi16(p[3], p[3]);
__m128i sum_q3 = _mm_add_epi16(q[3], q[3]);
sum_q = _mm_sub_epi16(sum_p, p[6]);
sum_p = _mm_sub_epi16(sum_p, q[6]);
flat2_p[1] =
_mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(sum_p7, p[1])), 4);
flat2_q[1] =
_mm_srli_epi16(_mm_add_epi16(sum_q, _mm_add_epi16(sum_q7, q[1])), 4);
sum_lq = _mm_sub_epi16(sum_lp, p[2]);
sum_lp = _mm_sub_epi16(sum_lp, q[2]);
flat_p[1] =
_mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(sum_p3, p[1])), 3);
flat_q[1] =
_mm_srli_epi16(_mm_add_epi16(sum_lq, _mm_add_epi16(sum_q3, q[1])), 3);
sum_p7 = _mm_add_epi16(sum_p7, p[7]);
sum_q7 = _mm_add_epi16(sum_q7, q[7]);
sum_p3 = _mm_add_epi16(sum_p3, p[3]);
sum_q3 = _mm_add_epi16(sum_q3, q[3]);
sum_p = _mm_sub_epi16(sum_p, q[5]);
sum_q = _mm_sub_epi16(sum_q, p[5]);
flat2_p[2] =
_mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(sum_p7, p[2])), 4);
flat2_q[2] =
_mm_srli_epi16(_mm_add_epi16(sum_q, _mm_add_epi16(sum_q7, q[2])), 4);
sum_lp = _mm_sub_epi16(sum_lp, q[1]);
sum_lq = _mm_sub_epi16(sum_lq, p[1]);
flat_p[2] =
_mm_srli_epi16(_mm_add_epi16(sum_lp, _mm_add_epi16(sum_p3, p[2])), 3);
flat_q[2] =
_mm_srli_epi16(_mm_add_epi16(sum_lq, _mm_add_epi16(sum_q3, q[2])), 3);
int i;
for (i = 3; i < 7; ++i) {
sum_p7 = _mm_add_epi16(sum_p7, p[7]);
sum_q7 = _mm_add_epi16(sum_q7, q[7]);
sum_p = _mm_sub_epi16(sum_p, q[7 - i]);
sum_q = _mm_sub_epi16(sum_q, p[7 - i]);
flat2_p[i] =
_mm_srli_epi16(_mm_add_epi16(sum_p, _mm_add_epi16(sum_p7, p[i])), 4);
flat2_q[i] =
_mm_srli_epi16(_mm_add_epi16(sum_q, _mm_add_epi16(sum_q7, q[i])), 4);
}
#endif
}
// highbd_filter8
......@@ -474,12 +384,8 @@ static INLINE void highbd_lpf_horz_edge_8_internal(uint16_t *s, int pitch,
q[i] = _mm_or_si128(qs[i], flat_q[i]);
}
// highbd_filter16
#if CONFIG_DEBLOCK_13TAP
// highbd_filter16
for (i = 5; i >= 0; i--) {
#else
for (i = 6; i >= 0; i--) {
#endif
// p[i] remains unchanged if !(flat2 && flat && mask)
p[i] = _mm_andnot_si128(flat2, p[i]);
flat2_p[i] = _mm_and_si128(flat2, flat2_p[i]);
......
This diff is collapsed.
......@@ -35,13 +35,6 @@ static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
#endif // CONFIG_EXT_DELTA_Q
#endif // CONFIG_LOOPFILTER_LEVEL
// PARALLEL_DEBLOCKING_5_TAP_CHROMA mirrors CONFIG_DEBLOCK_13TAP: it is defined
// as 1 when that flag evaluates non-zero in the preprocessor and as 0
// otherwise, so later code can test it with a plain #if.
#if CONFIG_DEBLOCK_13TAP
#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 1
#else
#define PARALLEL_DEBLOCKING_5_TAP_CHROMA 0
#endif
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
extern void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
......@@ -51,7 +44,6 @@ extern void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh, int bd);
#endif
// 64 bit masks for left transform size. Each 1 represents a position where
// we should apply a loop filter across the left border of an 8x8 block
......@@ -2125,21 +2117,15 @@ static void set_lpf_parameters(
if (TX_4X4 >= min_ts) {
params->filter_length = 4;
} else if (TX_8X8 == min_ts) {
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
if (plane != 0)
params->filter_length = 6;
else
#endif
params->filter_length = 8;
} else {
params->filter_length = 16;
// No wide filtering for chroma plane
if (plane != 0) {
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
params->filter_length = 6;
#else
params->filter_length = 8;
#endif
}
}
......@@ -2199,7 +2185,6 @@ static void av1_filter_block_plane_vert(
aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
case 6: // apply 6-tap filter for chroma plane only
assert(plane != 0);
if (cm->use_highbitdepth)
......@@ -2210,7 +2195,6 @@ static void av1_filter_block_plane_vert(
aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
#endif
// apply 8-tap filtering
case 8:
if (cm->use_highbitdepth)
......@@ -2224,23 +2208,12 @@ static void av1_filter_block_plane_vert(
// apply 16-tap filtering
case 16:
if (cm->use_highbitdepth)
#if CONFIG_DEBLOCK_13TAP
aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
#else
aom_highbd_lpf_vertical_16(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
#endif
else
#if CONFIG_DEBLOCK_13TAP
aom_lpf_vertical_16(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
#else
aom_lpf_vertical_16(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
#endif
break;
// no filtering
default: break;
......@@ -2289,7 +2262,6 @@ static void av1_filter_block_plane_horz(
aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
// apply 6-tap filtering
case 6:
assert(plane != 0);
......@@ -2301,7 +2273,6 @@ static void av1_filter_block_plane_horz(
aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
#endif
// apply 8-tap filtering
case 8:
if (cm->use_highbitdepth)
......@@ -2315,23 +2286,12 @@ static void av1_filter_block_plane_horz(
// apply 16-tap filtering
case 16:
if (cm->use_highbitdepth)
#if CONFIG_DEBLOCK_13TAP
aom_highbd_lpf_horizontal_16(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
#else
aom_highbd_lpf_horizontal_16(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
#endif
else
#if CONFIG_DEBLOCK_13TAP
aom_lpf_horizontal_16(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
#else
aom_lpf_horizontal_16(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
#endif
break;
// no filtering
default: break;
......
......@@ -95,7 +95,6 @@ set(CONFIG_CDF_STORAGE_REDUCTION 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CFL 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_CICP 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_COLORSPACE_HEADERS 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DEBLOCK_13TAP 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DEPENDENT_HORZTILEGROUPS 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DEPENDENT_HORZTILES 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_DIST_8X8 1 CACHE NUMBER "AV1 experiment flag.")
......
......@@ -446,19 +446,10 @@ const hbdloop_param_t kHbdLoop8Test6[] = {
make_tuple(&aom_highbd_lpf_horizontal_4_sse2, &aom_highbd_lpf_horizontal_4_c,
8),
make_tuple(&aom_highbd_lpf_vertical_4_sse2, &aom_highbd_lpf_vertical_4_c, 8),
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
make_tuple(&aom_highbd_lpf_horizontal_6_sse2, &aom_highbd_lpf_horizontal_6_c,
8),
make_tuple(&aom_highbd_lpf_vertical_6_sse2, &aom_highbd_lpf_vertical_6_c, 8),
#endif
make_tuple(&aom_highbd_lpf_horizontal_8_sse2, &aom_highbd_lpf_horizontal_8_c,
8),
make_tuple(&aom_highbd_lpf_horizontal_16_sse2,
&aom_highbd_lpf_horizontal_16_c, 8),
#if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet
make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2,
&aom_highbd_lpf_horizontal_16_dual_c, 8),
#endif
make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 8),
make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
8),
......@@ -469,10 +460,6 @@ const hbdloop_param_t kHbdLoop8Test6[] = {
10),
make_tuple(&aom_highbd_lpf_horizontal_16_sse2,
&aom_highbd_lpf_horizontal_16_c, 10),
#if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet
make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2,
&aom_highbd_lpf_horizontal_16_dual_c, 10),
#endif
make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 10),
make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
10),
......@@ -483,18 +470,6 @@ const hbdloop_param_t kHbdLoop8Test6[] = {
12),
make_tuple(&aom_highbd_lpf_horizontal_16_sse2,
&aom_highbd_lpf_horizontal_16_c, 12),
#if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet
make_tuple(&aom_highbd_lpf_horizontal_16_dual_sse2,
&aom_highbd_lpf_horizontal_16_dual_c, 12),
make_tuple(&aom_highbd_lpf_vertical_16_sse2, &aom_highbd_lpf_vertical_16_c,
12),
make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
&aom_highbd_lpf_vertical_16_dual_c, 8),
make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
&aom_highbd_lpf_vertical_16_dual_c, 10),
make_tuple(&aom_highbd_lpf_vertical_16_dual_sse2,
&aom_highbd_lpf_vertical_16_dual_c, 12),
#endif
make_tuple(&aom_highbd_lpf_vertical_8_sse2, &aom_highbd_lpf_vertical_8_c, 12)
};
......@@ -504,21 +479,12 @@ INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_hbd,
// Parameter table of loop_param_t tuples. Every entry pairs a `*_sse2`
// function with the `*_c` function of the same name and carries the constant 8
// as its third field (presumably the bit depth -- confirm against
// loop_param_t). Which entries are compiled depends on CONFIG_DEBLOCK_13TAP.
const loop_param_t kLoop8Test6[] = {
make_tuple(&aom_lpf_horizontal_4_sse2, &aom_lpf_horizontal_4_c, 8),
make_tuple(&aom_lpf_horizontal_8_sse2, &aom_lpf_horizontal_8_c, 8),
// The 6-wide entries are present only when CONFIG_DEBLOCK_13TAP is enabled.
#if CONFIG_DEBLOCK_13TAP
make_tuple(&aom_lpf_horizontal_6_sse2, &aom_lpf_horizontal_6_c, 8),
make_tuple(&aom_lpf_vertical_6_sse2, &aom_lpf_vertical_6_c, 8),
#endif
make_tuple(&aom_lpf_horizontal_16_sse2, &aom_lpf_horizontal_16_c, 8),
// The 16-wide "dual" entries are present only when CONFIG_DEBLOCK_13TAP is
// disabled.
#if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet
make_tuple(&aom_lpf_horizontal_16_dual_sse2, &aom_lpf_horizontal_16_dual_c,
8),
#endif
make_tuple(&aom_lpf_vertical_4_sse2, &aom_lpf_vertical_4_c, 8),
make_tuple(&aom_lpf_vertical_8_sse2, &aom_lpf_vertical_8_c, 8),
make_tuple(&aom_lpf_vertical_16_sse2, &aom_lpf_vertical_16_c, 8),
// When this guard is false the initializer list ends with the trailing comma
// of the vertical_16 entry above.
#if !CONFIG_DEBLOCK_13TAP
make_tuple(&aom_lpf_vertical_16_dual_sse2, &aom_lpf_vertical_16_dual_c, 8)
#endif
};
INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_lbd,
......@@ -526,28 +492,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, Loop8Test6Param_lbd,
#endif // HAVE_SSE2
// AVX2 instantiation of Loop8Test9Param_hbd. The table and its
// INSTANTIATE_TEST_CASE_P are compiled only when HAVE_AVX2 is set and
// CONFIG_DEBLOCK_13TAP is not.
#if HAVE_AVX2
#if !CONFIG_DEBLOCK_13TAP // No SIMD implementation for deblock_13tap yet
// Each tuple pairs a `*_16_dual_avx2` function with the `*_16_dual_c` function
// of the same name; the horizontal and vertical variants each appear with the
// third field set to 8, 10 and 12 (presumably bit depths -- confirm against
// hbddual_loop_param_t).
const hbddual_loop_param_t kHbdLoop8Test9Avx2[] = {
make_tuple(&aom_highbd_lpf_horizontal_16_dual_avx2,
&aom_highbd_lpf_horizontal_16_dual_c, 8),
make_tuple(&aom_highbd_lpf_horizontal_16_dual_avx2,
&aom_highbd_lpf_horizontal_16_dual_c, 10),
make_tuple(&aom_highbd_lpf_horizontal_16_dual_avx2,
&aom_highbd_lpf_horizontal_16_dual_c, 12),
make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
&aom_highbd_lpf_vertical_16_dual_c, 8),
make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
&aom_highbd_lpf_vertical_16_dual_c, 10),
make_tuple(&aom_highbd_lpf_vertical_16_dual_avx2,
&aom_highbd_lpf_vertical_16_dual_c, 12)
};
// Registers one test instance per table entry under the "AVX2" prefix.
INSTANTIATE_TEST_CASE_P(AVX2, Loop8Test9Param_hbd,
::testing::ValuesIn(kHbdLoop8Test9Avx2));
#endif // !CONFIG_DEBLOCK_13TAP
#endif // HAVE_AVX2
#if HAVE_SSE2
const hbddual_loop_param_t kHbdLoop8Test9[] = {
make_tuple(&aom_highbd_lpf_horizontal_4_dual_sse2,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment