Commit 9b44d9d0 authored by James Zern
Browse files

split vpx_highbd_lpf_horizontal_16 in two

replace with vpx_highbd_lpf_horizontal_edge_16 and
vpx_highbd_lpf_horizontal_edge_8 to avoid passing a count parameter

Change-Id: I551f8cec0fce57032cb2652584bb802e2248644d
parent 1b519fb6
...@@ -475,10 +475,10 @@ INSTANTIATE_TEST_CASE_P( ...@@ -475,10 +475,10 @@ INSTANTIATE_TEST_CASE_P(
&wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 8, 1), &wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>,
&wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 8, 1), &wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 8, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_sse2>,
&vpx_highbd_lpf_horizontal_16_c, 8, 1), &wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_c>, 8, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_sse2>,
&vpx_highbd_lpf_horizontal_16_c, 8, 2), &wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 8, 1), &wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>,
...@@ -489,10 +489,10 @@ INSTANTIATE_TEST_CASE_P( ...@@ -489,10 +489,10 @@ INSTANTIATE_TEST_CASE_P(
&wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 10, 1), &wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 10, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>,
&wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 10, 1), &wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 10, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_sse2>,
&vpx_highbd_lpf_horizontal_16_c, 10, 1), &wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_c>, 10, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_sse2>,
&vpx_highbd_lpf_horizontal_16_c, 10, 2), &wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_c>, 10, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 10, 1), &wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 10, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>,
...@@ -503,10 +503,10 @@ INSTANTIATE_TEST_CASE_P( ...@@ -503,10 +503,10 @@ INSTANTIATE_TEST_CASE_P(
&wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 12, 1), &wrapper_nc<vpx_highbd_lpf_vertical_4_c>, 12, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_8_sse2>,
&wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 12, 1), &wrapper_nc<vpx_highbd_lpf_horizontal_8_c>, 12, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_sse2>,
&vpx_highbd_lpf_horizontal_16_c, 12, 1), &wrapper_nc<vpx_highbd_lpf_horizontal_edge_8_c>, 12, 1),
make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, make_tuple(&wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_sse2>,
&vpx_highbd_lpf_horizontal_16_c, 12, 2), &wrapper_nc<vpx_highbd_lpf_horizontal_edge_16_c>, 12, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_8_sse2>,
&wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 12, 1), &wrapper_nc<vpx_highbd_lpf_vertical_8_c>, 12, 1),
make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>, make_tuple(&wrapper_nc<vpx_highbd_lpf_vertical_16_sse2>,
......
...@@ -609,12 +609,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, ...@@ -609,12 +609,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
if (mask & 1) { if (mask & 1) {
if (mask_16x16 & 1) { if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) { if ((mask_16x16 & 3) == 3) {
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2, bd); lfi->hev_thr, bd);
count = 2; count = 2;
} else { } else {
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd); lfi->hev_thr, bd);
} }
} else if (mask_8x8 & 1) { } else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) { if ((mask_8x8 & 3) == 3) {
......
...@@ -609,12 +609,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, ...@@ -609,12 +609,12 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
if (mask & 1) { if (mask & 1) {
if (mask_16x16 & 1) { if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) { if ((mask_16x16 & 3) == 3) {
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2, bd); lfi->hev_thr, bd);
count = 2; count = 2;
} else { } else {
vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1, bd); lfi->hev_thr, bd);
} }
} else if (mask_8x8 & 1) { } else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) { if ((mask_8x8 & 3) == 3) {
......
...@@ -669,9 +669,11 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, ...@@ -669,9 +669,11 @@ static INLINE void highbd_filter16(int8_t mask, uint8_t thresh,
} }
} }
void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
const uint8_t *limit, const uint8_t *thresh, const uint8_t *blimit,
int count, int bd) { const uint8_t *limit,
const uint8_t *thresh,
int count, int bd) {
int i; int i;
// loop filter designed to work using chars so that we can make maximum use // loop filter designed to work using chars so that we can make maximum use
...@@ -703,6 +705,20 @@ void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit, ...@@ -703,6 +705,20 @@ void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
} }
} }
// Count-free entry point for the high bit-depth horizontal edge filter.
// Forwards every argument unchanged to highbd_mb_lpf_horizontal_edge_w with
// the worker's count argument fixed at 1.
//   s      - pointer into the 16-bit sample buffer, passed through as-is
//   p      - passed through as-is (NOTE(review): presumably the row pitch)
//   blimit, limit, thresh - filter threshold pointers, passed through as-is
//   bd     - bit depth, passed through as-is
void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p,
                                        const uint8_t *blimit,
                                        const uint8_t *limit,
                                        const uint8_t *thresh, int bd) {
  const int count = 1;  // the "_8" variant always requests a count of 1
  highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, count, bd);
}
// Count-free entry point for the high bit-depth horizontal edge filter.
// Forwards every argument unchanged to highbd_mb_lpf_horizontal_edge_w with
// the worker's count argument fixed at 2.
//   s      - pointer into the 16-bit sample buffer, passed through as-is
//   p      - passed through as-is (NOTE(review): presumably the row pitch)
//   blimit, limit, thresh - filter threshold pointers, passed through as-is
//   bd     - bit depth, passed through as-is
void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
                                         const uint8_t *blimit,
                                         const uint8_t *limit,
                                         const uint8_t *thresh, int bd) {
  const int count = 2;  // the "_16" variant always requests a count of 2
  highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, count, bd);
}
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
const uint8_t *blimit, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *limit,
......
...@@ -588,8 +588,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { ...@@ -588,8 +588,11 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/; specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd"; add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_horizontal_16 sse2/; specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_horizontal_8 sse2/; specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
......
...@@ -51,12 +51,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) { ...@@ -51,12 +51,10 @@ static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
// TODO(debargha, peter): Break up large functions into smaller ones // TODO(debargha, peter): Break up large functions into smaller ones
// in this file. // in this file.
static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
int p, const uint8_t *_blimit,
const uint8_t *_blimit, const uint8_t *_limit,
const uint8_t *_limit, const uint8_t *_thresh, int bd) {
const uint8_t *_thresh,
int bd) {
const __m128i zero = _mm_set1_epi16(0); const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi16(1); const __m128i one = _mm_set1_epi16(1);
__m128i blimit, limit, thresh; __m128i blimit, limit, thresh;
...@@ -496,27 +494,12 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s, ...@@ -496,27 +494,12 @@ static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s,
_mm_store_si128((__m128i *)(s - 0 * p), q0); _mm_store_si128((__m128i *)(s - 0 * p), q0);
} }
static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s, void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p,
int p, const uint8_t *_blimit,
const uint8_t *_blimit, const uint8_t *_limit,
const uint8_t *_limit, const uint8_t *_thresh, int bd) {
const uint8_t *_thresh, vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd);
int bd) { vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd);
highbd_mb_lpf_horizontal_edge_w_sse2_8(s + 8, p, _blimit, _limit, _thresh,
bd);
}
// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
int count, int bd) {
if (count == 1)
highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd);
else
highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd);
} }
void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p, void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
...@@ -1171,8 +1154,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p, ...@@ -1171,8 +1154,8 @@ void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int p,
highbd_transpose(src, p, dst, 8, 2); highbd_transpose(src, p, dst, 8, 2);
// Loop filtering // Loop filtering
highbd_mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit,
thresh, bd); thresh, bd);
src[0] = t_dst; src[0] = t_dst;
src[1] = t_dst + 8 * 8; src[1] = t_dst + 8 * 8;
dst[0] = s - 8; dst[0] = s - 8;
...@@ -1195,8 +1178,8 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, ...@@ -1195,8 +1178,8 @@ void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s,
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
// Loop filtering // Loop filtering
highbd_mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit, vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit,
thresh, bd); thresh, bd);
// Transpose back // Transpose back
highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p); highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment