Commit 238bc287 authored by Cheng Chen

Turn on jnt_comp by default

Turn off CONFIG_RD_DEBUG when jnt_comp is on, to avoid stack size
overflow.

Make subpel processing for width <= 4 correct.

Change-Id: Ic1de96ff2eff4a80543e19531fa75511b0a2f427
parent 1ed20248
......@@ -25,7 +25,7 @@ void aom_var_filter_block2d_bil_first_pass_ssse3(
const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
unsigned int pixel_step, unsigned int output_height,
unsigned int output_width, const uint8_t *filter) {
// Note: filter[0], filter[1] and be {128, 0}, where 128 will overflow
// Note: filter[0], filter[1] could be {128, 0}, where 128 will overflow
// in computation using _mm_maddubs_epi16.
// Change {128, 0} to {64, 0} and reduce FILTER_BITS by 1 to avoid overflow.
const int16_t round = (1 << (FILTER_BITS - 1)) >> 1;
......@@ -70,8 +70,7 @@ void aom_var_filter_block2d_bil_first_pass_ssse3(
a += src_pixels_per_line - output_width;
}
} else {
// output_width := 4, process two lines
for (i = 0; i < output_height; i += 2) {
for (i = 0; i < output_height; ++i) {
// load source, only first 5 values are meaningful:
// { a[0], a[1], a[2], a[3], a[4], xxxx }
__m128i source = xx_loadl_64(a);
......@@ -79,27 +78,15 @@ void aom_var_filter_block2d_bil_first_pass_ssse3(
// shuffle, up to the first 8 are useful
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
__m128i shuffle_lo = _mm_shuffle_epi8(source, shuffle_mask);
__m128i source_high_0 = xx_loadl_32(a + src_pixels_per_line);
__m128i source_high_1 = _mm_setzero_si128();
// avoid load undefined memory
if (a + src_pixels_per_line + 4 != NULL)
source_high_1 = xx_loadl_32(a + src_pixels_per_line + 4);
source = _mm_unpacklo_epi32(source_high_0, source_high_1);
__m128i shuffle_hi = _mm_shuffle_epi8(source, shuffle_mask);
__m128i source_shuffle = _mm_unpacklo_epi64(shuffle_lo, shuffle_hi);
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
__m128i res = _mm_maddubs_epi16(source_shuffle, filters);
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
xx_storel_64(b, res);
xx_storel_64(b + output_width, _mm_srli_si128(res, 8));
a += src_pixels_per_line * 2;
b += output_width * 2;
a += src_pixels_per_line;
b += output_width;
}
}
}
......
......@@ -125,7 +125,7 @@ set(CONFIG_INTER_STATS_ONLY 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_INTRABC 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_INTRA_EDGE 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_INTRA_EDGE2 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_JNT_COMP 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_JNT_COMP 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_LOOPFILTERING_ACROSS_TILES 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_LOOPFILTERING_ACROSS_TILES_EXT 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_LOOPFILTER_LEVEL 1 CACHE NUMBER "AV1 experiment flag.")
......
......@@ -64,6 +64,12 @@ macro (fix_experiment_configs)
change_config_and_warn(CONFIG_AOM_QM 1 CONFIG_AOM_QM_EXT)
endif ()
endif ()
if (CONFIG_JNT_COMP)
if (CONFIG_RD_DEBUG)
change_config_and_warn(CONFIG_RD_DEBUG 0 CONFIG_JNT_COMP)
endif()
endif()
endmacro ()
endif () # AOM_BUILD_CMAKE_AOM_EXPERIMENT_DEPS_CMAKE_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment