Commit 8ac419f3 authored by Yaowu Xu's avatar Yaowu Xu Committed by Gerrit Code Review
Browse files

Merge changes Ic3a68557,Ib1dbe41a,I0da09270,Ibdbd720d into nextgenv2

* changes:
  Deringing cleanup: remove DERING_REFINEMENT (always on now)
  Don't run the deringing filter on skipped blocks within a superblock
  Don't dering skipped superblocks
  On x86 use _mm_set_epi32 when _mm_cvtsi64_si128 isn't available
parents 89d3f2fd e874ce03
......@@ -162,7 +162,11 @@ SIMD_INLINE v128 v128_unziphi_8(v128 a, v128 b) {
SIMD_INLINE v128 v128_unziplo_8(v128 a, v128 b) {
#if defined(__SSSE3__)
#ifdef __x86_64__
v128 order = _mm_cvtsi64_si128(0x0e0c0a0806040200LL);
#else
v128 order = _mm_set_epi32(0, 0, 0x0e0c0a08, 0x06040200);
#endif
return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
_mm_shuffle_epi8(a, order));
#else
......@@ -176,7 +180,11 @@ SIMD_INLINE v128 v128_unziphi_16(v128 a, v128 b) {
SIMD_INLINE v128 v128_unziplo_16(v128 a, v128 b) {
#if defined(__SSSE3__)
#ifdef __x86_64__
v128 order = _mm_cvtsi64_si128(0x0d0c090805040100LL);
#else
v128 order = _mm_set_epi32(0, 0, 0x0d0c0908, 0x05040100);
#endif
return _mm_unpacklo_epi64(_mm_shuffle_epi8(b, order),
_mm_shuffle_epi8(a, order));
#else
......
......@@ -47,7 +47,11 @@ SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
}
/* Build a v64 (the low 64 bits of an SSE register) from a uint64_t.
   On x86-64, _mm_cvtsi64_si128 moves the value directly into the
   register. On 32-bit x86 that intrinsic is unavailable, so the value
   is assembled from its two 32-bit halves instead. */
SIMD_INLINE v64 v64_from_64(uint64_t x) {
#ifdef __x86_64__
  return _mm_cvtsi64_si128(x);
#else
  /* _mm_set_epi32 takes int arguments; cast both halves explicitly so
     the 64->32 bit truncation is intentional and warning-free (the
     original cast only the low half). */
  return _mm_set_epi32(0, 0, (uint32_t)(x >> 32), (uint32_t)x);
#endif
}
SIMD_INLINE uint64_t v64_u64(v64 x) {
......@@ -168,7 +172,7 @@ SIMD_INLINE v64 v64_pack_s16_s8(v64 a, v64 b) {
SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) {
#if defined(__SSSE3__)
return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
_mm_cvtsi64_si128(0x0f0d0b0907050301LL));
v64_from_64(0x0f0d0b0907050301LL));
#else
return _mm_packus_epi16(
_mm_unpacklo_epi64(_mm_srli_epi16(b, 8), _mm_srli_epi16(a, 8)),
......@@ -179,7 +183,7 @@ SIMD_INLINE v64 v64_unziphi_8(v64 a, v64 b) {
SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) {
#if defined(__SSSE3__)
return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
_mm_cvtsi64_si128(0x0e0c0a0806040200LL));
v64_from_64(0x0e0c0a0806040200LL));
#else
return v64_unziphi_8(_mm_slli_si128(a, 1), _mm_slli_si128(b, 1));
#endif
......@@ -188,7 +192,7 @@ SIMD_INLINE v64 v64_unziplo_8(v64 a, v64 b) {
SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) {
#if defined(__SSSE3__)
return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
_mm_cvtsi64_si128(0x0f0e0b0a07060302LL));
v64_from_64(0x0f0e0b0a07060302LL));
#else
return _mm_packs_epi32(
_mm_unpacklo_epi64(_mm_srai_epi32(b, 16), _mm_srai_epi32(a, 16)),
......@@ -199,7 +203,7 @@ SIMD_INLINE v64 v64_unziphi_16(v64 a, v64 b) {
SIMD_INLINE v64 v64_unziplo_16(v64 a, v64 b) {
#if defined(__SSSE3__)
return _mm_shuffle_epi8(_mm_unpacklo_epi64(b, a),
_mm_cvtsi64_si128(0x0d0c090805040100LL));
v64_from_64(0x0d0c090805040100LL));
#else
return v64_unziphi_16(_mm_slli_si128(a, 2), _mm_slli_si128(b, 2));
#endif
......
......@@ -101,19 +101,15 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
for (pli = 0; pli < 3; pli++) {
int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
int threshold;
#if DERING_REFINEMENT
level = compute_level_from_index(
global_level,
cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.dering_gain);
#else
level = global_level;
#endif
/* FIXME: This is a temporary hack that uses more conservative
deringing for chroma. */
if (pli) level = (level * 5 + 4) >> 3;
if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) level = 0;
if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
threshold = level << coeff_shift;
od_dering(&OD_DERING_VTBL_C, dst, MAX_MIB_SIZE * bsize[pli],
&src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
......
......@@ -24,7 +24,6 @@ extern "C" {
#define DERING_LEVEL_BITS 6
#define MAX_DERING_LEVEL (1 << DERING_LEVEL_BITS)
#define DERING_REFINEMENT 1
#define DERING_REFINEMENT_BITS 2
#define DERING_REFINEMENT_LEVELS 4
......
......@@ -275,6 +275,13 @@ void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
in[i * OD_FILT_BSTRIDE + j] = x[i * xstride + j];
}
}
/* Assume deringing filter is sparsely applied, so do one large copy rather
than small copies later if deringing is skipped. */
for (i = 0; i < nvb << bsize; i++) {
for (j = 0; j < nhb << bsize; j++) {
y[i * ystride + j] = in[i * OD_FILT_BSTRIDE + j];
}
}
if (pli == 0) {
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
......@@ -325,6 +332,7 @@ void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
}
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
if (thresh[by][bx] == 0) continue;
(vtbl->filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], thresh[by][bx],
......@@ -338,6 +346,7 @@ void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
}
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
if (thresh[by][bx] == 0) continue;
(vtbl->filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
......
......@@ -1772,7 +1772,7 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
if (bsize >= BLOCK_8X8 &&
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh);
#if DERING_REFINEMENT
#if CONFIG_DERING
if (bsize == BLOCK_64X64) {
if (cm->dering_level != 0 && !sb_all_skip(cm, mi_row, mi_col)) {
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.dering_gain =
......@@ -1782,7 +1782,7 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
0;
}
}
#endif // DERGING_REFINEMENT
#endif
#endif // CONFIG_EXT_PARTITION_TYPES
}
......
......@@ -1869,7 +1869,7 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
(bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
#if DERING_REFINEMENT
#if CONFIG_DERING
if (bsize == BLOCK_64X64 && cm->dering_level != 0 &&
!sb_all_skip(cm, mi_row, mi_col)) {
aom_write_literal(
......
......@@ -96,6 +96,7 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int16_t dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
for (level = 0; level < 64; level++) {
int cur_mse;
int threshold;
......@@ -117,7 +118,6 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
}
}
}
#if DERING_REFINEMENT
best_level = 0;
/* Search for the best global level one value at a time. */
for (global_level = 2; global_level < MAX_DERING_LEVEL; global_level++) {
......@@ -126,6 +126,7 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
for (sbc = 0; sbc < nhsb; sbc++) {
int gi;
int best_mse = mse[nhsb * sbr + sbc][0];
if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
for (gi = 1; gi < 4; gi++) {
level = compute_level_from_index(global_level, gi);
if (mse[nhsb * sbr + sbc][level] < best_mse) {
......@@ -145,6 +146,7 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int gi;
int best_gi;
int best_mse = mse[nhsb * sbr + sbc][0];
if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
best_gi = 0;
for (gi = 1; gi < DERING_REFINEMENT_LEVELS; gi++) {
level = compute_level_from_index(best_level, gi);
......@@ -158,12 +160,6 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
->mbmi.dering_gain = best_gi;
}
}
#else
best_level = 0;
for (level = 0; level < MAX_DERING_LEVEL; level++) {
if (tot_mse[level] < tot_mse[best_level]) best_level = level;
}
#endif
aom_free(src);
aom_free(ref_coeff);
aom_free(bskip);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment