Commit 8fd3f9a2 authored by Marco

Enable non-rd mode coding on key frame, for speed 6.

For key frames at speed 6: enable non-rd mode selection in the speed settings
and use the (non-rd) variance-based partition.

Adjust some logic/thresholds in the variance-based partition selection for key frames only
(no change to delta frames), mainly to bias toward selecting smaller prediction blocks,
and also set the max transform size to 16x16.

Key frame quality drops (~0.6-0.7dB) compared to rd coding,
but key frame encoding is sped up by at least 6x.
Average PSNR/SSIM metrics over RTC clips go down by ~1-2% for speed 6.

Change-Id: Ie4845e0127e876337b9c105aa37e93b286193405
parent 99874f55
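For context, the standalone sketch below (not part of the patch) illustrates how the key-frame
variance partition gets its leaf samples: each 4x4 block of the source is reduced to a rounded
average (mirroring the vp9_avg_4x4_c added in this change), and since a key frame has no
reconstructed reference, the sample is the difference of that average from 128, squared
(one sample per 4x4 block, as done in choose_partitioning below). The avg_4x4 helper and the
main() harness here are illustrative only.

    #include <stdint.h>
    #include <stdio.h>

    /* Same rounding as the new vp9_avg_4x4_c: (sum + 8) >> 4. */
    static unsigned int avg_4x4(const uint8_t *s, int stride) {
      int sum = 0;
      for (int i = 0; i < 4; ++i, s += stride)
        for (int j = 0; j < 4; ++j)
          sum += s[j];
      return (sum + 8) >> 4;
    }

    int main(void) {
      uint8_t block[16];
      for (int i = 0; i < 16; ++i) block[i] = 200;   /* flat 4x4 source block */
      const int s_avg = (int)avg_4x4(block, 4);
      const int sum = s_avg - 128;                   /* key frame: reference average is 128 */
      const unsigned int sse = (unsigned int)(sum * sum);
      printf("avg=%d sum=%d sse=%u\n", s_avg, sum, sse);  /* avg=200 sum=72 sse=5184 */
      return 0;
    }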
@@ -57,7 +57,7 @@ class AverageTestBase : public ::testing::Test {
   }
 
   // Sum Pixels
-  unsigned int ReferenceAverage(const uint8_t* source, int pitch ) {
+  unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch ) {
     unsigned int average = 0;
     for (int h = 0; h < 8; ++h)
       for (int w = 0; w < 8; ++w)
@@ -65,6 +65,14 @@ class AverageTestBase : public ::testing::Test {
     return ((average + 32) >> 6);
   }
 
+  unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch ) {
+    unsigned int average = 0;
+    for (int h = 0; h < 4; ++h)
+      for (int w = 0; w < 4; ++w)
+        average += source[h * source_stride_ + w];
+    return ((average + 8) >> 4);
+  }
+
   void FillConstant(uint8_t fill_constant) {
     for (int i = 0; i < width_ * height_; ++i) {
       source_data_[i] = fill_constant;
@@ -85,7 +93,7 @@ class AverageTestBase : public ::testing::Test {
 };
 
 typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);
-typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;
+typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;
 
 class AverageTest
     : public AverageTestBase,
@@ -95,12 +103,18 @@ class AverageTest
  protected:
   void CheckAverages() {
-    unsigned int expected = ReferenceAverage(source_data_+ GET_PARAM(2),
-                                             source_stride_);
+    unsigned int expected = 0;
+    if (GET_PARAM(3) == 8) {
+      expected = ReferenceAverage8x8(source_data_+ GET_PARAM(2),
+                                     source_stride_);
+    } else if (GET_PARAM(3) == 4) {
+      expected = ReferenceAverage4x4(source_data_+ GET_PARAM(2),
+                                     source_stride_);
+    }
 
-    ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_+ GET_PARAM(2),
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(4)(source_data_+ GET_PARAM(2),
                                           source_stride_));
 
-    unsigned int actual = GET_PARAM(3)(source_data_+ GET_PARAM(2),
+    unsigned int actual = GET_PARAM(4)(source_data_+ GET_PARAM(2),
                                        source_stride_);
 
     EXPECT_EQ(expected, actual);
@@ -134,16 +148,20 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
     C, AverageTest,
     ::testing::Values(
-        make_tuple(16, 16, 1, &vp9_avg_8x8_c)));
+        make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
+        make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
 
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
     SSE2, AverageTest,
     ::testing::Values(
-        make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),
-        make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),
-        make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));
+        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_sse2),
+        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_sse2),
+        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_sse2),
+        make_tuple(16, 16, 0, 4, &vp9_avg_4x4_sse2),
+        make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2),
+        make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2)));
 
 #endif
@@ -1135,9 +1135,14 @@ specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
 add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
 specialize qw/vp9_avg_8x8 sse2/;
 
+add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
+specialize qw/vp9_avg_4x4 sse2/;
+
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
   add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
   specialize qw/vp9_highbd_avg_8x8/;
+  add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
+  specialize qw/vp9_highbd_avg_4x4/;
 }
 
 # ENCODEMB INVOKE
@@ -19,6 +19,15 @@ unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
   return (sum + 32) >> 6;
 }
 
+unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
+  int i, j;
+  int sum = 0;
+  for (i = 0; i < 4; ++i, s+=p)
+    for (j = 0; j < 4; sum += s[j], ++j) {}
+  return (sum + 8) >> 4;
+}
+
 #if CONFIG_VP9_HIGHBITDEPTH
 unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
   int i, j;
@@ -29,5 +38,16 @@ unsigned int vp9_highbd_avg_8x8_c(const uint8_t *s8, int p) {
   return (sum + 32) >> 6;
 }
 
+unsigned int vp9_highbd_avg_4x4_c(const uint8_t *s8, int p) {
+  int i, j;
+  int sum = 0;
+  const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
+  for (i = 0; i < 4; ++i, s+=p)
+    for (j = 0; j < 4; sum += s[j], ++j) {}
+  return (sum + 8) >> 4;
+}
+
 #endif // CONFIG_VP9_HIGHBITDEPTH
@@ -291,6 +291,11 @@ typedef struct {
 typedef struct {
   partition_variance part_variances;
   var split[4];
+} v4x4;
+
+typedef struct {
+  partition_variance part_variances;
+  v4x4 split[4];
 } v8x8;
 
 typedef struct {
@@ -348,6 +353,13 @@ static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
     case BLOCK_8X8: {
       v8x8 *vt = (v8x8 *) data;
       node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i].part_variances.none;
+      break;
+    }
+    case BLOCK_4X4: {
+      v4x4 *vt = (v4x4 *) data;
+      node->part_variances = &vt->part_variances;
       for (i = 0; i < 4; i++)
         node->split[i] = &vt->split[i];
       break;
@@ -398,64 +410,76 @@ static int set_vt_partitioning(VP9_COMP *cpi,
   variance_node vt;
   const int block_width = num_8x8_blocks_wide_lookup[bsize];
   const int block_height = num_8x8_blocks_high_lookup[bsize];
-  // TODO(debargha): Choose this more intelligently.
-  const int threshold_multiplier = cm->frame_type == KEY_FRAME ? 64 : 4;
+  // TODO(marpan): Adjust/tune these thresholds.
+  const int threshold_multiplier = cm->frame_type == KEY_FRAME ? 80 : 4;
   int64_t threshold =
       (int64_t)(threshold_multiplier *
                 vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth));
+  int64_t threshold_bsize_ref = threshold << 6;
+  int64_t threshold_low = threshold;
+  BLOCK_SIZE bsize_ref = BLOCK_16X16;
+
   assert(block_height == block_width);
   tree_to_node(data, bsize, &vt);
-  // Split none is available only if we have more than half a block size
-  // in width and height inside the visible image.
-  if (mi_col + block_width / 2 < cm->mi_cols &&
-      mi_row + block_height / 2 < cm->mi_rows &&
-      vt.part_variances->none.variance < threshold) {
-    set_block_size(cpi, xd, mi_row, mi_col, bsize);
-    return 1;
-  }
-
-  // Only allow split for blocks above 16x16.
-  if (bsize > BLOCK_16X16) {
-    // Vertical split is available on all but the bottom border.
+  if (cm->frame_type == KEY_FRAME) {
+    bsize_ref = BLOCK_8X8;
+    // Choose lower thresholds for key frame variance to favor split.
+    threshold_bsize_ref = threshold >> 1;
+    threshold_low = threshold >> 2;
+  }
+
+  // For bsize=bsize_ref (16x16/8x8 for 8x8/4x4 downsampling), select if
+  // variance is below threshold, otherwise split will be selected.
+  // No check for vert/horiz split as too few samples for variance.
+  if (bsize == bsize_ref) {
+    if (mi_col + block_width / 2 < cm->mi_cols &&
+        mi_row + block_height / 2 < cm->mi_rows &&
+        vt.part_variances->none.variance < threshold_bsize_ref) {
+      set_block_size(cpi, xd, mi_row, mi_col, bsize);
+      return 1;
+    }
+    return 0;
+  } else if (bsize > bsize_ref) {
+    // For key frame, for bsize above 32X32, or very high variance, take split.
+    if (cm->frame_type == KEY_FRAME &&
+        (bsize > BLOCK_32X32 ||
+        vt.part_variances->none.variance > (threshold << 2))) {
+      return 0;
+    }
+    // If variance is low, take the bsize (no split).
+    if (mi_col + block_width / 2 < cm->mi_cols &&
+        mi_row + block_height / 2 < cm->mi_rows &&
+        vt.part_variances->none.variance < threshold_low) {
+      set_block_size(cpi, xd, mi_row, mi_col, bsize);
+      return 1;
+    }
+    // Check vertical split.
     if (mi_row + block_height / 2 < cm->mi_rows &&
-        vt.part_variances->vert[0].variance < threshold &&
-        vt.part_variances->vert[1].variance < threshold) {
+        vt.part_variances->vert[0].variance < threshold_low &&
+        vt.part_variances->vert[1].variance < threshold_low) {
       BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
       set_block_size(cpi, xd, mi_row, mi_col, subsize);
       set_block_size(cpi, xd, mi_row, mi_col + block_width / 2, subsize);
       return 1;
     }
-
-    // Horizontal split is available on all but the right border.
+    // Check horizontal split.
     if (mi_col + block_width / 2 < cm->mi_cols &&
-        vt.part_variances->horz[0].variance < threshold &&
-        vt.part_variances->horz[1].variance < threshold) {
+        vt.part_variances->horz[0].variance < threshold_low &&
+        vt.part_variances->horz[1].variance < threshold_low) {
       BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
       set_block_size(cpi, xd, mi_row, mi_col, subsize);
       set_block_size(cpi, xd, mi_row + block_height / 2, mi_col, subsize);
       return 1;
     }
-  }
-
-  // This will only allow 8x8 if the 16x16 variance is very large.
-  if (bsize == BLOCK_16X16) {
-    if (mi_col + block_width / 2 < cm->mi_cols &&
-        mi_row + block_height / 2 < cm->mi_rows &&
-        vt.part_variances->none.variance < (threshold << 6)) {
-      set_block_size(cpi, xd, mi_row, mi_col, bsize);
-      return 1;
-    }
+    return 0;
   }
   return 0;
 }
-// This function chooses partitioning based on the variance
-// between source and reconstructed last, where variance is
-// computed for 8x8 downsampled inputs. Some things to check:
-// using the last source rather than reconstructed last, and
-// allowing for small downsampling (4x4 or 2x2) for selection
-// of smaller block sizes (i.e., < 16x16).
+// This function chooses partitioning based on the variance between source and
+// reconstructed last, where variance is computed for downsampled inputs.
+// Currently 8x8 downsampling is used for delta frames, 4x4 for key frames.
 static void choose_partitioning(VP9_COMP *cpi,
                                 const TileInfo *const tile,
                                 MACROBLOCK *x,
@@ -463,7 +487,7 @@ static void choose_partitioning(VP9_COMP *cpi,
   VP9_COMMON * const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
 
-  int i, j, k;
+  int i, j, k, m;
   v64x64 vt;
   uint8_t *s;
   const uint8_t *d;
@@ -525,38 +549,63 @@ static void choose_partitioning(VP9_COMP *cpi,
       const int y16_idx = y32_idx + ((j >> 1) << 4);
       v16x16 *vst = &vt.split[i].split[j];
       for (k = 0; k < 4; k++) {
-        int x_idx = x16_idx + ((k & 1) << 3);
-        int y_idx = y16_idx + ((k >> 1) << 3);
-        unsigned int sse = 0;
-        int sum = 0;
-        if (x_idx < pixels_wide && y_idx < pixels_high) {
-          int s_avg, d_avg;
+        int x8_idx = x16_idx + ((k & 1) << 3);
+        int y8_idx = y16_idx + ((k >> 1) << 3);
+        if (cm->frame_type != KEY_FRAME) {
+          unsigned int sse = 0;
+          int sum = 0;
+          if (x8_idx < pixels_wide && y8_idx < pixels_high) {
+            int s_avg, d_avg;
 #if CONFIG_VP9_HIGHBITDEPTH
-          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-            s_avg = vp9_highbd_avg_8x8(s + y_idx * sp + x_idx, sp);
-            d_avg = vp9_highbd_avg_8x8(d + y_idx * dp + x_idx, dp);
-          } else {
-            s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
-            d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
-          }
+            if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+              s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+              d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+            } else {
+              s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+              d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+            }
 #else
-          s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
-          d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
+            s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+            d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
 #endif
-          sum = s_avg - d_avg;
-          sse = sum * sum;
-        }
-        // For an 8x8 block we have just one value the average of all 64
-        // pixels, so use 1. This means of course that there is no variance
-        // in an 8x8 block.
-        fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
+            sum = s_avg - d_avg;
+            sse = sum * sum;
+          }
+          // If variance is based on 8x8 downsampling, we stop here and have
+          // one sample for 8x8 block (so use 1 for count in fill_variance),
+          // which of course means variance = 0 for 8x8 block.
+          fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
+        } else {
+          // For key frame, go down to 4x4.
+          v8x8 *vst2 = &vst->split[k];
+          for (m = 0; m < 4; m++) {
+            int x4_idx = x8_idx + ((m & 1) << 2);
+            int y4_idx = y8_idx + ((m >> 1) << 2);
+            unsigned int sse = 0;
+            int sum = 0;
+            if (x4_idx < pixels_wide && y4_idx < pixels_high) {
+              int s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+              // For key frame, reference is set to 128.
+              sum = s_avg - 128;
+              sse = sum * sum;
+            }
+            // If variance is based on 4x4 downsampling, we stop here and have
+            // one sample for 4x4 block (so use 1 for count in fill_variance),
+            // which of course means variance = 0 for 4x4 block.
+            fill_variance(sse, sum, 1, &vst2->split[m].part_variances.none);
+          }
+        }
       }
     }
   }
   // Fill the rest of the variance tree by summing split partition values.
   for (i = 0; i < 4; i++) {
     for (j = 0; j < 4; j++) {
+      if (cm->frame_type == KEY_FRAME) {
+        for (m = 0; m < 4; m++) {
+          fill_variance_tree(&vt.split[i].split[j].split[m], BLOCK_8X8);
+        }
+      }
       fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
     }
     fill_variance_tree(&vt.split[i], BLOCK_32X32);
@@ -564,8 +613,7 @@ static void choose_partitioning(VP9_COMP *cpi,
   fill_variance_tree(&vt, BLOCK_64X64);
 
   // Now go through the entire structure, splitting every block size until
-  // we get to one that's got a variance lower than our threshold, or we
-  // hit 8x8.
+  // we get to one that's got a variance lower than our threshold.
   if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
       !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col)) {
     for (i = 0; i < 4; ++i) {
@@ -576,11 +624,13 @@ static void choose_partitioning(VP9_COMP *cpi,
       for (j = 0; j < 4; ++j) {
         const int x16_idx = ((j & 1) << 1);
         const int y16_idx = ((j >> 1) << 1);
-        // NOTE: Since this uses 8x8 downsampling for variance calculation
-        // we cannot really select block size 8x8 (or even 8x16/16x8),
-        // since we do not have sufficient samples for variance.
-        // For now, 8x8 partition is only set if the variance of the 16x16
-        // block is very high. This is controlled in set_vt_partitioning.
+        // Note: If 8x8 downsampling is used for variance calculation we
+        // cannot really select block size 8x8 (or even 8x16/16x8), since we
+        // don't have sufficient samples for variance. So on delta frames,
+        // 8x8 partition is only set if variance of the 16x16 block is very
+        // high. For key frames, 4x4 downsampling is used, so we can better
+        // select 8x16/16x8 and 8x8. 4x4 partition can potentially be used
+        // here too, but for now 4x4 is not allowed.
         if (!set_vt_partitioning(cpi, xd, &vt.split[i].split[j],
                                  BLOCK_16X16,
                                  mi_row + y32_idx + y16_idx,
@@ -588,10 +638,26 @@ static void choose_partitioning(VP9_COMP *cpi,
           for (k = 0; k < 4; ++k) {
             const int x8_idx = (k & 1);
             const int y8_idx = (k >> 1);
-            set_block_size(cpi, xd,
-                           (mi_row + y32_idx + y16_idx + y8_idx),
-                           (mi_col + x32_idx + x16_idx + x8_idx),
-                           BLOCK_8X8);
+            // TODO(marpan): Allow for setting 4x4 partition on key frame.
+            /*
+            if (cm->frame_type == KEY_FRAME) {
+              if (!set_vt_partitioning(cpi, xd,
+                                       &vt.split[i].split[j].split[k],
+                                       BLOCK_8X8,
+                                       mi_row + y32_idx + y16_idx + y8_idx,
+                                       mi_col + x32_idx + x16_idx + x8_idx)) {
+                set_block_size(cpi, xd,
+                               (mi_row + y32_idx + y16_idx + y8_idx),
+                               (mi_col + x32_idx + x16_idx + x8_idx),
+                               BLOCK_4X4);
+              }
+            } else {
+            */
+            set_block_size(cpi, xd,
+                           (mi_row + y32_idx + y16_idx + y8_idx),
+                           (mi_col + x32_idx + x16_idx + x8_idx),
+                           BLOCK_8X8);
+            // }
           }
         }
       }
@@ -2511,7 +2577,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi,
     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                      BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root);
   } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
-             cm->frame_type != KEY_FRAME ) {
+             cm->frame_type != KEY_FRAME) {
     choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
     rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                      BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root);
@@ -3532,6 +3598,11 @@ static void encode_frame_internal(VP9_COMP *cpi) {
                  cm->uv_ac_delta_q == 0;
 
   cm->tx_mode = select_tx_mode(cpi, xd);
+  if (cm->frame_type == KEY_FRAME &&
+      cpi->sf.use_nonrd_pick_mode &&
+      cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+    cm->tx_mode = ALLOW_16X16;
+  }
 
 #if CONFIG_VP9_HIGHBITDEPTH
   if (cm->use_highbitdepth)
@@ -321,7 +321,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
     sf->partition_search_type = VAR_BASED_PARTITION;
     // Turn on this to use non-RD key frame coding mode.
-    // sf->use_nonrd_pick_mode = 1;
+    sf->use_nonrd_pick_mode = 1;
     sf->mv.search_method = NSTEP;
     sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
     sf->mv.reduce_first_step_size = 1;
@@ -38,3 +38,21 @@ unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
   avg = _mm_extract_epi16(s0, 0);
   return (avg + 32) >> 6;
 }
+
+unsigned int vp9_avg_4x4_sse2(const uint8_t *s, int p) {
+  __m128i s0, s1, u0;
+  unsigned int avg = 0;
+  u0 = _mm_setzero_si128();
+  s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
+  s0 = _mm_adds_epu16(s0, s1);
+  s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4));
+  s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
+  avg = _mm_extract_epi16(s0, 0);
+  return (avg + 8) >> 4;
+}