Commit 5d24b6f0 authored by Timothy B. Terriberry, committed by Tim Terriberry

encoder: Remove 64x upsampled reference buffers

They do not handle border extension correctly (interpolation and
border extension do not commute unless you upsample into the
border), nor do they handle crop dimensions that are not a multiple
of 8 (the upsampled version is not sufficiently large), in addition
to using massive amounts of memory and being a criminal waste of
cache (1 byte used for every 8 bytes fetched).

This commit reimplements use_upsampled_references by computing the
subpixel samples on the fly. This implementation not only corrects
the border handling, but is also faster, while maintaining the
same quality.

HL AWCY results are basically noise:
    PSNR | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
  0.0188 |   0.0187 | 0.0045 |  0.0063 |     0.0228

Change-Id: I7527db9f83b87a7bb8b35342f7e6457cd0bef9cd
parent 0eac3199
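The core of the change described above is to drop the 8x-upsampled planes and interpolate sub-pel samples directly from the ordinary reference, choosing a plain copy, a horizontal-only, a vertical-only, or a two-pass filter from the 1/8-pel offsets. A minimal scalar sketch of that dispatch follows (illustration only: the bilinear kernel and the helper name are assumptions; the real code in the diff below uses the library's 8-tap convolve routines):

#include <stdint.h>
#include <string.h>

/* Scalar sketch only -- not the library code.  subpel_x_q3/subpel_y_q3 are
 * 1/8-pel offsets in [0, 7]; a bilinear kernel stands in for the real
 * 8-tap filter, and the name upsampled_pred_sketch is hypothetical. */
static void upsampled_pred_sketch(uint8_t *pred, int width, int height,
                                  int subpel_x_q3, int subpel_y_q3,
                                  const uint8_t *ref, int ref_stride) {
  int i, j;
  if (!subpel_x_q3 && !subpel_y_q3) {
    /* Integer-pel position: a plain copy replaces the old 8x buffer read. */
    for (i = 0; i < height; i++)
      memcpy(pred + i * width, ref + i * ref_stride, width);
    return;
  }
  /* Sub-pel position: filter on the fly.  The real code reads its filter
   * taps from the reference's extended border, which is what fixes the
   * border-handling bug the commit message describes. */
  for (i = 0; i < height; i++) {
    for (j = 0; j < width; j++) {
      const uint8_t *p = ref + i * ref_stride + j;
      int top = p[0] * (8 - subpel_x_q3) + p[1] * subpel_x_q3;
      int bot = p[ref_stride] * (8 - subpel_x_q3) + p[ref_stride + 1] * subpel_x_q3;
      pred[i * width + j] =
          (uint8_t)((top * (8 - subpel_y_q3) + bot * subpel_y_q3 + 32) >> 6);
    }
  }
}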
@@ -911,15 +911,15 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
#
# ...
#
add_proto qw/void aom_upsampled_pred/, "uint8_t *comp_pred, int width, int height, const uint8_t *ref, int ref_stride";
add_proto qw/void aom_upsampled_pred/, "uint8_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
specialize qw/aom_upsampled_pred sse2/;
add_proto qw/void aom_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
add_proto qw/void aom_comp_avg_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride";
specialize qw/aom_comp_avg_upsampled_pred sse2/;
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, const uint8_t *ref8, int ref_stride";
add_proto qw/void aom_highbd_upsampled_pred/, "uint16_t *comp_pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
specialize qw/aom_highbd_upsampled_pred sse2/;
add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
add_proto qw/void aom_highbd_comp_avg_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, int bd";
specialize qw/aom_highbd_comp_avg_upsampled_pred sse2/;
}
@@ -1480,10 +1480,10 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
if (aom_config("CONFIG_EXT_INTER") eq "yes") {
add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
add_proto qw/void aom_comp_mask_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
add_proto qw/void aom_comp_mask_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_comp_mask_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
add_proto qw/void aom_highbd_comp_mask_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
add_proto qw/void aom_highbd_comp_mask_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask, int bd";
}
}
......
@@ -9,6 +9,7 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include <emmintrin.h> // SSE2
#include "./aom_config.h"
@@ -16,6 +17,9 @@
#include "aom_ports/mem.h"
#include "./av1_rtcd.h"
#include "av1/common/filter.h"
typedef uint32_t (*high_variance_fn_t)(const uint16_t *src, int src_stride,
const uint16_t *ref, int ref_stride,
uint32_t *sse, int *sum);
@@ -565,131 +569,96 @@ FNS(sse2);
#undef FN
void aom_highbd_upsampled_pred_sse2(uint16_t *comp_pred, int width, int height,
const uint8_t *ref8, int ref_stride) {
int i, j;
int stride = ref_stride << 3;
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
if (width >= 8) {
// read 8 points at one time
for (i = 0; i < height; i++) {
for (j = 0; j < width; j += 8) {
__m128i s0 = _mm_cvtsi32_si128(*(const uint32_t *)ref);
__m128i s1 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 8));
__m128i s2 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 16));
__m128i s3 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 24));
__m128i s4 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 32));
__m128i s5 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 40));
__m128i s6 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 48));
__m128i s7 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 56));
__m128i t0, t1, t2, t3;
t0 = _mm_unpacklo_epi16(s0, s1);
t1 = _mm_unpacklo_epi16(s2, s3);
t2 = _mm_unpacklo_epi16(s4, s5);
t3 = _mm_unpacklo_epi16(s6, s7);
t0 = _mm_unpacklo_epi32(t0, t1);
t2 = _mm_unpacklo_epi32(t2, t3);
t0 = _mm_unpacklo_epi64(t0, t2);
_mm_storeu_si128((__m128i *)(comp_pred), t0);
int subpel_x_q3, int subpel_y_q3,
const uint8_t *ref8, int ref_stride,
int bd) {
if (!subpel_x_q3 && !subpel_y_q3) {
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
if (width >= 8) {
int i;
assert(!(width & 7));
/*Read 8 pixels one row at a time.*/
for (i = 0; i < height; i++) {
int j;
for (j = 0; j < width; j += 8) {
__m128i s0 = _mm_loadu_si128((const __m128i *)ref);
_mm_storeu_si128((__m128i *)comp_pred, s0);
comp_pred += 8;
ref += 8;
}
ref += ref_stride - width;
}
} else {
int i;
assert(!(width & 3));
/*Read 4 pixels two rows at a time.*/
for (i = 0; i < height; i += 2) {
__m128i s0 = _mm_loadl_epi64((const __m128i *)ref);
__m128i s1 = _mm_loadl_epi64((const __m128i *)(ref + ref_stride));
__m128i t0 = _mm_unpacklo_epi64(s0, s1);
_mm_storeu_si128((__m128i *)comp_pred, t0);
comp_pred += 8;
ref += 64; // 8 * 8;
ref += 2 * ref_stride;
}
ref += stride - (width << 3);
}
} else {
// read 4 points at one time
for (i = 0; i < height; i++) {
for (j = 0; j < width; j += 4) {
__m128i s0 = _mm_cvtsi32_si128(*(const uint32_t *)ref);
__m128i s1 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 8));
__m128i s2 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 16));
__m128i s3 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 24));
__m128i t0, t1;
t0 = _mm_unpacklo_epi16(s0, s1);
t1 = _mm_unpacklo_epi16(s2, s3);
t0 = _mm_unpacklo_epi32(t0, t1);
_mm_storel_epi64((__m128i *)(comp_pred), t0);
comp_pred += 4;
ref += 4 * 8;
}
ref += stride - (width << 3);
InterpFilterParams filter;
filter = av1_get_interp_filter_params(EIGHTTAP_REGULAR);
if (!subpel_y_q3) {
const int16_t *kernel;
kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
aom_highbd_convolve8_horiz(ref8, ref_stride,
CONVERT_TO_BYTEPTR(comp_pred), width, kernel,
16, NULL, -1, width, height, bd);
} else if (!subpel_x_q3) {
const int16_t *kernel;
kernel = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
aom_highbd_convolve8_vert(ref8, ref_stride, CONVERT_TO_BYTEPTR(comp_pred),
width, NULL, -1, kernel, 16, width, height, bd);
} else {
DECLARE_ALIGNED(16, uint16_t,
temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]);
const uint16_t *ref;
const int16_t *kernel_x;
const int16_t *kernel_y;
int intermediate_height;
ref = CONVERT_TO_SHORTPTR(ref8);
kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
intermediate_height =
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
aom_highbd_convolve8_horiz(
CONVERT_TO_BYTEPTR(ref - ref_stride * ((filter.taps >> 1) - 1)),
ref_stride, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, kernel_x, 16, NULL,
-1, width, intermediate_height, bd);
aom_highbd_convolve8_vert(
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
16, width, height, bd);
}
}
}
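A quick standalone check of the intermediate-buffer sizing used by the two-pass path above (a sketch; MAX_SB_SIZE = 128 and an 8-tap filter are assumed for this build):

#include <assert.h>

/* The horizontal pass must produce enough rows for the vertical 8-tap pass:
 * the last output row samples taps centred on ((height - 1) * 8 +
 * subpel_y_q3) / 8, so roughly one filter length of extra rows is needed,
 * and the result must fit the temp buffer's row count. */
int main(void) {
  const int max_sb_size = 128; /* assumed superblock size */
  const int taps = 8;
  int height, subpel_y_q3;
  for (height = 4; height <= max_sb_size; height *= 2) {
    for (subpel_y_q3 = 1; subpel_y_q3 < 8; subpel_y_q3++) {
      int intermediate_height = (((height - 1) * 8 + subpel_y_q3) >> 3) + taps;
      /* Mirrors the assert in the diff above. */
      assert(intermediate_height <= (max_sb_size * 2 + 16) + 16);
    }
  }
  return 0;
}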
void aom_highbd_comp_avg_upsampled_pred_sse2(uint16_t *comp_pred,
const uint8_t *pred8, int width,
int height, const uint8_t *ref8,
int ref_stride) {
const __m128i one = _mm_set1_epi16(1);
int i, j;
int stride = ref_stride << 3;
int height, int subpel_x_q3,
int subpel_y_q3,
const uint8_t *ref8,
int ref_stride, int bd) {
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
if (width >= 8) {
// read 8 points at one time
for (i = 0; i < height; i++) {
for (j = 0; j < width; j += 8) {
__m128i s0 = _mm_cvtsi32_si128(*(const uint32_t *)ref);
__m128i s1 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 8));
__m128i s2 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 16));
__m128i s3 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 24));
__m128i s4 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 32));
__m128i s5 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 40));
__m128i s6 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 48));
__m128i s7 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 56));
__m128i p0 = _mm_loadu_si128((const __m128i *)pred);
__m128i t0, t1, t2, t3;
t0 = _mm_unpacklo_epi16(s0, s1);
t1 = _mm_unpacklo_epi16(s2, s3);
t2 = _mm_unpacklo_epi16(s4, s5);
t3 = _mm_unpacklo_epi16(s6, s7);
t0 = _mm_unpacklo_epi32(t0, t1);
t2 = _mm_unpacklo_epi32(t2, t3);
t0 = _mm_unpacklo_epi64(t0, t2);
p0 = _mm_adds_epu16(t0, p0);
p0 = _mm_adds_epu16(p0, one);
p0 = _mm_srli_epi16(p0, 1);
_mm_storeu_si128((__m128i *)(comp_pred), p0);
comp_pred += 8;
pred += 8;
ref += 8 * 8;
}
ref += stride - (width << 3);
}
} else {
// read 4 points at one time
for (i = 0; i < height; i++) {
for (j = 0; j < width; j += 4) {
__m128i s0 = _mm_cvtsi32_si128(*(const uint32_t *)ref);
__m128i s1 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 8));
__m128i s2 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 16));
__m128i s3 = _mm_cvtsi32_si128(*(const uint32_t *)(ref + 24));
__m128i p0 = _mm_loadl_epi64((const __m128i *)pred);
__m128i t0, t1;
t0 = _mm_unpacklo_epi16(s0, s1);
t1 = _mm_unpacklo_epi16(s2, s3);
t0 = _mm_unpacklo_epi32(t0, t1);
p0 = _mm_adds_epu16(t0, p0);
p0 = _mm_adds_epu16(p0, one);
p0 = _mm_srli_epi16(p0, 1);
_mm_storel_epi64((__m128i *)(comp_pred), p0);
comp_pred += 4;
pred += 4;
ref += 4 * 8;
}
ref += stride - (width << 3);
}
int n;
int i;
aom_highbd_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3,
ref8, ref_stride, bd);
/*The total number of pixels must be a multiple of 8 (e.g., 4x4).*/
assert(!(width * height & 7));
n = width * height >> 3;
for (i = 0; i < n; i++) {
__m128i s0 = _mm_loadu_si128((const __m128i *)comp_pred);
__m128i p0 = _mm_loadu_si128((const __m128i *)pred);
_mm_storeu_si128((__m128i *)comp_pred, _mm_avg_epu16(s0, p0));
comp_pred += 8;
pred += 8;
}
}
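The compound-average variant now simply averages the freshly filtered prediction with the existing one; per 16-bit lane, _mm_avg_epu16 computes the unsigned rounding average (a + b + 1) >> 1. A tiny scalar model of that step, for reference:

#include <assert.h>
#include <stdint.h>

/* Scalar model of what _mm_avg_epu16 computes in each 16-bit lane. */
static uint16_t avg_epu16_scalar(uint16_t a, uint16_t b) {
  return (uint16_t)(((uint32_t)a + b + 1) >> 1);
}

int main(void) {
  assert(avg_epu16_scalar(3, 4) == 4);             /* ties round up */
  assert(avg_epu16_scalar(65535, 65534) == 65535); /* no overflow in 32 bits */
  return 0;
}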
@@ -397,13 +397,6 @@ typedef struct AV1_COMP {
YV12_BUFFER_CONFIG *unscaled_last_source;
YV12_BUFFER_CONFIG scaled_last_source;
// Up-sampled reference buffers
// NOTE(zoeliu): It is needed to allocate sufficient space to the up-sampled
// reference buffers, which should include the up-sampled version of all the
// possibly stored references plus the currently coded frame itself.
EncRefCntBuffer upsampled_ref_bufs[REF_FRAMES + 1];
int upsampled_ref_idx[REF_FRAMES + 1];
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
@@ -749,14 +742,6 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
: NULL;
}
static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref(
const AV1_COMP *cpi, const MV_REFERENCE_FRAME ref_frame) {
// Use up-sampled reference frames.
const int buf_idx =
cpi->upsampled_ref_idx[get_ref_frame_map_idx(cpi, ref_frame)];
return &cpi->upsampled_ref_bufs[buf_idx].buf;
}
#if CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING
static INLINE int enc_is_ref_frame_buf(AV1_COMP *cpi, RefCntBuffer *frame_buf) {
MV_REFERENCE_FRAME ref_frame;
......
@@ -143,11 +143,10 @@ int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x,
const aom_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv, int is_second);
int av1_find_best_obmc_sub_pixel_tree_up(
const struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit,
const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step,
int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1,
int is_second, int use_upsampled_ref);
MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion,
unsigned int *sse1, int is_second, int use_upsampled_ref);
#endif // CONFIG_MOTION_VAR
#ifdef __cplusplus
} // extern "C"
......
@@ -5293,6 +5293,8 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
if (!has_second_ref(mbmi)) is_global[1] = is_global[0];
#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF
#endif // CONFIG_GLOBAL_MOTION
#else // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
(void)block;
#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION
// Do joint motion search in compound mode to get more accurate mv.
@@ -5491,52 +5493,15 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
if (cpi->sf.use_upsampled_references) {
// Use up-sampled reference frames.
struct buf_2d backup_pred = pd->pre[0];
const YV12_BUFFER_CONFIG *upsampled_ref =
get_upsampled_ref(cpi, refs[id]);
// Set pred for Y plane
setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
upsampled_ref->y_crop_width,
upsampled_ref->y_crop_height, upsampled_ref->y_stride,
(mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
pd->subsampling_y);
// If bsize < BLOCK_8X8, adjust pred pointer for this block
#if !CONFIG_CB4X4
if (bsize < BLOCK_8X8)
pd->pre[0].buf =
&pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
pd->pre[0].stride))
<< 3];
#endif // !CONFIG_CB4X4
bestsme = cpi->find_fractional_mv_step(
x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
&dis, &sse, second_pred,
#if CONFIG_EXT_INTER
mask, mask_stride, id,
#endif
pw, ph, 1);
// Restore the reference frames.
pd->pre[0] = backup_pred;
} else {
(void)block;
bestsme = cpi->find_fractional_mv_step(
x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
&dis, &sse, second_pred,
bestsme = cpi->find_fractional_mv_step(
x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
&dis, &sse, second_pred,
#if CONFIG_EXT_INTER
mask, mask_stride, id,
mask, mask_stride, id,
#endif
pw, ph, 0);
}
pw, ph, cpi->sf.use_upsampled_references);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -6113,17 +6078,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x->second_best_mv.as_int != x->best_mv.as_int;
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
// Use up-sampled reference frames.
struct macroblockd_plane *const pd = &xd->plane[0];
struct buf_2d backup_pred = pd->pre[ref_idx];
const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
// Set pred for Y plane
setup_pred_plane(
&pd->pre[ref_idx], bsize, upsampled_ref->y_buffer,
upsampled_ref->y_crop_width, upsampled_ref->y_crop_height,
upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL,
pd->subsampling_x, pd->subsampling_y);
best_mv_var = cpi->find_fractional_mv_step(
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
@@ -6166,9 +6120,6 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
x->best_mv.as_mv = best_mv;
}
}
// Restore the reference frames.
pd->pre[ref_idx] = backup_pred;
} else {
cpi->find_fractional_mv_step(
x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
@@ -6184,11 +6135,10 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
break;
case OBMC_CAUSAL:
av1_find_best_obmc_sub_pixel_tree_up(
cpi, x, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv,
cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize],
cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step,
x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0,
cpi->sf.use_upsampled_references);
x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis,
&x->pred_sse[ref], 0, cpi->sf.use_upsampled_references);
break;
default: assert("Invalid motion mode!\n");
}
@@ -6332,10 +6282,12 @@ static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
// Search for the best mv for one component of a compound,
// given that the other component is fixed.
static void compound_single_motion_search(
const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv,
int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask,
int mask_stride, int *rate_mv, const int block, int ref_idx) {
static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *this_mv,
int mi_row, int mi_col,
const uint8_t *second_pred,
const uint8_t *mask, int mask_stride,
int *rate_mv, int ref_idx) {
const int pw = block_size_wide[bsize];
const int ph = block_size_high[bsize];
MACROBLOCKD *xd = &x->e_mbd;
@@ -6423,43 +6375,11 @@ static void compound_single_motion_search(
if (bestsme < INT_MAX) {
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
if (cpi->sf.use_upsampled_references) {
// Use up-sampled reference frames.
struct buf_2d backup_pred = pd->pre[0];
const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref);
// Set pred for Y plane
setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer,
upsampled_ref->y_crop_width,
upsampled_ref->y_crop_height, upsampled_ref->y_stride,
(mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x,
pd->subsampling_y);
// If bsize < BLOCK_8X8, adjust pred pointer for this block
#if !CONFIG_CB4X4
if (bsize < BLOCK_8X8)
pd->pre[0].buf =
&pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block,
pd->pre[0].stride))
<< 3];
#endif // !CONFIG_CB4X4
bestsme = cpi->find_fractional_mv_step(
x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
mask_stride, ref_idx, pw, ph, 1);
// Restore the reference frames.
pd->pre[0] = backup_pred;
} else {
(void)block;
bestsme = cpi->find_fractional_mv_step(
x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask,
mask_stride, ref_idx, pw, ph, 0);
}
bestsme = cpi->find_fractional_mv_step(
x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
ref_idx, pw, ph, cpi->sf.use_upsampled_references);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -6539,7 +6459,7 @@ static void compound_single_motion_search_interinter(
ref_idx, second_pred);
compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
second_pred, mask, mask_stride, rate_mv, block,
second_pred, mask, mask_stride, rate_mv,
ref_idx);
}
@@ -8471,7 +8391,7 @@ static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
mi_col, intrapred, mask, bw,
&tmp_rate_mv, 0, 0);
&tmp_rate_mv, 0);
mbmi->mv[0].as_int = tmp_mv.as_int;
av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
bsize);
......