Commit d3c13e88 authored by Yunqing Wang's avatar Yunqing Wang

Remove sample sorting in warped motion sample selection

The original sample selection process involves finding best 8 sorted
samples according to motion vector difference(MVD) between neighbor
block and current block, and then trimming samples. To reduce the
complexity, use the current block width/height as the MVD threshold,
and trim the samples without sorting.

This gives slightly less gain than the original method.
AWCY result:
         PSNR   PSNR HVS   SSIM
Average  -0.07   -0.13     -0.12
Borg test result:
             avg_psnr ovr_psnr ssim
cam_lowres:  -0.112   -0.112  -0.180
lowres:      -0.068   -0.073  -0.125

Change-Id: Ic2f79a170441d5bcb04ea87dddf490ef7fbba8bc
parent 8494fbb7
......@@ -1601,9 +1601,9 @@ void av1_setup_motion_field(AV1_COMMON *cm) {
#if CONFIG_EXT_WARPED_MOTION
static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref,
int *pts_mv, int global_offset_r,
int global_offset_c, int row_offset,
int sign_r, int col_offset, int sign_c) {
int global_offset_r, int global_offset_c,
int row_offset, int sign_r, int col_offset,
int sign_c) {
int bw = block_size_wide[mbmi->sb_type];
int bh = block_size_high[mbmi->sb_type];
int cr_offset = row_offset * MI_SIZE + sign_r * AOMMAX(bh, MI_SIZE) / 2 - 1;
......@@ -1615,56 +1615,48 @@ static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref,
pts[1] = (y * 8);
pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
pts_mv[0] = mbmi->mv[0].as_mv.col;
pts_mv[1] = mbmi->mv[0].as_mv.row;
}
// Only sort pts and pts_inref, and pts_mv is not sorted.
#define TRIM_THR 16
int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len) {
// Select samples according to the motion vector difference.
int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize) {
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
const int thresh = clamp(AOMMAX(bw, bh), 16, 112);
int pts_mvd[SAMPLES_ARRAY_SIZE] = { 0 };
int i, j, k;
int ret = len;
assert(len <= SAMPLES_MAX);
for (i = 0; i < len; ++i)
pts_mvd[i] =
abs(pts_mv[2 * i] - mv->col) + abs(pts_mv[2 * i + 1] - mv->row);
// TODO(yunqingwang): len is capped at 8 after this. Thus, modify this to find
// the best 8 samples instead of sorting all samples.
for (i = 1; i <= len - 1; ++i) {
for (j = 0; j < i; ++j) {
if (pts_mvd[j] > pts_mvd[i]) {
int temp, tempi, tempj, ptempi, ptempj;
temp = pts_mvd[i];
tempi = pts[2 * i];
tempj = pts[2 * i + 1];
ptempi = pts_inref[2 * i];
ptempj = pts_inref[2 * i + 1];
for (k = i; k > j; k--) {
pts_mvd[k] = pts_mvd[k - 1];
pts[2 * k] = pts[2 * (k - 1)];
pts[2 * k + 1] = pts[2 * (k - 1) + 1];
pts_inref[2 * k] = pts_inref[2 * (k - 1)];
pts_inref[2 * k + 1] = pts_inref[2 * (k - 1) + 1];
}
pts_mvd[j] = temp;
pts[2 * j] = tempi;
pts[2 * j + 1] = tempj;
pts_inref[2 * j] = ptempi;
pts_inref[2 * j + 1] = ptempj;
break;
}
}
int i, j, k, l = len;
int ret = 0;
assert(len <= LEAST_SQUARES_SAMPLES_MAX);
// Obtain the motion vector difference.
for (i = 0; i < len; ++i) {
pts_mvd[i] = abs(pts_inref[2 * i] - pts[2 * i] - mv->col) +
abs(pts_inref[2 * i + 1] - pts[2 * i + 1] - mv->row);
if (pts_mvd[i] > thresh)
pts_mvd[i] = -1;
else
ret++;
}
len = AOMMIN(len, LEAST_SQUARES_SAMPLES_MAX);
for (i = len - 1; i >= 1; i--) {
if ((pts_mvd[i] - pts_mvd[i - 1]) >= TRIM_THR) ret = i;
// Keep at least 1 sample.
if (!ret) return 1;
i = 0;
j = l - 1;
for (k = 0; k < l - ret; k++) {
while (pts_mvd[i] != -1) i++;
while (pts_mvd[j] == -1) j--;
assert(i != j);
if (i > j) break;
// Replace the discarded samples;
pts_mvd[i] = pts_mvd[j];
pts[2 * i] = pts[2 * j];
pts[2 * i + 1] = pts[2 * j + 1];
pts_inref[2 * i] = pts_inref[2 * j];
pts_inref[2 * i + 1] = pts_inref[2 * j + 1];
i++;
j--;
}
return ret;
......@@ -1672,7 +1664,7 @@ int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len) {
// Note: Samples returned are at 1/8-pel precision
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int *pts, int *pts_inref, int *pts_mv) {
int *pts, int *pts_inref) {
MB_MODE_INFO *const mbmi0 = &(xd->mi[0]->mbmi);
int ref_frame = mbmi0->ref_frame[0];
int up_available = xd->up_available;
......@@ -1700,13 +1692,12 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
if (col_offset + n8_w > xd->n8_w) do_tr = 0;
if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
global_offset_c, 0, -1, col_offset, 1);
record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
0, -1, col_offset, 1);
pts += 2;
pts_inref += 2;
pts_mv += 2;
np++;
if (np >= SAMPLES_MAX) return SAMPLES_MAX;
if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
} else {
// Handle "current block width > above block width" case.
......@@ -1719,18 +1710,17 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
if (mbmi->ref_frame[0] == ref_frame &&
mbmi->ref_frame[1] == NONE_FRAME) {
record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
global_offset_c, 0, -1, i, 1);
record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
0, -1, i, 1);
pts += 2;
pts_inref += 2;
pts_mv += 2;
np++;
if (np >= SAMPLES_MAX) return SAMPLES_MAX;
if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
}
}
}
assert(np <= SAMPLES_MAX);
assert(np <= LEAST_SQUARES_SAMPLES_MAX);
// scan the nearest left columns
if (left_available) {
......@@ -1747,13 +1737,12 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
if (row_offset < 0) do_tl = 0;
if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
global_offset_c, row_offset, 1, 0, -1);
record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
row_offset, 1, 0, -1);
pts += 2;
pts_inref += 2;
pts_mv += 2;
np++;
if (np >= SAMPLES_MAX) return SAMPLES_MAX;
if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
} else {
// Handle "current block height > above block height" case.
......@@ -1766,18 +1755,17 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
if (mbmi->ref_frame[0] == ref_frame &&
mbmi->ref_frame[1] == NONE_FRAME) {
record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
global_offset_c, i, 1, 0, -1);
record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
i, 1, 0, -1);
pts += 2;
pts_inref += 2;
pts_mv += 2;
np++;
if (np >= SAMPLES_MAX) return SAMPLES_MAX;
if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
}
}
}
assert(np <= SAMPLES_MAX);
assert(np <= LEAST_SQUARES_SAMPLES_MAX);
// Top-left block
if (do_tl && left_available && up_available) {
......@@ -1788,16 +1776,15 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
MB_MODE_INFO *mbmi = &mi->mbmi;
if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
global_offset_c, 0, -1, 0, -1);
record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c, 0,
-1, 0, -1);
pts += 2;
pts_inref += 2;
pts_mv += 2;
np++;
if (np >= SAMPLES_MAX) return SAMPLES_MAX;
if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
}
assert(np <= SAMPLES_MAX);
assert(np <= LEAST_SQUARES_SAMPLES_MAX);
// Top-right block
if (do_tr &&
......@@ -1812,14 +1799,14 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
MB_MODE_INFO *mbmi = &mi->mbmi;
if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
record_samples(mbmi, pts, pts_inref, pts_mv, global_offset_r,
global_offset_c, 0, -1, xd->n8_w, 1);
record_samples(mbmi, pts, pts_inref, global_offset_r, global_offset_c,
0, -1, xd->n8_w, 1);
np++;
if (np >= SAMPLES_MAX) return SAMPLES_MAX;
if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
}
}
}
assert(np <= SAMPLES_MAX);
assert(np <= LEAST_SQUARES_SAMPLES_MAX);
return np;
}
......
......@@ -480,13 +480,10 @@ void av1_update_mv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd,
int16_t *mode_context);
#if CONFIG_EXT_WARPED_MOTION
int sortSamples(int *pts_mv, MV *mv, int *pts, int *pts_inref, int len);
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int *pts, int *pts_inref, int *pts_mv);
#else
int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize);
#endif // CONFIG_EXT_WARPED_MOTION
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int *pts, int *pts_inref);
#endif // CONFIG_EXT_WARPED_MOTION
#if CONFIG_INTRABC
#define INTRABC_DELAY_PIXELS 256 // Delay of 256 pixels
......
......@@ -27,19 +27,8 @@
#define MAX_PARAMDIM 9
#define LEAST_SQUARES_SAMPLES_MAX_BITS 3
#define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS)
#define WARPED_MOTION_DEBUG 0
#if CONFIG_EXT_WARPED_MOTION
// Search 1 row on the top and 1 column on the left, 1 upper-left block,
// 1 upper-right block. In worst case, the samples are (MAX_MIB_SIZE * 2 + 2).
// Here force number of samples within SAMPLES_MAX.
#define SAMPLES_MAX (LEAST_SQUARES_SAMPLES_MAX * 2)
#define SAMPLES_ARRAY_SIZE (SAMPLES_MAX * 2)
#else
#define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
#endif // CONFIG_EXT_WARPED_MOTION
#define WARPED_MOTION_DEBUG 0
#define DEFAULT_WMTYPE AFFINE
extern const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3 + 1][8];
......
......@@ -1814,9 +1814,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
int16_t inter_mode_ctx[MODE_CTX_REF_FRAMES];
int16_t compound_inter_mode_ctx[MODE_CTX_REF_FRAMES];
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#if CONFIG_EXT_WARPED_MOTION
int pts_mv[SAMPLES_ARRAY_SIZE];
#endif // CONFIG_EXT_WARPED_MOTION
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
assert(NELEMENTS(mode_2_counter) == MB_MODE_COUNT);
......@@ -2111,12 +2108,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
!mbmi->skip_mode &&
#endif // CONFIG_EXT_SKIP
!has_second_ref(mbmi))
#if CONFIG_EXT_WARPED_MOTION
mbmi->num_proj_ref[0] =
findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
#else
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
#endif // CONFIG_EXT_WARPED_MOTION
av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
if (mbmi->ref_frame[1] != INTRA_FRAME)
......@@ -2223,8 +2215,8 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#if CONFIG_EXT_WARPED_MOTION
if (mbmi->num_proj_ref[0] > 1)
mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
pts_inref, mbmi->num_proj_ref[0]);
mbmi->num_proj_ref[0] = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
mbmi->num_proj_ref[0], bsize);
#endif // CONFIG_EXT_WARPED_MOTION
if (find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
......
......@@ -944,7 +944,7 @@ unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x,
#if CONFIG_EXT_WARPED_MOTION
unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int *pts0, int *pts_inref0, int *pts_mv0,
int *pts0, int *pts_inref0,
int total_samples) {
#else
unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
......@@ -999,7 +999,7 @@ unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x,
memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
if (total_samples > 1)
mbmi->num_proj_ref[0] =
sortSamples(pts_mv0, &this_mv, pts, pts_inref, total_samples);
selectSamples(&this_mv, pts, pts_inref, total_samples, bsize);
#endif // CONFIG_EXT_WARPED_MOTION
if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr,
......
......@@ -158,8 +158,7 @@ unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi,
unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
MACROBLOCK *const x, BLOCK_SIZE bsize,
int mi_row, int mi_col, int *pts0,
int *pts_inref0, int *pts_mv0,
int total_samples);
int *pts_inref0, int total_samples);
#else
unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi,
MACROBLOCK *const x, BLOCK_SIZE bsize,
......
......@@ -7361,7 +7361,6 @@ static int64_t motion_mode_rd(
#if CONFIG_EXT_WARPED_MOTION
int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
int pts_mv0[SAMPLES_ARRAY_SIZE];
int total_samples;
#else
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
......@@ -7372,8 +7371,7 @@ static int64_t motion_mode_rd(
if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
aom_clear_system_state();
#if CONFIG_EXT_WARPED_MOTION
mbmi->num_proj_ref[0] =
findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0);
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0);
total_samples = mbmi->num_proj_ref[0];
#else
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
......@@ -7435,10 +7433,10 @@ static int64_t motion_mode_rd(
#if CONFIG_EXT_WARPED_MOTION
memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
// Rank the samples by motion vector difference
// Select the samples according to motion vector difference
if (mbmi->num_proj_ref[0] > 1) {
mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
pts_inref, mbmi->num_proj_ref[0]);
mbmi->num_proj_ref[0] = selectSamples(
&mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref[0], bsize);
best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
}
#endif // CONFIG_EXT_WARPED_MOTION
......@@ -7456,7 +7454,7 @@ static int64_t motion_mode_rd(
// Refine MV in a small range.
av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
pts_mv0, total_samples);
total_samples);
#else
// Refine MV in a small range.
av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
......@@ -10611,16 +10609,12 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
#if CONFIG_EXT_WARPED_MOTION
int pts_mv[SAMPLES_ARRAY_SIZE];
mbmi->num_proj_ref[0] =
findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv);
// Rank the samples by motion vector difference
// Select the samples according to motion vector difference
if (mbmi->num_proj_ref[0] > 1)
mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
pts_inref, mbmi->num_proj_ref[0]);
#else
mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
mbmi->num_proj_ref[0] = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
mbmi->num_proj_ref[0], bsize);
#endif // CONFIG_EXT_WARPED_MOTION
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment