Commit 2ca7d42e authored by Sarah Parker

Adding speed feature interface for ext tx search

This sets up the interface for 3 speed features that progressively
eliminate a greater number of transforms in ext tx using
pre-trained support vector machines.
Each speed feature still needs to be implemented.

Change-Id: Ia508aeadc0cffdc080fb227f357a5d1dfbca08e2
parent 339ef0ce
......@@ -354,6 +354,126 @@ static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
}
}
#if CONFIG_EXT_TX
// One-dimensional transform kinds.
// do_tx_type_search() uses these values as bit positions inside the prune
// mask, so they have to stay small and contiguous, starting at 0.
typedef enum {
  DCT_1D = 0,
  ADST_1D,      // 1
  FLIPADST_1D,  // 2
  DST_1D,       // 3
  // 4: count of the kinds above; do_tx_type_search() also adds it to a
  // 1D kind to reach the second group of bits in the prune mask.
  TX_TYPES_1D,
} TX_TYPE_1D;
// Stub for the PRUNE_TWO setting (selected by prune_tx_types()).
// None of the arguments are examined yet; the function always hands back the
// fixed mask 3, i.e. bits 0 and 1 set. do_tx_type_search() interprets the
// result as a bitmask of 1D transform kinds to skip.
static int prune_two_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
                             MACROBLOCK *x, MACROBLOCKD *xd) {
  const int fixed_mask = 3;

  // Reference every parameter so the unused arguments do not trigger
  // compiler warnings.
  (void)cpi;
  (void)bsize;
  (void)x;
  (void)xd;

  return fixed_mask;
}
// Stub for the PRUNE_THREE setting (selected by prune_tx_types()).
// None of the arguments are examined yet; the function always hands back the
// fixed mask 7, i.e. bits 0, 1 and 2 set. do_tx_type_search() interprets the
// result as a bitmask of 1D transform kinds to skip.
static int prune_three_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd) {
  const int fixed_mask = 7;

  // Reference every parameter so the unused arguments do not trigger
  // compiler warnings.
  (void)cpi;
  (void)bsize;
  (void)x;
  (void)xd;

  return fixed_mask;
}
#endif // CONFIG_EXT_TX
// Stub for the PRUNE_ONE setting (selected by prune_tx_types()).
// None of the arguments are examined yet; the function always hands back the
// fixed mask 1, i.e. only bit 0 set. do_tx_type_search() interprets the
// result as a bitmask of 1D transform kinds to skip.
static int prune_one_for_sby(const VP10_COMP *cpi, BLOCK_SIZE bsize,
                             MACROBLOCK *x, MACROBLOCKD *xd) {
  const int fixed_mask = 1;

  // Reference every parameter so the unused arguments do not trigger
  // compiler warnings.
  (void)cpi;
  (void)bsize;
  (void)x;
  (void)xd;

  return fixed_mask;
}
// Produces the transform-type prune mask for a block by dispatching on the
// cpi->sf.tx_type_search speed feature.
//
// Returns 0 for NO_PRUNE; otherwise returns whatever the matching
// prune_*_for_sby() helper computes. PRUNE_TWO and PRUNE_THREE are only
// compiled in when CONFIG_EXT_TX is enabled.
static int prune_tx_types(const VP10_COMP *cpi, BLOCK_SIZE bsize,
                          MACROBLOCK *x, MACROBLOCKD *xd) {
  switch (cpi->sf.tx_type_search) {
    case NO_PRUNE: return 0;
    case PRUNE_ONE: return prune_one_for_sby(cpi, bsize, x, xd);
#if CONFIG_EXT_TX
    case PRUNE_TWO: return prune_two_for_sby(cpi, bsize, x, xd);
    case PRUNE_THREE: return prune_three_for_sby(cpi, bsize, x, xd);
#endif
  }

  // Only reached when tx_type_search holds a value with no case above:
  // trap in debug builds, fall back to "no pruning" when asserts are off.
  assert(0);
  return 0;
}
// Tells the caller whether tx_type should still be evaluated under the given
// prune mask.
//
// With CONFIG_EXT_TX, prune is read as a bitmask:
//   - bit k, for k < TX_TYPES_1D, rejects every tx_type whose vtx_tab entry
//     is k;
//   - bit (TX_TYPES_1D + k) rejects every tx_type whose htx_tab entry is k.
// (The "v"/"h" prefixes presumably stand for the vertical and horizontal 1D
// transform of the 2D type.)
//
// Returns 1 if tx_type should be searched, 0 if it is pruned. IDTX is never
// pruned. Without CONFIG_EXT_TX nothing is pruned and 1 is always returned.
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
  // TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
  // Both tables are indexed by tx_type and are read-only, hence const.
  // NOTE(review): the entry order has to match the TX_TYPE enum layout, which
  // is declared elsewhere -- confirm when that enum changes. Any trailing
  // entries without an initializer are zero, i.e. DCT_1D.
  static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
    DCT_1D,
    ADST_1D,
    DCT_1D,
    ADST_1D,
    FLIPADST_1D,
    DCT_1D,
    FLIPADST_1D,
    ADST_1D,
    FLIPADST_1D,
    DST_1D,
    DCT_1D,
    DST_1D,
    ADST_1D,
    DST_1D,
    FLIPADST_1D,
    DST_1D,
  };
  static const TX_TYPE_1D htx_tab[TX_TYPES] = {
    DCT_1D,
    DCT_1D,
    ADST_1D,
    ADST_1D,
    DCT_1D,
    FLIPADST_1D,
    FLIPADST_1D,
    FLIPADST_1D,
    ADST_1D,
    DCT_1D,
    DST_1D,
    ADST_1D,
    DST_1D,
    FLIPADST_1D,
    DST_1D,
    DST_1D,
  };

  // IDTX bypasses the mask entirely.
  if (tx_type == IDTX)
    return 1;

  // Search the type only if neither of its two mask bits is set.
  return !(((prune >> vtx_tab[tx_type]) & 1) |
           ((prune >> (htx_tab[tx_type] + TX_TYPES_1D)) & 1));
#else
  // temporary to avoid compiler warnings
  (void)tx_type;
  (void)prune;
  return 1;
#endif
}
static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
......@@ -394,8 +514,10 @@ static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
// low enough so that we can skip the mode search.
const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int bw_shift = (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int bh_shift = (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int bw = 1 << bw_shift;
int bh = 1 << bh_shift;
int idx, idy;
int lw = b_width_log2_lookup[unit_size] + 2;
int lh = b_height_log2_lookup[unit_size] + 2;
......@@ -406,7 +528,7 @@ static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
for (idx = 0; idx < bw; ++idx) {
uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
int block_idx = (idy << 1) + idx;
int block_idx = (idy << bw_shift) + idx;
int low_err_skip = 0;
var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
......@@ -988,11 +1110,14 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
vpx_prob skip_prob = vp10_get_skip_prob(cm, xd);
int s0 = vp10_cost_bit(skip_prob, 0);
int s1 = vp10_cost_bit(skip_prob, 1);
const int is_inter = is_inter_block(mbmi);
int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
const int is_inter = is_inter_block(mbmi);
if (is_inter && cpi->sf.tx_type_search > 0)
prune = prune_tx_types(cpi, bs, x, xd);
mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
#if CONFIG_EXT_TX
......@@ -1004,6 +1129,15 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
if (cpi->sf.tx_type_search > 0) {
if (!do_tx_type_search(tx_type, prune))
continue;
} else if (ext_tx_set == 1 &&
tx_type >= DST_ADST && tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
continue;
}
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
......@@ -1011,15 +1145,15 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
if (ext_tx_set == 1 &&
tx_type >= DST_ADST && tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
continue;
}
}
mbmi->tx_type = tx_type;
if (ext_tx_set == 1 &&
mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
continue;
}
txfm_rd_in_plane(x,
cpi,
......@@ -1067,12 +1201,15 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
cpi->sf.use_fast_coef_costing);
if (r == INT_MAX)
continue;
if (is_inter)
if (is_inter) {
r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
else
if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
continue;
} else {
r += cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
}
if (s)
this_rd = RDCOST(x->rdmult, x->rddiv, s1, psse);
else
......@@ -1150,13 +1287,17 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
TX_TYPE tx_type, best_tx_type = DCT_DCT;
const int is_inter = is_inter_block(mbmi);
const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
if (is_inter && cpi->sf.tx_type_search > 0)
prune = prune_tx_types(cpi, bs, x, xd);
assert(skip_prob > 0);
s0 = vp10_cost_bit(skip_prob, 0);
s1 = vp10_cost_bit(skip_prob, 1);
......@@ -1187,6 +1328,15 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
if (cpi->sf.tx_type_search > 0) {
if (!do_tx_type_search(tx_type, prune))
continue;
} else if (ext_tx_set == 1 &&
tx_type >= DST_ADST && tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
continue;
}
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
......@@ -1194,14 +1344,14 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
if (ext_tx_set == 1 &&
tx_type >= DST_ADST && tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
break;
}
}
mbmi->tx_type = tx_type;
if (ext_tx_set == 1 &&
mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
break;
}
txfm_rd_in_plane(x,
cpi,
&r, &d, &s,
......@@ -1233,12 +1383,15 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
if (n < TX_32X32 &&
!xd->lossless[xd->mi[0]->mbmi.segment_id] &&
r != INT_MAX && !FIXED_TX_TYPE) {
if (is_inter)
if (is_inter) {
r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
else
if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
continue;
} else {
r += cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
}
}
#endif // CONFIG_EXT_TX
......@@ -2803,9 +2956,13 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
uint8_t best_blk_skip[256];
const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
int idx, idy;
int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set = get_ext_tx_set(max_tx_size, bsize, is_inter);
#endif
#endif // CONFIG_EXT_TX
if (is_inter && cpi->sf.tx_type_search > 0)
prune = prune_tx_types(cpi, bsize, x, xd);
*distortion = INT64_MAX;
*rate = INT_MAX;
......@@ -2821,6 +2978,15 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type])
continue;
if (cpi->sf.tx_type_search > 0) {
if (!do_tx_type_search(tx_type, prune))
continue;
} else if (ext_tx_set == 1 &&
tx_type >= DST_ADST && tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
continue;
}
} else {
if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode])
......@@ -2828,17 +2994,16 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
}
if (!ext_tx_used_intra[ext_tx_set][tx_type])
continue;
if (ext_tx_set == 1 &&
tx_type >= DST_ADST && tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
break;
}
}
mbmi->tx_type = tx_type;
if (ext_tx_set == 1 &&
mbmi->tx_type >= DST_ADST && mbmi->tx_type < IDTX &&
best_tx_type == DCT_DCT) {
tx_type = IDTX - 1;
break;
}
inter_block_yrd(cpi, x, &this_rate, &this_dist, &this_skip, &this_sse,
bsize, ref_best_rd);
......@@ -2867,12 +3032,15 @@ static void select_tx_type_yrd(const VP10_COMP *cpi, MACROBLOCK *x,
if (max_tx_size < TX_32X32 &&
!xd->lossless[xd->mi[0]->mbmi.segment_id] &&
this_rate != INT_MAX) {
if (is_inter)
if (is_inter) {
this_rate += cpi->inter_tx_type_costs[max_tx_size][mbmi->tx_type];
else
if (cpi->sf.tx_type_search > 0 && !do_tx_type_search(tx_type, prune))
continue;
} else {
this_rate += cpi->intra_tx_type_costs[max_tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
}
}
#endif // CONFIG_EXT_TX
......
......@@ -159,7 +159,7 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
sf->tx_size_search_breakout = 1;
sf->partition_search_breakout_rate_thr = 80;
sf->tx_type_search = PRUNE_ONE;
// Use transform domain distortion.
// Note var-tx expt always uses pixel domain distortion.
sf->use_transform_domain_distortion = 1;
......@@ -177,6 +177,9 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->allow_partition_search_skip = 1;
#if CONFIG_EXT_TX
sf->tx_type_search = PRUNE_TWO;
#endif
}
if (speed >= 3) {
......@@ -195,6 +198,9 @@ static void set_good_speed_feature(VP10_COMP *cpi, VP10_COMMON *cm,
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
#if CONFIG_EXT_TX
sf->tx_type_search = PRUNE_THREE;
#endif
}
if (speed >= 4) {
......@@ -473,6 +479,7 @@ void vp10_set_speed_features_framesize_independent(VP10_COMP *cpi) {
sf->alt_ref_search_fp = 0;
sf->use_quant_fp = 0;
sf->partition_search_type = SEARCH_PARTITION;
sf->tx_type_search = NO_PRUNE;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE;
......
......@@ -181,6 +181,18 @@ typedef enum {
FLAG_SKIP_MULTITAP_SHARP = 1 << MULTITAP_SHARP,
} INTERP_FILTER_MASK;
// Pruning levels for the transform-type search, stored in the
// tx_type_search speed feature. Callers test the value with "> 0", so
// NO_PRUNE must remain 0.
typedef enum {
  NO_PRUNE = 0,     // prune_tx_types() returns an empty mask
  PRUNE_ONE = 1,    // eliminates one tx type in vertical and horizontal
                    // direction
#if CONFIG_EXT_TX
  PRUNE_TWO = 2,    // eliminates two tx types in each direction
  PRUNE_THREE = 3,  // eliminates three tx types in each direction
#endif
} TX_TYPE_SEARCH;
typedef enum {
// Search partitions using RD criterion
SEARCH_PARTITION,
......@@ -298,6 +310,7 @@ typedef struct SPEED_FEATURES {
PARTITION_SEARCH_TYPE partition_search_type;
TX_TYPE_SEARCH tx_type_search;
// Used if partition_search_type = FIXED_SIZE_PARTITION
BLOCK_SIZE always_this_block_size;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment