Commit b3ad8128 authored by Alex Converse

Port switch to 9-bit rate cost to vp10.

Brings the following commits to vp10:
269428e3 Tie the bit cost scale to a define.
d13385ce Switch to 9-bit rate cost constants built on a 256 probability denominator.
ad43a738 Fix a signed overflow in vp9 motion cost.
1c9b0918 Fix some interger overflow errors
fac947df Restore previous motion search bit-error scale.

Change-Id: I598ba7ee7efcde18439c31dfa96b86cbf297a580
parent 00380700
......@@ -81,8 +81,14 @@ struct macroblock {
int skip_optimize;
int q_index;
// The equivalent error at the current rdmult of one whole bit (not one
// bitcost unit).
int errorperbit;
// The equivalent SAD error of one (whole) bit at the current quantizer
// for large blocks.
int sadperbit16;
// The equivalent SAD error of one (whole) bit at the current quantizer
// for sub-8x8 blocks.
int sadperbit4;
int rddiv;
int rdmult;
......
......@@ -12,29 +12,32 @@
#include "vp10/encoder/cost.h"
#include "vp10/common/entropy.h"
const unsigned int vp10_prob_cost[256] = {
2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161,
1129, 1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889,
873, 858, 843, 829, 816, 803, 790, 778, 767, 755, 744, 733,
723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625,
617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541,
534, 528, 522, 516, 511, 505, 499, 494, 488, 483, 477, 472,
467, 462, 457, 452, 447, 442, 437, 433, 428, 424, 419, 415,
410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365,
361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321,
317, 314, 311, 307, 304, 301, 297, 294, 291, 288, 285, 281,
278, 275, 272, 269, 266, 263, 260, 257, 255, 252, 249, 246,
243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214,
211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184,
181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156,
154, 152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131,
129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107,
105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84,
82, 81, 79, 77, 75, 73, 72, 70, 68, 66, 65, 63,
61, 60, 58, 56, 55, 53, 51, 50, 48, 46, 45, 43,
41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24,
22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6,
4, 3, 1, 1};
/* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT))
Begins and ends with a bogus entry to satisfy use of prob=0 in the firstpass.
https://code.google.com/p/webm/issues/detail?id=1089 */
const uint16_t vp10_prob_cost[257] = {
4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325,
2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780,
1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470,
1449, 1429, 1409, 1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252,
1236, 1221, 1206, 1192, 1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084,
1072, 1059, 1047, 1036, 1024, 1013, 1001, 990, 979, 968, 958, 947,
937, 927, 917, 907, 897, 887, 878, 868, 859, 850, 841, 832,
823, 814, 806, 797, 789, 780, 772, 764, 756, 748, 740, 732,
724, 717, 709, 702, 694, 687, 680, 673, 665, 658, 651, 644,
637, 631, 624, 617, 611, 604, 598, 591, 585, 578, 572, 566,
560, 554, 547, 541, 535, 530, 524, 518, 512, 506, 501, 495,
489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435, 430,
425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371,
366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316,
311, 307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264,
260, 256, 252, 248, 244, 240, 236, 232, 228, 224, 220, 216,
212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175, 171,
168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129,
125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89,
86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 51,
48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15,
12, 9, 6, 3, 3};
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs,
int i, int c) {
......
......@@ -12,18 +12,22 @@
#define VP10_ENCODER_COST_H_
#include "vpx_dsp/prob.h"
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
#endif
extern const unsigned int vp10_prob_cost[256];
extern const uint16_t vp10_prob_cost[257];
// The factor to scale from cost in bits to cost in vp10_prob_cost units.
#define VP9_PROB_COST_SHIFT 9
#define vp10_cost_zero(prob) (vp10_prob_cost[prob])
#define vp10_cost_one(prob) vp10_cost_zero(vpx_complement(prob))
#define vp10_cost_one(prob) vp10_cost_zero(256 - (prob))
#define vp10_cost_bit(prob, bit) vp10_cost_zero((bit) ? vpx_complement(prob) \
#define vp10_cost_bit(prob, bit) vp10_cost_zero((bit) ? 256 - (prob) \
: (prob))
static INLINE unsigned int cost_branch256(const unsigned int ct[2],
......
......@@ -52,7 +52,9 @@ void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
pd->dst.buf, pd->dst.stride);
}
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
#define RDTRUNC(RM, DM, R, D) \
(((1 << (VP9_PROB_COST_SHIFT - 1)) + (R) * (RM)) & \
((1 << VP9_PROB_COST_SHIFT) - 1))
typedef struct vp10_token_state {
int rate;
......@@ -119,9 +121,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block,
EXTRABIT e0;
int best, band, pt, i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
const int *cat6_high_cost = vp10_get_high_cost_table(8);
#endif
assert((!type && !plane) || (type && plane));
......
......@@ -80,24 +80,29 @@ int vp10_mv_bit_cost(const MV *mv, const MV *ref,
return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}
static int mv_err_cost(const MV *mv, const MV *ref,
const int *mvjcost, int *mvcost[2],
int error_per_bit) {
#define PIXEL_TRANSFORM_ERROR_SCALE 4
static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
int *mvcost[2], int error_per_bit) {
if (mvcost) {
const MV diff = { mv->row - ref->row,
mv->col - ref->col };
return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) *
error_per_bit, 13);
const MV diff = {mv->row - ref->row, mv->col - ref->col};
// This product sits at a 32-bit ceiling right now and any additional
// accuracy in either bit cost or error cost will cause it to overflow.
return ROUND_POWER_OF_TWO(
(unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
PIXEL_TRANSFORM_ERROR_SCALE);
}
return 0;
}
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
int error_per_bit) {
int sad_per_bit) {
const MV diff = { mv->row - ref->row,
mv->col - ref->col };
return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
x->nmvsadcost) * error_per_bit, 8);
return ROUND_POWER_OF_TWO(
(unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) *
sad_per_bit,
VP9_PROB_COST_SHIFT);
}
void vp10_init_dsmotion_compensation(search_site_config *cfg, int stride) {
......@@ -155,12 +160,13 @@ void vp10_init3smotion_compensation(search_site_config *cfg, int stride) {
* could reduce the area.
*/
/* estimated cost of a motion vector (r,c) */
/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
* from the same math as in mv_err_cost(). */
#define MVC(r, c) \
(mvcost ? \
((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
mvcost[0][((r) - rr)] + mvcost[1][((c) - rc)]) * \
error_per_bit + 4096) >> 13 : 0)
error_per_bit + 8192) >> 14 : 0)
// convert motion vector component to offset for sv[a]f calc
......@@ -852,8 +858,8 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x,
cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
in_what->stride, &sse) +
// mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
mv_err_cost(&this_mv, &fcenter_mv,
x->nmvjointcost, x->mvcost,
x->errorperbit);
}
} else {
......@@ -866,8 +872,8 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x,
cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
in_what->stride, &sse) +
// mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, x->mvcost,
mv_err_cost(&this_mv, &fcenter_mv,
x->nmvjointcost, x->mvcost,
x->errorperbit);
}
}
......
......@@ -519,8 +519,7 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) {
x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
x->errorperbit = rdmult >> 6;
x->errorperbit += (x->errorperbit == 0);
set_error_per_bit(x, rdmult);
vp10_initialize_me_consts(cpi, x, x->q_index);
}
......
......@@ -41,7 +41,6 @@
#include "vp10/encoder/tokenize.h"
#define RD_THRESH_POW 1.25
#define RD_MULT_EPB_RATIO 64
// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
......@@ -343,8 +342,7 @@ void vp10_initialize_rd_consts(VP10_COMP *cpi) {
rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
rd->RDMULT = vp10_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
x->errorperbit += (x->errorperbit == 0);
set_error_per_bit(x, rd->RDMULT);
x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
cm->frame_type != KEY_FRAME) ? 0 : 1;
......@@ -504,7 +502,7 @@ void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
(((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
model_rd_norm(xsq_q10, &r_q10, &d_q10);
*rate = ((r_q10 << n_log2) + 2) >> 2;
*rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
*dist = (var * (int64_t)d_q10 + 512) >> 10;
}
}
......
......@@ -17,18 +17,21 @@
#include "vp10/encoder/block.h"
#include "vp10/encoder/context_tree.h"
#include "vp10/encoder/cost.h"
#ifdef __cplusplus
extern "C" {
#endif
#define RDDIV_BITS 7
#define RD_EPB_SHIFT 6
#define RDCOST(RM, DM, R, D) \
(((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
(ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))
#define RDCOST_DBL(RM, DM, R, D) \
(((((double)(R)) * (RM)) / 256.0) + ((double)(D) * (1 << (DM))))
(((((double)(R)) * (RM)) / (double)(1 << VP9_PROB_COST_SHIFT)) + \
((double)(D) * (1 << (DM))))
#define QIDX_SKIP_THRESH 115
......@@ -310,6 +313,11 @@ void vp10_mv_pred(struct VP10_COMP *cpi, MACROBLOCK *x,
uint8_t *ref_y_buffer, int ref_y_stride,
int ref_frame, BLOCK_SIZE block_size);
// Sets x->errorperbit from the given rate-distortion multiplier: rdmult is
// shifted right by RD_EPB_SHIFT, and a result of exactly zero is replaced
// by 1 so the stored value is never zero.
static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
  const int scaled = rdmult >> RD_EPB_SHIFT;
  x->errorperbit = (scaled == 0) ? 1 : scaled;
}
void vp10_setup_pred_block(const MACROBLOCKD *xd,
struct buf_2d dst[MAX_MB_PLANE],
const YV12_BUFFER_CONFIG *src,
......
......@@ -411,7 +411,7 @@ static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize,
int quantizer = (pd->dequant[1] >> dequant_shift);
if (quantizer < 120)
rate = (square_error * (280 - quantizer)) >> 8;
rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT);
else
rate = 0;
dist = (square_error * quantizer) >> 8;
......@@ -523,9 +523,9 @@ static int cost_coeffs(MACROBLOCK *x,
#endif
int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
const int *cat6_high_cost = vp10_get_high_cost_table(8);
#endif
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
......
......@@ -80,7 +80,7 @@ static int remap_prob(int v, int m) {
static int prob_diff_update_cost(vpx_prob newp, vpx_prob oldp) {
int delp = remap_prob(newp, oldp);
return update_bits[delp] * 256;
return update_bits[delp] << VP9_PROB_COST_SHIFT;
}
static void encode_uniform(vpx_writer *w, int v) {
......
This diff is collapsed.
......@@ -77,25 +77,25 @@ extern const int16_t *vp10_dct_value_cost_ptr;
extern const TOKENVALUE *vp10_dct_value_tokens_ptr;
extern const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens;
extern const int16_t vp10_cat6_low_cost[256];
extern const int16_t vp10_cat6_high_cost[128];
extern const int16_t vp10_cat6_high10_high_cost[512];
extern const int16_t vp10_cat6_high12_high_cost[2048];
static INLINE int16_t vp10_get_cost(int16_t token, EXTRABIT extrabits,
const int16_t *cat6_high_table) {
extern const int vp10_cat6_high_cost[64];
extern const int vp10_cat6_high10_high_cost[256];
extern const int vp10_cat6_high12_high_cost[1024];
static INLINE int vp10_get_cost(int16_t token, EXTRABIT extrabits,
const int *cat6_high_table) {
if (token != CATEGORY6_TOKEN)
return vp10_extra_bits[token].cost[extrabits];
return vp10_cat6_low_cost[extrabits & 0xff]
+ cat6_high_table[extrabits >> 8];
return vp10_extra_bits[token].cost[extrabits >> 1];
return vp10_cat6_low_cost[(extrabits >> 1) & 0xff]
+ cat6_high_table[extrabits >> 9];
}
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE const int16_t* vp10_get_high_cost_table(int bit_depth) {
static INLINE const int* vp10_get_high_cost_table(int bit_depth) {
return bit_depth == 8 ? vp10_cat6_high_cost
: (bit_depth == 10 ? vp10_cat6_high10_high_cost :
vp10_cat6_high12_high_cost);
}
#else
static INLINE const int16_t* vp10_get_high_cost_table(int bit_depth) {
static INLINE const int* vp10_get_high_cost_table(int bit_depth) {
(void) bit_depth;
return vp10_cat6_high_cost;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment