Commit 1eaf748c authored by Nathan Egge

Port switch to 9-bit rate cost to aom.

Brings the following commit to aom:
b3ad8128 Port switch to 9-bit rate cost to vp10.

ntt-short1:

          MEDIUM (%)  HIGH (%)
    PSNR -0.078535   -0.398648
 PSNRHVS -0.416526   -0.468162
    SSIM -0.397539   -0.538516
FASTSSIM  0.008165    0.035524

subset1:

          RATE (%)  DSNR (dB)
    PSNR -0.00991   0.00063
 PSNRHVS  0.00115  -0.00009
    SSIM  0.01019  -0.00050
FASTSSIM  0.08721  -0.00279

Change-Id: I598ba7ee7efcde18439c31dfa96b86cbf297a580
parent 0b1606e7
......@@ -64,8 +64,14 @@ struct macroblock {
int skip_optimize;
int q_index;
// The equivalent error at the current rdmult of one whole bit (not one
// bitcost unit).
int errorperbit;
// The equivalent SAD error of one (whole) bit at the current quantizer
// for large blocks.
int sadperbit16;
// The equivalent SAD error of one (whole) bit at the current quantizer
// for sub-8x8 blocks.
int sadperbit4;
int rddiv;
int rdmult;
......
......@@ -11,27 +11,30 @@
#include "vp10/encoder/cost.h"
const unsigned int vp10_prob_cost[256] = {
2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129,
1099, 1072, 1046, 1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858,
843, 829, 816, 803, 790, 778, 767, 755, 744, 733, 723, 713, 703,
693, 684, 675, 666, 657, 649, 641, 633, 625, 617, 609, 602, 594,
587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516, 511,
505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442,
437, 433, 428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385,
381, 377, 373, 369, 365, 361, 357, 353, 349, 346, 342, 338, 335,
331, 328, 324, 321, 317, 314, 311, 307, 304, 301, 297, 294, 291,
288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257, 255, 252,
249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216,
214, 211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184,
181, 179, 177, 174, 172, 170, 168, 165, 163, 161, 159, 156, 154,
152, 150, 148, 145, 143, 141, 139, 137, 135, 133, 131, 129, 127,
125, 123, 121, 119, 117, 115, 113, 111, 109, 107, 105, 103, 101,
99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77,
75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55,
53, 51, 50, 48, 46, 45, 43, 41, 40, 38, 37, 35, 33,
32, 30, 29, 27, 25, 24, 22, 21, 19, 18, 16, 15, 13,
12, 10, 9, 7, 6, 4, 3, 1, 1
/* Bit-cost lookup table indexed by an 8-bit probability i (out of 256).
   Each entry is the cost scaled by (1 << VP9_PROB_COST_SHIFT), i.e. one whole
   bit equals (1 << VP9_PROB_COST_SHIFT) table units:
     round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT))
   Begins and ends with a bogus entry to satisfy use of prob=0 in the firstpass:
   index 0 has no finite cost, and index 256 is the complement of prob=0, so
   both simply repeat their neighbouring entry.
   https://code.google.com/p/webm/issues/detail?id=1089 */
const uint16_t vp10_prob_cost[257] = {
4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325, 2260,
2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780, 1748, 1718,
1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470, 1449, 1429, 1409,
1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252, 1236, 1221, 1206, 1192,
1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084, 1072, 1059, 1047, 1036, 1024,
1013, 1001, 990, 979, 968, 958, 947, 937, 927, 917, 907, 897, 887,
878, 868, 859, 850, 841, 832, 823, 814, 806, 797, 789, 780, 772,
764, 756, 748, 740, 732, 724, 717, 709, 702, 694, 687, 680, 673,
665, 658, 651, 644, 637, 631, 624, 617, 611, 604, 598, 591, 585,
578, 572, 566, 560, 554, 547, 541, 535, 530, 524, 518, 512, 506,
501, 495, 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435,
430, 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371,
366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316, 311,
307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264, 260, 256,
252, 248, 244, 240, 236, 232, 228, 224, 220, 216, 212, 209, 205,
201, 197, 194, 190, 186, 182, 179, 175, 171, 168, 164, 161, 157,
153, 150, 146, 143, 139, 136, 132, 129, 125, 122, 119, 115, 112,
109, 105, 102, 99, 95, 92, 89, 86, 82, 79, 76, 73, 70,
66, 63, 60, 57, 54, 51, 48, 45, 42, 38, 35, 32, 29,
26, 23, 20, 18, 15, 12, 9, 6, 3, 3
};
static void cost(int *costs, vpx_tree tree, const vpx_prob *probs, int i,
......
......@@ -12,19 +12,22 @@
#define VP10_ENCODER_COST_H_
#include "vpx_dsp/prob.h"
#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
#endif
extern const unsigned int vp10_prob_cost[256];
extern const uint16_t vp10_prob_cost[257];
// The factor to scale from cost in bits to cost in vp10_prob_cost units.
#define VP9_PROB_COST_SHIFT 9
#define vp10_cost_zero(prob) (vp10_prob_cost[prob])
#define vp10_cost_one(prob) vp10_cost_zero(vpx_complement(prob))
#define vp10_cost_one(prob) vp10_cost_zero(256 - (prob))
#define vp10_cost_bit(prob, bit) \
vp10_cost_zero((bit) ? vpx_complement(prob) : (prob))
#define vp10_cost_bit(prob, bit) vp10_cost_zero((bit) ? 256 - (prob) : (prob))
static INLINE unsigned int cost_branch256(const unsigned int ct[2],
vpx_prob p) {
......
......@@ -49,7 +49,9 @@ void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
pd->dst.buf, pd->dst.stride);
}
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
#define RDTRUNC(RM, DM, R, D) \
(((1 << (VP9_PROB_COST_SHIFT - 1)) + (R) * (RM)) & \
((1 << VP9_PROB_COST_SHIFT) - 1))
typedef struct vp10_token_state {
int rate;
......@@ -118,9 +120,9 @@ static int optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
EXTRABIT e0;
int best, band, pt, i, final_eob;
#if CONFIG_VPX_HIGHBITDEPTH
const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
const int *cat6_high_cost = vp10_get_high_cost_table(8);
#endif
assert((!type && !plane) || (type && plane));
......
......@@ -74,21 +74,27 @@ int vp10_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
}
#define PIXEL_TRANSFORM_ERROR_SCALE 4
static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
int *mvcost[2], int error_per_bit) {
if (mvcost) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
13);
// This product sits at a 32-bit ceiling right now and any additional
// accuracy in either bit cost or error cost will cause it to overflow.
return ROUND_POWER_OF_TWO(
(unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
PIXEL_TRANSFORM_ERROR_SCALE);
}
return 0;
}
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
int error_per_bit) {
int sad_per_bit) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
return ROUND_POWER_OF_TWO(
mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * error_per_bit, 8);
(unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
VP9_PROB_COST_SHIFT);
}
void vp10_init_dsmotion_compensation(search_site_config *cfg, int stride) {
......@@ -145,14 +151,15 @@ void vp10_init3smotion_compensation(search_site_config *cfg, int stride) {
* could reduce the area.
*/
/* estimated cost of a motion vector (r,c) */
#define MVC(r, c) \
(mvcost \
? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \
mvcost[1][((c)-rc)]) * \
error_per_bit + \
4096) >> \
13 \
/* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
* from the same math as in mv_err_cost(). */
#define MVC(r, c) \
(mvcost \
? ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
mvcost[0][((r)-rr)] + mvcost[1][((c)-rc)]) * \
error_per_bit + \
8192) >> \
14 \
: 0)
// convert motion vector component to offset for sv[a]f calc
......@@ -770,7 +777,6 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
in_what->stride, &sse) +
// mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
x->mvcost, x->errorperbit);
}
......@@ -783,7 +789,6 @@ static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv),
in_what->stride, &sse) +
// mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
x->mvcost, x->errorperbit);
}
......
......@@ -441,8 +441,7 @@ void vp10_init_plane_quantizers(VP10_COMP *cpi, MACROBLOCK *x) {
x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
x->errorperbit = rdmult >> 6;
x->errorperbit += (x->errorperbit == 0);
set_error_per_bit(x, rdmult);
vp10_initialize_me_consts(cpi, x, x->q_index);
}
......
......@@ -41,7 +41,6 @@
#include "vp10/encoder/tokenize.h"
#define RD_THRESH_POW 1.25
#define RD_MULT_EPB_RATIO 64
// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
......@@ -269,8 +268,7 @@ void vp10_initialize_rd_consts(VP10_COMP *cpi) {
rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
rd->RDMULT = vp10_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
x->errorperbit += (x->errorperbit == 0);
set_error_per_bit(x, rd->RDMULT);
x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
cm->frame_type != KEY_FRAME)
......@@ -387,7 +385,7 @@ void vp10_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
(((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
model_rd_norm(xsq_q10, &r_q10, &d_q10);
*rate = ((r_q10 << n_log2) + 2) >> 2;
*rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
*dist = (var * (int64_t)d_q10 + 512) >> 10;
}
}
......
......@@ -17,14 +17,17 @@
#include "vp10/encoder/block.h"
#include "vp10/encoder/context_tree.h"
#include "vp10/encoder/cost.h"
#ifdef __cplusplus
extern "C" {
#endif
#define RDDIV_BITS 7
#define RD_EPB_SHIFT 6
#define RDCOST(RM, DM, R, D) (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
#define RDCOST(RM, DM, R, D) \
(ROUND_POWER_OF_TWO(((int64_t)R) * (RM), VP9_PROB_COST_SHIFT) + (D << DM))
#define QIDX_SKIP_THRESH 115
#define MV_COST_WEIGHT 108
......@@ -170,6 +173,11 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
void vp10_mv_pred(struct VP10_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
int ref_y_stride, int ref_frame, BLOCK_SIZE block_size);
// Derives x->errorperbit from the RD multiplier: rdmult shifted right by
// RD_EPB_SHIFT, with a result of zero replaced by 1 so the stored value is
// never zero.
static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
  const int scaled = rdmult >> RD_EPB_SHIFT;
  x->errorperbit = (scaled == 0) ? 1 : scaled;
}
void vp10_setup_pred_block(const MACROBLOCKD *xd,
struct buf_2d dst[MAX_MB_PLANE],
const YV12_BUFFER_CONFIG *src, int mi_row,
......
......@@ -242,7 +242,7 @@ static void model_rd_for_sb(VP10_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
int quantizer = (pd->dequant[1] >> dequant_shift);
if (quantizer < 120)
rate = (square_error * (280 - quantizer)) >> 8;
rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT);
else
rate = 0;
dist = (square_error * quantizer) >> 8;
......@@ -342,9 +342,9 @@ static int cost_coeffs(MACROBLOCK *x, int plane, int block, ENTROPY_CONTEXT *A,
int pt = combine_entropy_contexts(*A, *L);
int c, cost;
#if CONFIG_VPX_HIGHBITDEPTH
const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
const int *cat6_high_cost = vp10_get_high_cost_table(xd->bd);
#else
const int16_t *cat6_high_cost = vp10_get_high_cost_table(8);
const int *cat6_high_cost = vp10_get_high_cost_table(8);
#endif
// Check for consistency of tx_size with mode info
......
......@@ -84,7 +84,7 @@ static int remap_prob(int v, int m) {
static int prob_diff_update_cost(vpx_prob newp, vpx_prob oldp) {
int delp = remap_prob(newp, oldp);
return update_bits[delp] * 256;
return update_bits[delp] << VP9_PROB_COST_SHIFT;
}
static void encode_uniform(vpx_writer *w, int v) {
......
This diff is collapsed.
......@@ -61,23 +61,25 @@ extern const int16_t *vp10_dct_value_cost_ptr;
extern const TOKENVALUE *vp10_dct_value_tokens_ptr;
extern const TOKENVALUE *vp10_dct_cat_lt_10_value_tokens;
extern const int16_t vp10_cat6_low_cost[256];
extern const int16_t vp10_cat6_high_cost[128];
extern const int16_t vp10_cat6_high10_high_cost[512];
extern const int16_t vp10_cat6_high12_high_cost[2048];
static INLINE int16_t vp10_get_cost(int16_t token, EXTRABIT extrabits,
const int16_t *cat6_high_table) {
if (token != CATEGORY6_TOKEN) return vp10_extra_bits[token].cost[extrabits];
return vp10_cat6_low_cost[extrabits & 0xff] + cat6_high_table[extrabits >> 8];
extern const int vp10_cat6_high_cost[64];
extern const int vp10_cat6_high10_high_cost[256];
extern const int vp10_cat6_high12_high_cost[1024];
static INLINE int vp10_get_cost(int16_t token, EXTRABIT extrabits,
const int *cat6_high_table) {
if (token != CATEGORY6_TOKEN)
return vp10_extra_bits[token].cost[extrabits >> 1];
return vp10_cat6_low_cost[(extrabits >> 1) & 0xff] +
cat6_high_table[extrabits >> 9];
}
#if CONFIG_VPX_HIGHBITDEPTH
static INLINE const int16_t *vp10_get_high_cost_table(int bit_depth) {
static INLINE const int *vp10_get_high_cost_table(int bit_depth) {
return bit_depth == 8 ? vp10_cat6_high_cost
: (bit_depth == 10 ? vp10_cat6_high10_high_cost
: vp10_cat6_high12_high_cost);
}
#else
static INLINE const int16_t *vp10_get_high_cost_table(int bit_depth) {
static INLINE const int *vp10_get_high_cost_table(int bit_depth) {
(void)bit_depth;
return vp10_cat6_high_cost;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment