vp9_encodemb.c 24.3 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_ports/config.h"
12
#include "vp9/encoder/vp9_encodemb.h"
13
#include "vp9/common/vp9_reconinter.h"
14 15
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_tokenize.h"
16 17
#include "vp9/common/vp9_invtrans.h"
#include "vp9/common/vp9_reconintra.h"
John Koleszar's avatar
John Koleszar committed
18
#include "vpx_mem/vpx_mem.h"
19
#include "vp9/encoder/vp9_rdopt.h"
20
#include "vp9/common/vp9_systemdependent.h"
21
#include "vp9_rtcd.h"
John Koleszar's avatar
John Koleszar committed
22

23
/* Computes the 4x4 residual (source minus predictor) for one block.
 *
 * be:    supplies the source pixels (*base_src + src, stepped by src_stride)
 *        and the destination residual buffer (src_diff).
 * bd:    supplies the predictor pixels.
 * pitch: row stride applied to both the predictor and the residual buffer.
 */
void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
  const unsigned char *src_ptr = (*(be->base_src) + be->src);
  short *diff_ptr = be->src_diff;
  const unsigned char *pred_ptr = bd->predictor;
  const int src_stride = be->src_stride;
  int r, c;

  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++) {
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];
    }

    diff_ptr += pitch;
    pred_ptr += pitch;
    src_ptr  += src_stride;
  }
}

42
/* Computes an 8x8 residual (source minus predictor) starting at block `be`.
 *
 * be:    supplies the source pixels (*base_src + src, stepped by src_stride)
 *        and the destination residual buffer (src_diff).
 * bd:    supplies the predictor pixels.
 * pitch: row stride applied to both the predictor and the residual buffer.
 */
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
  const unsigned char *src_ptr = (*(be->base_src) + be->src);
  short *diff_ptr = be->src_diff;
  const unsigned char *pred_ptr = bd->predictor;
  const int src_stride = be->src_stride;
  int r, c;

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];
    }

    diff_ptr += pitch;
    pred_ptr += pitch;
    src_ptr  += src_stride;
  }
}

59
/* Computes the two 8x8 chroma residuals of a macroblock.
 *
 * diff:       residual buffer; U is written at diff[256..319] and V at
 *             diff[320..383], each as 8 rows of 8 values.
 * usrc, vsrc: source U and V pixels, rows src_stride apart.
 * upred, vpred: predicted U and V pixels, rows dst_stride apart.
 */
void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc,
                           const unsigned char *vsrc, int src_stride,
                           const unsigned char *upred,
                           const unsigned char *vpred, int dst_stride) {
  short *udiff = diff + 256;
  short *vdiff = diff + 320;
  int r, c;

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      udiff[c] = usrc[c] - upred[c];
    }

    udiff += 8;
    upred += dst_stride;
    usrc  += src_stride;
  }

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      vdiff[c] = vsrc[c] - vpred[c];
    }

    vdiff += 8;
    vpred += dst_stride;
    vsrc  += src_stride;
  }
}
John Koleszar's avatar
John Koleszar committed
87

88
/* Computes the chroma residuals of a macroblock against a packed predictor.
 *
 * The U and V predictions are read from pred + 256 and pred + 320 with a
 * fixed row stride of 8; the source planes use `stride`. The work is done by
 * vp9_subtract_mbuv_s_c().
 */
void vp9_subtract_mbuv_c(short *diff, unsigned char *usrc,
                         unsigned char *vsrc, unsigned char *pred, int stride) {
  unsigned char *upred = pred + 256;
  unsigned char *vpred = pred + 320;

  vp9_subtract_mbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8);
}

96
/* Computes the 16x16 luma residual of a macroblock.
 *
 * diff: destination, written as 16 contiguous rows of 16 values.
 * src:  source pixels, rows src_stride apart.
 * pred: predicted pixels, rows dst_stride apart.
 */
void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride,
                          const unsigned char *pred, int dst_stride) {
  int r, c;

  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++) {
      diff[c] = src[c] - pred[c];
    }

    diff += 16;
    pred += dst_stride;
    src  += src_stride;
  }
}

111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
/* Computes a 32x32 luma residual: diff = src - pred.
 *
 * diff is written as 32 contiguous rows of 32 values; src rows are
 * src_stride apart and pred rows are dst_stride apart.
 */
void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride,
                          const unsigned char *pred, int dst_stride) {
  int row;

  for (row = 0; row < 32; ++row) {
    const unsigned char *src_row = src + row * src_stride;
    const unsigned char *pred_row = pred + row * dst_stride;
    short *out_row = diff + row * 32;
    int col;

    for (col = 0; col < 32; ++col)
      out_row[col] = src_row[col] - pred_row[col];
  }
}

/* Computes the two 16x16 chroma residuals that follow a 32x32 luma residual.
 *
 * U is written at diff[1024..1279] and V at diff[1280..1535], each as 16
 * rows of 16 values. Source rows are src_stride apart, prediction rows are
 * dst_stride apart.
 */
void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc,
                           const unsigned char *vsrc, int src_stride,
                           const unsigned char *upred,
                           const unsigned char *vpred, int dst_stride) {
  short *const u_out = diff + 1024;
  short *const v_out = diff + 1024 + 256;
  int row, col;

  /* U plane. */
  for (row = 0; row < 16; ++row) {
    const unsigned char *s = usrc + row * src_stride;
    const unsigned char *p = upred + row * dst_stride;

    for (col = 0; col < 16; ++col)
      u_out[row * 16 + col] = s[col] - p[col];
  }

  /* V plane. */
  for (row = 0; row < 16; ++row) {
    const unsigned char *s = vsrc + row * src_stride;
    const unsigned char *p = vpred + row * dst_stride;

    for (col = 0; col < 16; ++col)
      v_out[row * 16 + col] = s[col] - p[col];
  }
}
#endif

157
/* Computes the 16x16 luma residual against a predictor stored with a fixed
 * row stride of 16; the source uses `stride`. Delegates to
 * vp9_subtract_mby_s_c().
 */
void vp9_subtract_mby_c(short *diff, unsigned char *src,
                        unsigned char *pred, int stride) {
  vp9_subtract_mby_s_c(diff, src, stride, pred, 16);
}

162
/* Fills x->src_diff with the luma and chroma residuals of the macroblock,
 * using x->e_mbd.predictor as the prediction for all three planes.
 */
static void subtract_mb(MACROBLOCK *x) {
  BLOCK *b = &x->block[0];

  vp9_subtract_mby(x->src_diff, *(b->base_src), x->e_mbd.predictor,
                   b->src_stride);
  vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->e_mbd.predictor, x->src.uv_stride);
}

171
/* Gathers the first coefficient of each of the 16 luma 4x4 blocks
 * (x->coeff[i * 16]) into x->src_diff[384..399] and zeroes it in place, so
 * the gathered values can be fed to the second-order transform.
 */
static void build_dcblock_4x4(MACROBLOCK *x) {
  short *src_diff_ptr = &x->src_diff[384];
  int i;

  for (i = 0; i < 16; i++) {
    src_diff_ptr[i] = x->coeff[i * 16];
    x->coeff[i * 16] = 0;
  }
}
180

181
/* Forward-transforms the 16 luma 4x4 blocks of a macroblock.
 *
 * Each block uses vp9_fht_c() when its transform type is not DCT_DCT and the
 * 4x4 forward DCT otherwise. When the macroblock uses a second-order block,
 * the 16 first coefficients are gathered and Walsh-transformed into block 24;
 * otherwise block 24's coefficients are cleared.
 */
void vp9_transform_mby_4x4(MACROBLOCK *x) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;
  int has_2nd_order = get_2nd_order_usage(xd);

  for (i = 0; i < 16; i++) {
    BLOCK *b = &x->block[i];
    TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
      /* Non-DCT transform types are not expected together with a
       * second-order block. */
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
    } else {
      x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
                           &x->block[i].coeff[0], 32);
    }
  }

  if (has_2nd_order) {
    // build dc block from 16 y dc values
    build_dcblock_4x4(x);

    // do 2nd order transform on the dc block
    x->short_walsh4x4(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
  } else {
    vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
  }
}

210
/* Forward-transforms the chroma blocks (16..23) with the 8x4 DCT, which is
 * invoked once per pair of adjacent 4x4 blocks.
 */
void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 2) {
    x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }
}

219
/* Runs the 4x4 forward transform on the luma and then the chroma blocks. */
static void transform_mb_4x4(MACROBLOCK *x) {
  vp9_transform_mby_4x4(x);
  vp9_transform_mbuv_4x4(x);
}

224
/* Builds the second-order input for the 8x8 luma path.
 *
 * Clears the 16 entries of block 24's src_diff, then moves the coefficients
 * at x->coeff[0], [4 * 16], [8 * 16] and [12 * 16] into positions 0, 1, 4
 * and 8, zeroing them at their original locations.
 */
static void build_dcblock_8x8(MACROBLOCK *x) {
  int16_t *src_diff_ptr = x->block[24].src_diff;
  int i;

  for (i = 0; i < 16; i++) {
    src_diff_ptr[i] = 0;
  }
  src_diff_ptr[0] = x->coeff[0 * 16];
  src_diff_ptr[1] = x->coeff[4 * 16];
  src_diff_ptr[4] = x->coeff[8 * 16];
  src_diff_ptr[8] = x->coeff[12 * 16];
  x->coeff[0 * 16] = 0;
  x->coeff[4 * 16] = 0;
  x->coeff[8 * 16] = 0;
  x->coeff[12 * 16] = 0;
}

241
/* Forward-transforms the four luma 8x8 blocks of a macroblock.
 *
 * The first loop handles blocks 0 and 8 in place. The second loop reads the
 * residual from blocks 2 and 10 but stores the coefficients in blocks 4 and
 * 12 (i + 2). NOTE(review): this looks like a deliberate coefficient layout
 * for 8x8 blocks -- confirm against the quantizer/tokenizer.
 *
 * When the macroblock uses a second-order block, the four first coefficients
 * are gathered and Haar-transformed into block 24; otherwise block 24's
 * coefficients are cleared.
 */
void vp9_transform_mby_8x8(MACROBLOCK *x) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;
  TX_TYPE tx_type;
  int has_2nd_order = get_2nd_order_usage(xd);

  for (i = 0; i < 9; i += 8) {
    BLOCK *b = &x->block[i];
    tx_type = get_tx_type_8x8(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
    } else {
      x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
                           &x->block[i].coeff[0], 32);
    }
  }
  for (i = 2; i < 11; i += 8) {
    BLOCK *b = &x->block[i];
    tx_type = get_tx_type_8x8(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
    } else {
      x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
                           &x->block[i + 2].coeff[0], 32);
    }
  }

  if (has_2nd_order) {
    // build dc block from 2x2 y dc values
    build_dcblock_8x8(x);

    // do 2nd order transform on the dc block
    x->short_fhaar2x2(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
  } else {
    vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
  }
}

282
/* Forward-transforms the two chroma 8x8 blocks, which start at block
 * indices 16 and 20.
 */
void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 4) {
    x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }
}

291 292 293
/* Runs the 8x8 forward transform on the luma and then the chroma blocks. */
void vp9_transform_mb_8x8(MACROBLOCK *x) {
  vp9_transform_mby_8x8(x);
  vp9_transform_mbuv_8x8(x);
}

296
/* Forward-transforms the luma plane as a single 16x16 block: vp9_fht_c()
 * when the transform type is not DCT_DCT, the 16x16 forward DCT otherwise.
 * vp9_clear_system_state() is called before either transform runs.
 */
void vp9_transform_mby_16x16(MACROBLOCK *x) {
  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK *b = &x->block[0];
  TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);

  vp9_clear_system_state();
  if (tx_type != DCT_DCT) {
    vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
  } else {
    x->vp9_short_fdct16x16(&x->block[0].src_diff[0],
                           &x->block[0].coeff[0], 32);
  }
}

309 310 311
/* Transforms luma as one 16x16 block and chroma as 8x8 blocks. */
void vp9_transform_mb_16x16(MACROBLOCK *x) {
  vp9_transform_mby_16x16(x);
  vp9_transform_mbuv_8x8(x);
}
313

314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
/* Applies the 32x32 forward DCT to the superblock residual, reading from
 * sb_coeff_data.src_diff and writing to sb_coeff_data.coeff.
 */
void vp9_transform_sby_32x32(MACROBLOCK *x) {
  SUPERBLOCK *const sb = &x->sb_coeff_data;

  vp9_short_fdct32x32(sb->src_diff, sb->coeff, 64);
}

/* Applies the 16x16 forward DCT to the two superblock chroma residuals,
 * located at offsets 1024 and 1280 of the superblock buffers.
 */
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
  SUPERBLOCK *const sb = &x->sb_coeff_data;
  int offset;

  vp9_clear_system_state();
  for (offset = 1024; offset <= 1280; offset += 256) {
    x->vp9_short_fdct16x16(sb->src_diff + offset,
                           sb->coeff + offset, 32);
  }
}
#endif

330 331
/* Tie-breakers used by UPDATE_RD_COST() when two RD costs compare equal:
 * the low 8 bits of the rounded rate term. DM and D are accepted so the
 * argument list matches the RDCOST() calls, but they are not used.
 */
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )

typedef struct vp9_token_state vp9_token_state;

/* One node of the trellis built by optimize_b(). */
struct vp9_token_state {
  int           rate;   /* accumulated rate of the path starting here */
  int           error;  /* accumulated squared error of that path */
  int           next;   /* scan index of the next trellis node */
  signed char   token;  /* token chosen for this position */
  short         qc;     /* quantized coefficient value for this position */
};

// TODO: experiments to find optimal multiplier values
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 4

/* RD multiplier scale, indexed by the PLANE_TYPE passed to optimize_b(). */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  Y2_RD_MULT,
  UV_RD_MULT,
  Y1_RD_MULT
};

354 355 356 357 358 359 360 361 362 363
/* Recomputes rd_cost0/rd_cost1 from the caller's rate0/error0 and
 * rate1/error1 using rdmult and rddiv. When the two costs tie, both are
 * replaced by RDTRUNC() values so the comparison that follows can still
 * separate them. Wrapped in do { } while (0) so that `UPDATE_RD_COST();`
 * is a single statement and is safe inside an unbraced if/else.
 */
#define UPDATE_RD_COST()\
do {\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
} while (0)

364 365
/* Trellis-optimizes the quantized coefficients of one transform block.
 *
 * Walking the scan order backwards from the current end-of-block, every
 * non-zero quantized coefficient gets a trellis node with two states:
 *   state 0 keeps the quantized value as is;
 *   state 1 uses the value moved one step towards zero when the
 *           dequantized magnitude overshoots the original coefficient by
 *           less than one quantizer step ("shortcut"), and the unchanged
 *           value otherwise.
 * Each state records the cheaper (by RD cost) of the two successor states.
 * The best path is then traced forwards, rewriting qcoeff/dqcoeff and
 * recomputing the end-of-block position.
 *
 * mb:      encoder macroblock; supplies coefficients, token costs, rdmult
 *          and rddiv.
 * i:       index of the block inside the macroblock (reused as the scan
 *          position once the block pointers have been taken).
 * type:    plane type; PLANE_TYPE_Y_NO_DC makes the scan start at position 1.
 * a, l:    above/left entropy contexts; read to cost the first token and
 *          both overwritten with the new "has coefficients" flag.
 * tx_size: TX_4X4, TX_8X8 or TX_16X16; any other value is handled as TX_4X4.
 */
static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                       int tx_size) {
  BLOCK *b = &mb->block[i];
  BLOCKD *d = &mb->e_mbd.block[i];
  /* 256 coefficient positions at most, plus the sentinel node at eob. */
  vp9_token_state tokens[257][2];
  unsigned best_index[257][2];
  const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
  short *qcoeff_ptr = d->qcoeff;
  short *dqcoeff_ptr = d->dqcoeff;
  int eob = d->eob, final_eob, sz = 0;
  int i0 = (type == PLANE_TYPE_Y_NO_DC);
  int rc, x, next;
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
  int err_mult = plane_rd_mult[type];
  int default_eob;
  int const *scan, *bands;

  switch (tx_size) {
    default:
    case TX_4X4:
      scan = vp9_default_zig_zag1d;
      bands = vp9_coef_bands;
      default_eob = 16;
      // TODO: this isn't called (for intra4x4 modes), but will be left in
      // since it could be used later
      switch (get_tx_type_4x4(&mb->e_mbd, d)) {
        case ADST_DCT:
          scan = vp9_row_scan;
          break;

        case DCT_ADST:
          scan = vp9_col_scan;
          break;

        default:
          /* Every other transform type keeps the zig-zag scan. */
          break;
      }
      break;
    case TX_8X8:
      scan = vp9_default_zig_zag1d_8x8;
      bands = vp9_coef_bands_8x8;
      default_eob = 64;
      break;
    case TX_16X16:
      scan = vp9_default_zig_zag1d_16x16;
      bands = vp9_coef_bands_16x16;
      default_eob = 256;
      break;
  }

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
  memset(best_index, 0, sizeof(best_index));
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  for (i = eob; i-- > i0;) {
    int base_bits, d2, dx;

    rc = scan[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp9_dct_value_tokens_ptr + x)->Token;
      /* Consider both possible successor states. */
      if (next < default_eob) {
        band = bands[i + 1];
        pt = vp9_prev_token_class[t0];
        rate0 +=
          mb->token_costs[tx_size][type][band][pt][tokens[next][0].token];
        rate1 +=
          mb->token_costs[tx_size][type][band][pt][tokens[next][1].token];
      }
      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);
      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_index[i][0] = best;
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      /* The alternative is only tried when the dequantized magnitude
       * overshoots the original coefficient by less than one step.
       */
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        /* sz is -1 for negative x and 0 otherwise, so this moves x one
         * step towards zero.
         */
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
      }
      if (next < default_eob) {
        band = bands[i + 1];
        if (t0 != DCT_EOB_TOKEN) {
          pt = vp9_prev_token_class[t0];
          rate0 += mb->token_costs[tx_size][type][band][pt][
              tokens[next][0].token];
        }
        if (t1 != DCT_EOB_TOKEN) {
          pt = vp9_prev_token_class[t1];
          rate1 += mb->token_costs[tx_size][type][band][pt][
              tokens[next][1].token];
        }
      }

      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);

      if (shortcut) {
        /* Adjust the reconstruction error by one quantizer step in the
         * direction x was moved: (q + sz) ^ sz is q for sz == 0 and -q for
         * sz == -1.
         */
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_index[i][1] = best;
      /* Finally, make this the new head of the trellis. */
      next = i;
    }
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
      band = bands[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
        tokens[next][0].rate += mb->token_costs[tx_size][type][band][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
        tokens[next][1].rate += mb->token_costs[tx_size][type][band][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
    }
  }

  /* Now pick the best path through the whole trellis. */
  /* The loop leaves i at i0 - 1, so this is the band of the first coded
   * position.
   */
  band = bands[i + 1];
  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[tx_size][type][band][pt][t0];
  rate1 += mb->token_costs[tx_size][type][band][pt][t1];
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x)
      final_eob = i;
    rc = scan[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]);

    next = tokens[i][best].next;
    best = best_index[i][best];
  }
  final_eob++;

  d->eob = final_eob;
  /* NOTE(review): `!type` is 1 only for plane type 0, presumably
   * PLANE_TYPE_Y_NO_DC, where position 0 is never coded -- confirm against
   * the PLANE_TYPE enum.
   */
  *a = *l = (d->eob > !type);
}

John Koleszar's avatar
John Koleszar committed
584 585 586 587 588 589 590 591
/**************************************************************************
our inverse hadamard transform effectively is weighted sum of all 16 inputs
with weight either 1 or -1. It has a last stage scaling of (sum+1)>>2. And
dc only idct is (dc+16)>>5. So if all the sums are between -65 and 63 the
output after inverse wht and idct will be all zero. A sum of absolute value
smaller than 65 guarantees all 16 different (+1/-1) weighted sums in wht
fall between -65 and +65.
**************************************************************************/
592 593
#define SUM_2ND_COEFF_THRESH 65

594
static void check_reset_2nd_coeffs(MACROBLOCKD *xd,
John Koleszar's avatar
John Koleszar committed
595 596 597
                                   ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
  int sum = 0;
  int i;
Paul Wilkins's avatar
Paul Wilkins committed
598
  BLOCKD *bd = &xd->block[24];
John Koleszar's avatar
John Koleszar committed
599 600 601 602 603
  if (bd->dequant[0] >= SUM_2ND_COEFF_THRESH
      && bd->dequant[1] >= SUM_2ND_COEFF_THRESH)
    return;

  for (i = 0; i < bd->eob; i++) {
604
    int coef = bd->dqcoeff[vp9_default_zig_zag1d[i]];
John Koleszar's avatar
John Koleszar committed
605 606 607 608 609 610 611
    sum += (coef >= 0) ? coef : -coef;
    if (sum >= SUM_2ND_COEFF_THRESH)
      return;
  }

  if (sum < SUM_2ND_COEFF_THRESH) {
    for (i = 0; i < bd->eob; i++) {
612
      int rc = vp9_default_zig_zag1d[i];
John Koleszar's avatar
John Koleszar committed
613 614
      bd->qcoeff[rc] = 0;
      bd->dqcoeff[rc] = 0;
615
    }
John Koleszar's avatar
John Koleszar committed
616
    bd->eob = 0;
617
    *a = *l = (bd->eob != 0);
John Koleszar's avatar
John Koleszar committed
618
  }
619
}
620

621
#define SUM_2ND_COEFF_THRESH_8X8 32
622
static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *xd,
John Koleszar's avatar
John Koleszar committed
623 624
                                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
  int sum = 0;
Paul Wilkins's avatar
Paul Wilkins committed
625
  BLOCKD *bd = &xd->block[24];
John Koleszar's avatar
John Koleszar committed
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
  int coef;

  coef = bd->dqcoeff[0];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[1];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[4];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[8];
  sum += (coef >= 0) ? coef : -coef;

  if (sum < SUM_2ND_COEFF_THRESH_8X8) {
    bd->qcoeff[0] = 0;
    bd->dqcoeff[0] = 0;
    bd->qcoeff[1] = 0;
    bd->dqcoeff[1] = 0;
    bd->qcoeff[4] = 0;
    bd->dqcoeff[4] = 0;
    bd->qcoeff[8] = 0;
    bd->dqcoeff[8] = 0;
    bd->eob = 0;
647
    *a = *l = (bd->eob != 0);
John Koleszar's avatar
John Koleszar committed
648
  }
649 650
}

651
/* Trellis-optimizes the 16 luma 4x4 blocks and, when present, the
 * second-order block (24) of a macroblock.
 *
 * The entropy contexts are copied to locals first, so the macroblock's real
 * above/left contexts are not modified. Does nothing when either context
 * pointer is NULL.
 */
void vp9_optimize_mby_4x4(MACROBLOCK *x) {
  int b;
  PLANE_TYPE type;
  int has_2nd_order;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (!x->e_mbd.above_context || !x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  has_2nd_order = get_2nd_order_usage(&x->e_mbd);

  /* With a second-order block the first coefficient of each luma block is
   * carried by block 24, so the luma blocks are optimized without it.
   */
  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;

  for (b = 0; b < 16; b++) {
    optimize_b(x, b, type,
               ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
  }

  if (has_2nd_order) {
    b = 24;
    optimize_b(x, b, PLANE_TYPE_Y2,
               ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
    check_reset_2nd_coeffs(&x->e_mbd,
                           ta + vp9_block2above[b], tl + vp9_block2left[b]);
  }
}

686
/* Trellis-optimizes the eight chroma 4x4 blocks (16..23) of a macroblock.
 *
 * The entropy contexts are copied to locals first, so the macroblock's real
 * above/left contexts are not modified. Does nothing when either context
 * pointer is NULL.
 */
void vp9_optimize_mbuv_4x4(MACROBLOCK *x) {
  int b;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (!x->e_mbd.above_context || !x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  for (b = 16; b < 24; b++) {
    optimize_b(x, b, PLANE_TYPE_UV,
               ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
  }
}

707 708 709
/* Trellis-optimizes the luma and then the chroma 4x4 blocks. */
static void optimize_mb_4x4(MACROBLOCK *x) {
  vp9_optimize_mby_4x4(x);
  vp9_optimize_mbuv_4x4(x);
}

712
/* Trellis-optimizes the four luma 8x8 blocks (0, 4, 8, 12) of a macroblock
 * and, when a second-order block is in use, clears it if its energy is
 * negligible. optimize_b() is not run on block 24 on this path.
 *
 * The entropy contexts are copied to locals first, so the macroblock's real
 * above/left contexts are not modified. Does nothing when either context
 * pointer is NULL.
 */
void vp9_optimize_mby_8x8(MACROBLOCK *x) {
  int b;
  PLANE_TYPE type;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;
  int has_2nd_order = get_2nd_order_usage(&x->e_mbd);

  if (!x->e_mbd.above_context || !x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;
  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
  for (b = 0; b < 16; b += 4) {
    ENTROPY_CONTEXT *const a = ta + vp9_block2above_8x8[b];
    ENTROPY_CONTEXT *const l = tl + vp9_block2left_8x8[b];
    /* An 8x8 block spans two 4x4 context entries in each direction. */
#if CONFIG_CNVCONTEXT
    ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
    ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
#else
    ENTROPY_CONTEXT above_ec = a[0];
    ENTROPY_CONTEXT left_ec = l[0];
#endif
    optimize_b(x, b, type, &above_ec, &left_ec, TX_8X8);
    /* Propagate the result to both entries covered by the block. */
    a[1] = a[0] = above_ec;
    l[1] = l[0] = left_ec;
  }

  // the 2nd order block is only present for some 8x8 macroblocks
  if (has_2nd_order) {
    check_reset_8x8_2nd_coeffs(&x->e_mbd,
                               ta + vp9_block2above_8x8[24],
                               tl + vp9_block2left_8x8[24]);
  }
}

752
/* Trellis-optimizes the two chroma 8x8 blocks (16 and 20) of a macroblock.
 *
 * The above/left contexts are read directly from the macroblock; the
 * per-block values handed to optimize_b() are local copies and are not
 * written back. Does nothing when either context pointer is NULL.
 */
void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
  int b;
  ENTROPY_CONTEXT *const ta = (ENTROPY_CONTEXT *)x->e_mbd.above_context;
  ENTROPY_CONTEXT *const tl = (ENTROPY_CONTEXT *)x->e_mbd.left_context;

  if (!ta || !tl)
    return;

  for (b = 16; b < 24; b += 4) {
    ENTROPY_CONTEXT *const a = ta + vp9_block2above_8x8[b];
    ENTROPY_CONTEXT *const l = tl + vp9_block2left_8x8[b];
#if CONFIG_CNVCONTEXT
    ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
    ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
#else
    ENTROPY_CONTEXT above_ec = a[0];
    ENTROPY_CONTEXT left_ec = l[0];
#endif
    optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8);
  }
}

774 775 776
/* Trellis-optimizes the luma and then the chroma 8x8 blocks. */
static void optimize_mb_8x8(MACROBLOCK *x) {
  vp9_optimize_mby_8x8(x);
  vp9_optimize_mbuv_8x8(x);
}

779
/* Trellis-optimizes the single 16x16 luma block of a macroblock.
 *
 * The above/left contexts handed to optimize_b() are local values derived
 * from the macroblock's y1 contexts, so the real contexts are not modified.
 * Does nothing when either context pointer is NULL.
 */
void vp9_optimize_mby_16x16(MACROBLOCK *x) {
  ENTROPY_CONTEXT_PLANES *const t_above = x->e_mbd.above_context;
  ENTROPY_CONTEXT_PLANES *const t_left = x->e_mbd.left_context;
  ENTROPY_CONTEXT ta, tl;

  if (!t_above || !t_left)
    return;

#if CONFIG_CNVCONTEXT
  /* The 16x16 block spans all four y1 entries: non-zero if any is set. */
  ta = (t_above->y1[0] + t_above->y1[1] + t_above->y1[2] + t_above->y1[3]) != 0;
  tl = (t_left->y1[0] + t_left->y1[1] + t_left->y1[2] + t_left->y1[3]) != 0;
#else
  ta = t_above->y1[0];
  tl = t_left->y1[0];
#endif
  optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &ta, &tl, TX_16X16);
}

797 798 799
/* Trellis-optimizes the 16x16 luma block and then the chroma 8x8 blocks. */
static void optimize_mb_16x16(MACROBLOCK *x) {
  vp9_optimize_mby_16x16(x);
  vp9_optimize_mbuv_8x8(x);
}

802
void vp9_fidct_mb(MACROBLOCK *x) {
803
  MACROBLOCKD *const xd = &x->e_mbd;
804
  TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
John Koleszar's avatar
John Koleszar committed
805

806
  if (tx_size == TX_16X16) {
807 808
    vp9_transform_mb_16x16(x);
    vp9_quantize_mb_16x16(x);
809
    if (x->optimize)
810 811
      optimize_mb_16x16(x);
    vp9_inverse_transform_mb_16x16(xd);
812
  } else if (tx_size == TX_8X8) {
813 814
    if (xd->mode_info_context->mbmi.mode == SPLITMV) {
      assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
815 816 817 818
      vp9_transform_mby_8x8(x);
      vp9_transform_mbuv_4x4(x);
      vp9_quantize_mby_8x8(x);
      vp9_quantize_mbuv_4x4(x);
819
      if (x->optimize) {
820 821
        vp9_optimize_mby_8x8(x);
        vp9_optimize_mbuv_4x4(x);
822
      }
823 824
      vp9_inverse_transform_mby_8x8(xd);
      vp9_inverse_transform_mbuv_4x4(xd);
825
    } else {
826 827
      vp9_transform_mb_8x8(x);
      vp9_quantize_mb_8x8(x);
828
      if (x->optimize)