vp9_encodemb.c 24.4 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_ports/config.h"
12
#include "vp9/encoder/vp9_encodemb.h"
13
#include "vp9/common/vp9_reconinter.h"
14
15
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_tokenize.h"
16
17
#include "vp9/common/vp9_invtrans.h"
#include "vp9/common/vp9_reconintra.h"
John Koleszar's avatar
John Koleszar committed
18
#include "vpx_mem/vpx_mem.h"
19
#include "vp9/encoder/vp9_rdopt.h"
20
#include "vp9/common/vp9_systemdependent.h"
21
#include "vp9_rtcd.h"
John Koleszar's avatar
John Koleszar committed
22

23
/* Computes the residual of one 4x4 block: every output sample is the source
 * sample minus the prediction sample.
 *
 *   be    - supplies the source samples (*be->base_src + be->src), the
 *           source row stride (be->src_stride) and the destination residual
 *           buffer (be->src_diff).
 *   bd    - supplies the prediction samples (bd->predictor).
 *   pitch - row step, in elements, applied to both the residual buffer and
 *           the prediction buffer.
 */
void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
  unsigned char *src_ptr = (*(be->base_src) + be->src);
  short *diff_ptr = be->src_diff;
  unsigned char *pred_ptr = bd->predictor;
  int src_stride = be->src_stride;

  int r, c;

  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++) {
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];
    }

    /* Residual and prediction share one pitch; the source has its own. */
    diff_ptr += pitch;
    pred_ptr += pitch;
    src_ptr  += src_stride;
  }
}

42
/* Computes the residual of an 8x8 region (source minus prediction) whose
 * top-left corner is the block described by be/bd.
 * NOTE(review): the name suggests this covers four adjacent 4x4 blocks -
 * confirm against the callers.
 *
 *   be    - supplies the source samples (*be->base_src + be->src), the
 *           source row stride (be->src_stride) and the destination residual
 *           buffer (be->src_diff).
 *   bd    - supplies the prediction samples (bd->predictor).
 *   pitch - row step, in elements, applied to both the residual buffer and
 *           the prediction buffer.
 */
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
  unsigned char *src_ptr = (*(be->base_src) + be->src);
  short *diff_ptr = be->src_diff;
  unsigned char *pred_ptr = bd->predictor;
  int src_stride = be->src_stride;
  int r, c;

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];
    }
    diff_ptr += pitch;
    pred_ptr += pitch;
    src_ptr  += src_stride;
  }
}

59
/* Computes the two 8x8 chroma residuals (source minus prediction).
 *
 *   diff       - residual buffer; the U residual is written as 8 packed rows
 *                of 8 starting at diff + 256, the V residual likewise
 *                starting at diff + 320.  Elements below 256 are not touched.
 *   usrc, vsrc - U and V source samples, rows src_stride apart.
 *   src_stride - row step of both source planes.
 *   upred,
 *   vpred      - U and V prediction samples, rows dst_stride apart.
 *   dst_stride - row step of both prediction planes.
 */
void vp9_subtract_mbuv_s_c(short *diff, const unsigned char *usrc,
                           const unsigned char *vsrc, int src_stride,
                           const unsigned char *upred,
                           const unsigned char *vpred, int dst_stride) {
  short *udiff = diff + 256;
  short *vdiff = diff + 320;
  int r, c;

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      udiff[c] = usrc[c] - upred[c];
    }

    udiff += 8;
    upred += dst_stride;
    usrc  += src_stride;
  }

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      vdiff[c] = vsrc[c] - vpred[c];
    }

    vdiff += 8;
    vpred += dst_stride;
    vsrc  += src_stride;
  }
}
John Koleszar's avatar
John Koleszar committed
87

88
/* Chroma residual for a macroblock whose prediction is held in one packed
 * buffer: the U prediction is read from pred + 256 and the V prediction from
 * pred + 320, both with a row step of 8.
 *
 *   diff       - residual buffer, forwarded to vp9_subtract_mbuv_s_c().
 *   usrc, vsrc - U and V source samples.
 *   pred       - packed prediction buffer.
 *   stride     - row step of both source planes.
 */
void vp9_subtract_mbuv_c(short *diff, unsigned char *usrc,
                         unsigned char *vsrc, unsigned char *pred, int stride) {
  unsigned char *upred = pred + 256;
  unsigned char *vpred = pred + 320;

  vp9_subtract_mbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8);
}

96
/* Computes a 16x16 residual (source minus prediction).
 *
 *   diff       - receives 16 packed rows of 16 residual values.
 *   src        - source samples, rows src_stride apart.
 *   src_stride - row step of the source.
 *   pred       - prediction samples, rows dst_stride apart.
 *   dst_stride - row step of the prediction.
 */
void vp9_subtract_mby_s_c(short *diff, const unsigned char *src, int src_stride,
                          const unsigned char *pred, int dst_stride) {
  int r, c;

  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++) {
      diff[c] = src[c] - pred[c];
    }

    diff += 16;
    pred += dst_stride;
    src  += src_stride;
  }
}

111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
/* Computes a 32x32 residual (source minus prediction).
 *
 *   diff       - receives 32 packed rows of 32 residual values.
 *   src        - source samples, rows src_stride apart.
 *   src_stride - row step of the source.
 *   pred       - prediction samples, rows dst_stride apart.
 *   dst_stride - row step of the prediction.
 */
void vp9_subtract_sby_s_c(short *diff, const unsigned char *src, int src_stride,
                          const unsigned char *pred, int dst_stride) {
  int row;

  for (row = 0; row < 32; ++row) {
    const unsigned char *const src_row = src + row * src_stride;
    const unsigned char *const pred_row = pred + row * dst_stride;
    short *const out_row = diff + row * 32;
    int col;

    for (col = 0; col < 32; ++col)
      out_row[col] = src_row[col] - pred_row[col];
  }
}

/* Computes the two 16x16 chroma residuals (source minus prediction).
 *
 *   diff       - residual buffer; the U residual is written as 16 packed
 *                rows of 16 starting at diff + 1024, the V residual likewise
 *                starting at diff + 1024 + 256.
 *   usrc, vsrc - U and V source samples, rows src_stride apart.
 *   src_stride - row step of both source planes.
 *   upred,
 *   vpred      - U and V prediction samples, rows dst_stride apart.
 *   dst_stride - row step of both prediction planes.
 */
void vp9_subtract_sbuv_s_c(short *diff, const unsigned char *usrc,
                           const unsigned char *vsrc, int src_stride,
                           const unsigned char *upred,
                           const unsigned char *vpred, int dst_stride) {
  int plane;

  /* Plane 0 is U, plane 1 is V; U is completed before V is started. */
  for (plane = 0; plane < 2; ++plane) {
    const unsigned char *src_row = plane ? vsrc : usrc;
    const unsigned char *pred_row = plane ? vpred : upred;
    short *out_row = diff + 1024 + plane * 256;
    int row;

    for (row = 0; row < 16; ++row) {
      int col;

      for (col = 0; col < 16; ++col)
        out_row[col] = src_row[col] - pred_row[col];

      out_row += 16;
      pred_row += dst_stride;
      src_row += src_stride;
    }
  }
}
#endif

157
/* 16x16 luma residual for a prediction stored with a row step of 16.
 *
 *   diff   - receives the residual, forwarded to vp9_subtract_mby_s_c().
 *   src    - source samples.
 *   pred   - prediction samples, 16 per row.
 *   stride - row step of the source.
 */
void vp9_subtract_mby_c(short *diff, unsigned char *src,
                        unsigned char *pred, int stride) {
  vp9_subtract_mby_s_c(diff, src, stride, pred, 16);
}

162
/* Fills x->src_diff with the luma and chroma residuals of the macroblock,
 * using x->e_mbd.predictor as the prediction for both.
 * Luma source and stride come from block 0; chroma source and stride come
 * from x->src.
 * NOTE(review): vp9_subtract_mby / vp9_subtract_mbuv are not defined in this
 * file - presumably dispatch names provided by vp9_rtcd.h; confirm.
 */
static void subtract_mb(MACROBLOCK *x) {
  BLOCK *b = &x->block[0];

  vp9_subtract_mby(x->src_diff, *(b->base_src), x->e_mbd.predictor,
                   b->src_stride);
  vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->e_mbd.predictor, x->src.uv_stride);
}

171
/* Gathers the first coefficient of each of the 16 groups of 16 coefficients
 * in x->coeff into x->src_diff[384..399], and clears those coefficients in
 * x->coeff.
 */
static void build_dcblock_4x4(MACROBLOCK *x) {
  short *src_diff_ptr = &x->src_diff[384];
  int i;

  for (i = 0; i < 16; i++) {
    src_diff_ptr[i] = x->coeff[i * 16];
    x->coeff[i * 16] = 0;
  }
}
180

181
/* Forward-transforms the 16 luma blocks (indices 0..15) of the macroblock
 * with 4x4 transforms.
 *
 * Each block uses vp9_fht_c() when its transform type is not DCT_DCT and the
 * x->vp9_short_fdct4x4 function pointer otherwise; both are given a source
 * pitch argument of 32.
 *
 * When get_2nd_order_usage() reports a second-order block, the values
 * gathered by build_dcblock_4x4() are transformed with x->short_walsh4x4
 * into block 24; otherwise the 16 coefficients of block 24 are zeroed.
 */
void vp9_transform_mby_4x4(MACROBLOCK *x) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;
  int has_2nd_order = get_2nd_order_usage(xd);

  for (i = 0; i < 16; i++) {
    BLOCK *b = &x->block[i];
    TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
      /* A non-DCT transform type is not expected together with a
       * second-order block. */
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
    } else {
      x->vp9_short_fdct4x4(&b->src_diff[0], &b->coeff[0], 32);
    }
  }

  if (has_2nd_order) {
    // build dc block from 16 y dc values
    build_dcblock_4x4(x);

    // do 2nd order transform on the dc block
    x->short_walsh4x4(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
  } else {
    vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
  }
}

210
/* Forward-transforms the chroma blocks (indices 16..23).  Blocks are visited
 * two at a time (16, 18, 20, 22) through the x->vp9_short_fdct8x4 function
 * pointer, with a pitch argument of 16.
 */
void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 2) {
    x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }
}

219
/* Runs the 4x4 forward transforms for the whole macroblock: luma first,
 * then chroma.
 */
static void transform_mb_4x4(MACROBLOCK *x) {
  vp9_transform_mby_4x4(x);
  vp9_transform_mbuv_4x4(x);
}

224
/* Builds the input of the second-order transform for the 8x8 case.
 *
 * The 16 entries of block 24's src_diff are cleared, then entries 0, 1, 4
 * and 8 receive x->coeff[0], x->coeff[4 * 16], x->coeff[8 * 16] and
 * x->coeff[12 * 16] respectively.  Those four source coefficients are
 * cleared afterwards.
 */
static void build_dcblock_8x8(MACROBLOCK *x) {
  int16_t *src_diff_ptr = x->block[24].src_diff;
  int i;

  for (i = 0; i < 16; i++) {
    src_diff_ptr[i] = 0;
  }
  src_diff_ptr[0] = x->coeff[0 * 16];
  src_diff_ptr[1] = x->coeff[4 * 16];
  src_diff_ptr[4] = x->coeff[8 * 16];
  src_diff_ptr[8] = x->coeff[12 * 16];
  x->coeff[0 * 16] = 0;
  x->coeff[4 * 16] = 0;
  x->coeff[8 * 16] = 0;
  x->coeff[12 * 16] = 0;
}

241
/* Forward-transforms the luma plane of the macroblock with four 8x8
 * transforms.
 *
 * The residual is read starting at blocks 0, 8, 2 and 10.  The coefficients
 * of blocks 0 and 8 are stored in place, while those read at blocks 2 and 10
 * are stored in blocks 4 and 12 (i + 2), so the four outputs start at
 * blocks 0, 4, 8 and 12 - the positions build_dcblock_8x8() reads.
 *
 * Each transform uses vp9_fht_c() when its type is not DCT_DCT and the
 * x->vp9_short_fdct8x8 function pointer otherwise; both are given a source
 * pitch argument of 32.
 *
 * When get_2nd_order_usage() reports a second-order block, it is built and
 * transformed with x->short_fhaar2x2 into block 24; otherwise the 16
 * coefficients of block 24 are zeroed.
 */
void vp9_transform_mby_8x8(MACROBLOCK *x) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;
  TX_TYPE tx_type;
  int has_2nd_order = get_2nd_order_usage(xd);

  /* i = 0 and i = 8: coefficients stay in the block they were read from. */
  for (i = 0; i < 9; i += 8) {
    BLOCK *b = &x->block[i];
    tx_type = get_tx_type_8x8(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
    } else {
      x->vp9_short_fdct8x8(&b->src_diff[0], &b->coeff[0], 32);
    }
  }
  /* i = 2 and i = 10: coefficients are written to block i + 2. */
  for (i = 2; i < 11; i += 8) {
    BLOCK *b = &x->block[i];
    tx_type = get_tx_type_8x8(xd, &xd->block[i]);
    if (tx_type != DCT_DCT) {
      assert(has_2nd_order == 0);
      vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
    } else {
      x->vp9_short_fdct8x8(&b->src_diff[0], &(b + 2)->coeff[0], 32);
    }
  }

  if (has_2nd_order) {
    // build dc block from 2x2 y dc values
    build_dcblock_8x8(x);

    // do 2nd order transform on the dc block
    x->short_fhaar2x2(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
  } else {
    vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
  }
}

282
/* Forward-transforms the chroma planes with two 8x8 transforms, read and
 * written at blocks 16 and 20, through the x->vp9_short_fdct8x8 function
 * pointer with a pitch argument of 16.
 */
void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 4) {
    x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }
}

291
292
293
/* Runs the 8x8 forward transforms for the whole macroblock: luma first,
 * then chroma.
 */
void vp9_transform_mb_8x8(MACROBLOCK *x) {
  vp9_transform_mby_8x8(x);
  vp9_transform_mbuv_8x8(x);
}

296
/* Forward-transforms the luma plane of the macroblock with a single 16x16
 * transform read from and written to block 0.
 *
 * vp9_fht_c() is used when the transform type is not DCT_DCT and the
 * x->vp9_short_fdct16x16 function pointer otherwise; both are given a source
 * pitch argument of 32.  vp9_clear_system_state() is called before the
 * transform.
 */
void vp9_transform_mby_16x16(MACROBLOCK *x) {
  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK *b = &x->block[0];
  TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]);
  vp9_clear_system_state();
  if (tx_type != DCT_DCT) {
    vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
  } else {
    x->vp9_short_fdct16x16(&b->src_diff[0], &b->coeff[0], 32);
  }
}

309
310
311
/* Transforms the whole macroblock using a 16x16 transform for luma and the
 * 8x8 transforms for chroma.
 */
void vp9_transform_mb_16x16(MACROBLOCK *x) {
  vp9_transform_mby_16x16(x);
  vp9_transform_mbuv_8x8(x);
}
313

314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
#if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS
/* Applies vp9_short_fdct32x32() to the start of the superblock residual
 * buffer (x->sb_coeff_data.src_diff), writing to the start of the
 * superblock coefficient buffer, with a pitch argument of 64.
 */
void vp9_transform_sby_32x32(MACROBLOCK *x) {
  vp9_short_fdct32x32(x->sb_coeff_data.src_diff, x->sb_coeff_data.coeff, 64);
}

/* Applies the x->vp9_short_fdct16x16 function pointer to the two 16x16
 * regions of the superblock buffers at element offsets 1024 and 1280 (the
 * offsets vp9_subtract_sbuv_s_c() uses for its U and V residuals), with a
 * pitch argument of 32.  vp9_clear_system_state() is called first.
 */
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
  SUPERBLOCK *const sb = &x->sb_coeff_data;
  const int first_offset = 1024;
  const int second_offset = 1280;

  vp9_clear_system_state();
  x->vp9_short_fdct16x16(&sb->src_diff[first_offset],
                         &sb->coeff[first_offset], 32);
  x->vp9_short_fdct16x16(&sb->src_diff[second_offset],
                         &sb->coeff[second_offset], 32);
}
#endif

330
331
/* Tie-break value used by UPDATE_RD_COST() when two RDCOST() results are
 * equal: the low 8 bits of 128 + R * RM.  The DM and D arguments are
 * accepted but do not appear in the expansion. */
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
/* Same expansion as RDTRUNC. */
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
332
typedef struct vp9_token_state vp9_token_state;

/* One candidate state in the trellis built by optimize_b(). */
struct vp9_token_state {
  int           rate;   /* rate accumulated from this node to the end */
  int           error;  /* squared error accumulated from this node on */
  int           next;   /* scan index of the following trellis node */
  signed char   token;  /* token assigned to this node */
  short         qc;     /* quantized coefficient value for this node */
};

342
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 4

/* Per-plane multiplier applied to mb->rdmult in optimize_b(), indexed by the
 * PLANE_TYPE value passed to that function.
 * NOTE(review): the entry order presumably follows the PLANE_TYPE enum
 * (Y without DC, Y2, UV, Y with DC) - confirm against its declaration.
 */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  Y2_RD_MULT,
  UV_RD_MULT,
  Y1_RD_MULT
};

354
355
356
357
358
359
360
361
362
363
/* Recomputes rd_cost0 / rd_cost1 from (rate0, error0) and (rate1, error1)
 * using rdmult and rddiv, all of which must be variables in scope at the
 * point of use.  When the two RDCOST() values are equal, both are replaced
 * by their RDTRUNC() values so that the comparison can still pick a side.
 *
 * Wrapped in do { } while (0) so that "UPDATE_RD_COST();" is a single
 * statement and stays safe under an un-braced if/else.
 */
#define UPDATE_RD_COST() \
  do { \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
    if (rd_cost0 == rd_cost1) { \
      rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); \
      rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); \
    } \
  } while (0)

364
365
/* Trellis optimization of the quantized coefficients of one block.
 *
 * For every non-zero quantized coefficient, two candidates are kept: the
 * value as quantized, and (when the dequantized magnitude overshoots the
 * original coefficient by less than one quantizer step) the value moved one
 * step towards zero.  Rates come from mb->token_costs, distortion is the
 * squared difference between dequantized and original coefficients, and the
 * two are combined with RDCOST().  The cheapest path is then written back.
 *
 *   mb      - macroblock; supplies the coefficients, token costs, rdmult
 *             and rddiv.
 *   i       - index of the block to optimize (reused below as the scan
 *             position).
 *   type    - plane type; PLANE_TYPE_Y_NO_DC makes the scan start at
 *             position 1 instead of 0.
 *   a, l    - above / left entropy contexts: read to obtain the context of
 *             the first token and overwritten with (new eob > !type).
 *   tx_size - TX_4X4, TX_8X8 or TX_16X16; any other value is handled as
 *             TX_4X4.
 *
 * On return d->qcoeff, d->dqcoeff and d->eob hold the chosen path.
 */
static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                       int tx_size) {
  BLOCK *b = &mb->block[i];
  BLOCKD *d = &mb->e_mbd.block[i];
  /* One node pair per scan position plus the sentinel at index eob
   * (up to 256 for TX_16X16). */
  vp9_token_state tokens[257][2];
  unsigned best_index[257][2];
  const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
  short *qcoeff_ptr = d->qcoeff;
  short *dqcoeff_ptr = d->dqcoeff;
  int eob = d->eob, final_eob, sz = 0;
  int i0 = (type == PLANE_TYPE_Y_NO_DC);
  int rc, x, next;
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
  int err_mult = plane_rd_mult[type];
  int default_eob;
  int const *scan, *bands;

  switch (tx_size) {
    default:
    case TX_4X4:
      scan = vp9_default_zig_zag1d;
      bands = vp9_coef_bands;
      default_eob = 16;
      // TODO: this isn't called (for intra4x4 modes), but will be left in
      // since it could be used later
      {
        TX_TYPE tx_type = get_tx_type_4x4(&mb->e_mbd, d);
        if (tx_type != DCT_DCT) {
          switch (tx_type) {
            case ADST_DCT:
              scan = vp9_row_scan;
              break;

            case DCT_ADST:
              scan = vp9_col_scan;
              break;

            default:
              scan = vp9_default_zig_zag1d;
              break;
          }
        } else {
          scan = vp9_default_zig_zag1d;
        }
      }
      break;
    case TX_8X8:
      scan = vp9_default_zig_zag1d_8x8;
      bands = vp9_coef_bands_8x8;
      default_eob = 64;
      break;
    case TX_16X16:
      scan = vp9_default_zig_zag1d_16x16;
      bands = vp9_coef_bands_16x16;
      default_eob = 256;
      break;
  }

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
  memset(best_index, 0, sizeof(best_index));
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  /* Walk the scan backwards from the last coded position down to i0. */
  for (i = eob; i-- > i0;) {
    int base_bits, d2, dx;

    rc = scan[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp9_dct_value_tokens_ptr + x)->Token;
      /* Consider both possible successor states. */
      if (next < default_eob) {
        band = bands[i + 1];
        pt = vp9_prev_token_class[t0];
        rate0 +=
          mb->token_costs[tx_size][type][band][pt][tokens[next][0].token];
        rate1 +=
          mb->token_costs[tx_size][type][band][pt][tokens[next][1].token];
      }
      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);
      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_index[i][0] = best;
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      /* The alternative is only tried when the dequantized magnitude lies
       * strictly between |coeff| and |coeff| + one quantizer step. */
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        /* sz is -1 for negative x and 0 otherwise, so this moves x one
         * step towards zero. */
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
      }
      if (next < default_eob) {
        band = bands[i + 1];
        if (t0 != DCT_EOB_TOKEN) {
          pt = vp9_prev_token_class[t0];
          rate0 += mb->token_costs[tx_size][type][band][pt][
              tokens[next][0].token];
        }
        if (t1 != DCT_EOB_TOKEN) {
          pt = vp9_prev_token_class[t1];
          rate1 += mb->token_costs[tx_size][type][band][pt][
              tokens[next][1].token];
        }
      }

      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);

      if (shortcut) {
        /* (v + sz) ^ sz negates v when sz == -1: adjust the error by one
         * quantizer step carrying the sign of the original x. */
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_index[i][1] = best;
      /* Finally, make this the new head of the trellis. */
      next = i;
    }
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
      band = bands[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
        tokens[next][0].rate += mb->token_costs[tx_size][type][band][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
        tokens[next][1].rate += mb->token_costs[tx_size][type][band][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
    }
  }

  /* Now pick the best path through the whole trellis. */
  band = bands[i + 1];
  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[tx_size][type][band][pt][t0];
  rate1 += mb->token_costs[tx_size][type][band][pt][t1];
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
  /* Follow the chosen path forwards and write the coefficients back. */
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x)
      final_eob = i;
    rc = scan[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]);

    next = tokens[i][best].next;
    best = best_index[i][best];
  }
  final_eob++;

  d->eob = final_eob;
  *a = *l = (d->eob > !type);
}

John Koleszar's avatar
John Koleszar committed
584
585
586
587
588
589
590
591
/**************************************************************************
Our inverse Hadamard transform is effectively a weighted sum of all 16
inputs, with each weight either 1 or -1. It has a last-stage scaling of
(sum+1)>>2, and the DC-only IDCT is (dc+16)>>5. So if all the sums are
between -65 and 63, the output after the inverse WHT and IDCT will be all
zero. A sum of absolute values smaller than 65 guarantees that all 16
different (+1/-1) weighted sums in the WHT fall between -65 and +65.
**************************************************************************/
592
593
#define SUM_2ND_COEFF_THRESH 65

594
static void check_reset_2nd_coeffs(MACROBLOCKD *xd,
John Koleszar's avatar
John Koleszar committed
595
596
597
                                   ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
  int sum = 0;
  int i;
Paul Wilkins's avatar
Paul Wilkins committed
598
  BLOCKD *bd = &xd->block[24];
John Koleszar's avatar
John Koleszar committed
599
600
601
602
603
  if (bd->dequant[0] >= SUM_2ND_COEFF_THRESH
      && bd->dequant[1] >= SUM_2ND_COEFF_THRESH)
    return;

  for (i = 0; i < bd->eob; i++) {
604
    int coef = bd->dqcoeff[vp9_default_zig_zag1d[i]];
John Koleszar's avatar
John Koleszar committed
605
606
607
608
609
610
611
    sum += (coef >= 0) ? coef : -coef;
    if (sum >= SUM_2ND_COEFF_THRESH)
      return;
  }

  if (sum < SUM_2ND_COEFF_THRESH) {
    for (i = 0; i < bd->eob; i++) {
612
      int rc = vp9_default_zig_zag1d[i];
John Koleszar's avatar
John Koleszar committed
613
614
      bd->qcoeff[rc] = 0;
      bd->dqcoeff[rc] = 0;
615
    }
John Koleszar's avatar
John Koleszar committed
616
    bd->eob = 0;
617
    *a = *l = (bd->eob != 0);
John Koleszar's avatar
John Koleszar committed
618
  }
619
}
620

621
#define SUM_2ND_COEFF_THRESH_8X8 32
622
static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *xd,
John Koleszar's avatar
John Koleszar committed
623
624
                                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
  int sum = 0;
Paul Wilkins's avatar
Paul Wilkins committed
625
  BLOCKD *bd = &xd->block[24];
John Koleszar's avatar
John Koleszar committed
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
  int coef;

  coef = bd->dqcoeff[0];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[1];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[4];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[8];
  sum += (coef >= 0) ? coef : -coef;

  if (sum < SUM_2ND_COEFF_THRESH_8X8) {
    bd->qcoeff[0] = 0;
    bd->dqcoeff[0] = 0;
    bd->qcoeff[1] = 0;
    bd->dqcoeff[1] = 0;
    bd->qcoeff[4] = 0;
    bd->dqcoeff[4] = 0;
    bd->qcoeff[8] = 0;
    bd->dqcoeff[8] = 0;
    bd->eob = 0;
647
    *a = *l = (bd->eob != 0);
John Koleszar's avatar
John Koleszar committed
648
  }
649
650
}

651
/* Runs optimize_b() on the 16 luma blocks of the macroblock (4x4
 * transform) and, when a second-order block is in use, on block 24,
 * followed by check_reset_2nd_coeffs().
 *
 * Works on local copies of the above / left entropy contexts, so the
 * contexts stored in x->e_mbd are not modified here.  Returns without
 * doing anything when either context pointer is NULL.
 */
void vp9_optimize_mby_4x4(MACROBLOCK *x) {
  int b;
  PLANE_TYPE type;
  int has_2nd_order;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (!x->e_mbd.above_context || !x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  has_2nd_order = get_2nd_order_usage(&x->e_mbd);

  /* With a second-order block the luma DC is carried by block 24. */
  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;

  for (b = 0; b < 16; b++) {
    optimize_b(x, b, type,
               ta + vp9_block2above[TX_4X4][b],
               tl + vp9_block2left[TX_4X4][b], TX_4X4);
  }

  if (has_2nd_order) {
    b = 24;
    optimize_b(x, b, PLANE_TYPE_Y2,
               ta + vp9_block2above[TX_4X4][b],
               tl + vp9_block2left[TX_4X4][b], TX_4X4);
    check_reset_2nd_coeffs(&x->e_mbd,
                           ta + vp9_block2above[TX_4X4][b],
                           tl + vp9_block2left[TX_4X4][b]);
  }
}

689
/* Runs optimize_b() on the 8 chroma blocks (indices 16..23) of the
 * macroblock with the 4x4 transform size.
 *
 * Works on local copies of the above / left entropy contexts, so the
 * contexts stored in x->e_mbd are not modified here.  Returns without
 * doing anything when either context pointer is NULL.
 */
void vp9_optimize_mbuv_4x4(MACROBLOCK *x) {
  int b;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (!x->e_mbd.above_context || !x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  for (b = 16; b < 24; b++) {
    optimize_b(x, b, PLANE_TYPE_UV,
               ta + vp9_block2above[TX_4X4][b],
               tl + vp9_block2left[TX_4X4][b], TX_4X4);
  }
}

711
712
713
/* Optimizes the coefficients of the whole macroblock for the 4x4 transform
 * size: luma first, then chroma.
 */
static void optimize_mb_4x4(MACROBLOCK *x) {
  vp9_optimize_mby_4x4(x);
  vp9_optimize_mbuv_4x4(x);
}

716
/* Runs optimize_b() on the four 8x8 luma transforms of the macroblock
 * (blocks 0, 4, 8 and 12) and, when a second-order block is in use, applies
 * check_reset_8x8_2nd_coeffs() to it.  The second-order block itself is not
 * passed through optimize_b() here.
 *
 * Works on local copies of the above / left entropy contexts, so the
 * contexts stored in x->e_mbd are not modified here.  Returns without
 * doing anything when either context pointer is NULL.
 */
void vp9_optimize_mby_8x8(MACROBLOCK *x) {
  int b;
  PLANE_TYPE type;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;
  int has_2nd_order = get_2nd_order_usage(&x->e_mbd);

  if (!x->e_mbd.above_context || !x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;
  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
  for (b = 0; b < 16; b += 4) {
    ENTROPY_CONTEXT *const a = ta + vp9_block2above[TX_8X8][b];
    ENTROPY_CONTEXT *const l = tl + vp9_block2left[TX_8X8][b];
    /* An 8x8 transform spans two context entries in each direction. */
#if CONFIG_CNVCONTEXT
    ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
    ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
#else
    ENTROPY_CONTEXT above_ec = a[0];
    ENTROPY_CONTEXT left_ec = l[0];
#endif
    optimize_b(x, b, type, &above_ec, &left_ec, TX_8X8);
    /* Propagate the updated context to both entries the block covers. */
    a[1] = a[0] = above_ec;
    l[1] = l[0] = left_ec;
  }

  // The 2nd order block is only reset when one is actually in use.
  if (has_2nd_order) {
    check_reset_8x8_2nd_coeffs(&x->e_mbd,
                               ta + vp9_block2above[TX_8X8][24],
                               tl + vp9_block2left[TX_8X8][24]);
  }
}

756
/* Calls optimize_b() with PLANE_TYPE_UV and TX_8X8 on blocks 16 and 20 of the
 * macroblock.
 *
 * The contexts pointed to by x->e_mbd are read directly (no scratch copy is
 * made). optimize_b() is given per-block local values, and this function does
 * not store them back. Returns without doing anything if either context
 * pointer is null.
 */
void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)x->e_mbd.above_context;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)x->e_mbd.left_context;
  int b;

  if (!(above && left))
    return;

  for (b = 16; b < 24; b += 4) {
    ENTROPY_CONTEXT *const a = above + vp9_block2above[TX_8X8][b];
    ENTROPY_CONTEXT *const l = left + vp9_block2left[TX_8X8][b];
#if CONFIG_CNVCONTEXT
    /* Collapse the two context entries of this block into a single flag. */
    ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
    ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;
#else
    ENTROPY_CONTEXT above_ec = a[0];
    ENTROPY_CONTEXT left_ec = l[0];
#endif

    optimize_b(x, b, PLANE_TYPE_UV, &above_ec, &left_ec, TX_8X8);
  }
}

778
779
780
/* Whole-macroblock optimization for the 8x8 case: calls
 * vp9_optimize_mby_8x8() first and vp9_optimize_mbuv_8x8() second. */
static void optimize_mb_8x8(MACROBLOCK *x) {
  vp9_optimize_mby_8x8(x);
  vp9_optimize_mbuv_8x8(x);
}

783
/* Calls optimize_b() once, on block 0, with PLANE_TYPE_Y_WITH_DC and
 * TX_16X16.
 *
 * The above/left context passed to optimize_b() is a local value derived from
 * the y1[] entries of the macroblock's contexts: with CONFIG_CNVCONTEXT it is
 * 1 if the sum of y1[0..3] is nonzero and 0 otherwise; without it, it is
 * y1[0]. This function does not store the locals back. Returns without doing
 * anything if either context pointer is null.
 */
void vp9_optimize_mby_16x16(MACROBLOCK *x) {
  ENTROPY_CONTEXT_PLANES *const above = x->e_mbd.above_context;
  ENTROPY_CONTEXT_PLANES *const left = x->e_mbd.left_context;
  ENTROPY_CONTEXT above_ec, left_ec;

  if (!(above && left))
    return;

#if CONFIG_CNVCONTEXT
  above_ec = (above->y1[0] + above->y1[1] + above->y1[2] + above->y1[3]) != 0;
  left_ec = (left->y1[0] + left->y1[1] + left->y1[2] + left->y1[3]) != 0;
#else
  above_ec = above->y1[0];
  left_ec = left->y1[0];
#endif

  optimize_b(x, 0, PLANE_TYPE_Y_WITH_DC, &above_ec, &left_ec, TX_16X16);
}

801
802
803
/* Whole-macroblock optimization for the 16x16 case: calls
 * vp9_optimize_mby_16x16() first and vp9_optimize_mbuv_8x8() second
 * (the 8x8 routine is the one used for the UV blocks here). */
static void optimize_mb_16x16(MACROBLOCK *x) {
  vp9_optimize_mby_16x16(x);
  vp9_optimize_mbuv_8x8(x);
}

806
/* Runs transform -> quantize -> (optional) optimize -> inverse transform on a
 * macroblock, selecting the routines by mbmi.txfm_size:
 *
 *   TX_16X16          : the *_mb_16x16 routines.
 *   TX_8X8 + SPLITMV  : 8x8 routines for "mby" and 4x4 routines for "mbuv";
 *                       partitioning is asserted not to be PARTITIONING_4X4.
 *   TX_8X8 otherwise  : the *_mb_8x8 routines.
 *   anything else     : the *_mb_4x4 routines.
 *
 * The optimize step runs only when x->optimize is nonzero.
 */
void vp9_fidct_mb(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;

  switch (xd->mode_info_context->mbmi.txfm_size) {
    case TX_16X16:
      vp9_transform_mb_16x16(x);
      vp9_quantize_mb_16x16(x);
      if (x->optimize)
        optimize_mb_16x16(x);
      vp9_inverse_transform_mb_16x16(xd);
      break;

    case TX_8X8:
      if (xd->mode_info_context->mbmi.mode != SPLITMV) {
        vp9_transform_mb_8x8(x);
        vp9_quantize_mb_8x8(x);
        if (x->optimize)
          optimize_mb_8x8(x);
        vp9_inverse_transform_mb_8x8(xd);
      } else {
        assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
        vp9_transform_mby_8x8(x);
        vp9_transform_mbuv_4x4(x);
        vp9_quantize_mby_8x8(x);
        vp9_quantize_mbuv_4x4(x);
        if (x->optimize) {
          vp9_optimize_mby_8x8(x);
          vp9_optimize_mbuv_4x4(x);
        }
        vp9_inverse_transform_mby_8x8(xd);
        vp9_inverse_transform_mbuv_4x4(xd);
      }
      break;

    default:
      transform_mb_4x4(x);
      vp9_quantize_mb_4x4(x);
      if (x->optimize)
        optimize_mb_4x4(x);
      vp9_inverse_transform_mb_4x4(xd);
      break;
  }
}

845
/* Encodes a macroblock in four steps, in this order: build the inter
 * predictors, subtract, run vp9_fidct_mb(), then reconstruct. */
void vp9_encode_inter16x16(MACROBLOCK *x) {
  vp9_build_inter_predictors_mb(&x->e_mbd);
  subtract_mb(x);
  vp9_fidct_mb(x);
  vp9_recon_mb(&x->e_mbd);
}

854
/* this function is used by first pass only */
855
void vp9_encode_inter16x16y(MACROBLOCK *x) {
856
  MACROBLOCKD *xd = &x->e_mbd;
John Koleszar's avatar
John Koleszar committed
857
  BLOCK *b = &x->block[0];
858

859
#if CONFIG_PRED_FILTER
John Koleszar's avatar
John Koleszar committed
860
  // Disable the prediction filter for firstpass
861
  xd->mode_info_context->mbmi.pred_filter_enabled = 0;
862
863
#endif

864
  vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0);
John Koleszar's avatar
John Koleszar committed
865

866
  vp9_subtract_mby(x->src_diff, *(b->base_src), xd->predictor, b->src_stride);
John Koleszar's avatar
John Koleszar committed
867

868
869
  vp9_transform_mby_4x4(x);
  vp9_quantize_mby_4x4(x);
870
  vp9_inverse_transform_mby_4x4(xd);
Yaowu Xu's avatar
Yaowu Xu committed
871

872
  vp9_recon_mby(xd);
John Koleszar's avatar
John Koleszar committed
873
}