encodemb.c 40.8 KB
Newer Older
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_ports/config.h"
#include "encodemb.h"
John Koleszar's avatar
John Koleszar committed
14
#include "vp8/common/reconinter.h"
John Koleszar's avatar
John Koleszar committed
15
#include "quantize.h"
16
#include "tokenize.h"
John Koleszar's avatar
John Koleszar committed
17
18
19
#include "vp8/common/invtrans.h"
#include "vp8/common/recon.h"
#include "vp8/common/reconintra.h"
John Koleszar's avatar
John Koleszar committed
20
21
#include "dct.h"
#include "vpx_mem/vpx_mem.h"
Yunqing Wang's avatar
Yunqing Wang committed
22
#include "rdopt.h"
23
#include "vp8/common/systemdependent.h"
John Koleszar's avatar
John Koleszar committed
24
25
26
27
28
29

/* IF_RTCD(x) yields its argument when runtime CPU detection is configured
 * (CONFIG_RUNTIME_CPU_DETECT is non-zero) and NULL otherwise.
 */
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
#else
#define IF_RTCD(x) NULL
#endif
30
31
32
33
34

/* Debug flag, only declared in ENC_DEBUG builds; it is defined in another
 * translation unit.
 */
#ifdef ENC_DEBUG
extern int enc_debug;
#endif

John Koleszar's avatar
John Koleszar committed
35
36
37
38
39
40
41
/*
 * Computes the 4x4 residual (source minus prediction) for one block.
 *
 *   be    - encoder block; supplies the source pixels (*be->base_src + be->src,
 *           rows be->src_stride apart) and receives the result in be->src_diff.
 *   bd    - decoder-side block; supplies the prediction in bd->predictor.
 *   pitch - row step, in elements, applied to both the residual and the
 *           predictor pointers.
 */
void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
  unsigned char *src_ptr = (*(be->base_src) + be->src);
  short *diff_ptr = be->src_diff;
  unsigned char *pred_ptr = bd->predictor;
  int src_stride = be->src_stride;
  int r, c;

  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++) {
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];
    }

    /* Residual and predictor share one pitch; the source has its own. */
    diff_ptr += pitch;
    pred_ptr += pitch;
    src_ptr  += src_stride;
  }
}

John Koleszar's avatar
John Koleszar committed
54
55
56
57
58
59
60
61
62
/*
 * Computes an 8x8 residual (source minus prediction) starting at the given
 * block, i.e. the area of four 4x4 blocks in one call.
 *
 *   be    - encoder block; supplies the source pixels (*be->base_src + be->src,
 *           rows be->src_stride apart) and receives the result in be->src_diff.
 *   bd    - decoder-side block; supplies the prediction in bd->predictor.
 *   pitch - row step, in elements, applied to both the residual and the
 *           predictor pointers.
 */
void vp8_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
  unsigned char *src_ptr = (*(be->base_src) + be->src);
  short *diff_ptr = be->src_diff;
  unsigned char *pred_ptr = bd->predictor;
  int src_stride = be->src_stride;
  int r, c;

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];
    }

    diff_ptr += pitch;
    pred_ptr += pitch;
    src_ptr  += src_stride;
  }
}

John Koleszar's avatar
John Koleszar committed
70
71
72
73
74
/*
 * Computes the two 8x8 chroma residuals of a macroblock.
 *
 *   diff   - macroblock residual buffer; U is written to diff[256..319] and
 *            V to diff[320..383], each as 8 rows of 8 values.
 *   usrc   - top-left U source pixel; rows are `stride` bytes apart.
 *   vsrc   - top-left V source pixel; rows are `stride` bytes apart.
 *   pred   - macroblock predictor buffer; the U prediction is read from
 *            pred[256..319] and the V prediction from pred[320..383].
 *   stride - row step of both source planes.
 *
 * Entries of diff outside [256, 384) are not touched.
 */
void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) {
  short *udiff = diff + 256;
  short *vdiff = diff + 320;
  unsigned char *upred = pred + 256;
  unsigned char *vpred = pred + 320;
  int r, c;

  /* U plane */
  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      udiff[c] = usrc[c] - upred[c];
    }

    udiff += 8;
    upred += 8;
    usrc  += stride;
  }

  /* V plane */
  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      vdiff[c] = vsrc[c] - vpred[c];
    }

    vdiff += 8;
    vpred += 8;
    vsrc  += stride;
  }
}
John Koleszar's avatar
John Koleszar committed
98

John Koleszar's avatar
John Koleszar committed
99
100
/*
 * Computes the 16x16 luma residual of a macroblock.
 *
 *   diff   - receives 16 rows of 16 values (diff[0..255]).
 *   src    - top-left source pixel; rows are `stride` bytes apart.
 *   pred   - prediction, 16 rows of 16 bytes stored contiguously.
 *   stride - row step of the source plane.
 */
void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride) {
  int r, c;

  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++) {
      diff[c] = src[c] - pred[c];
    }

    diff += 16;
    pred += 16;
    src  += stride;
  }
}

John Koleszar's avatar
John Koleszar committed
113
114
/*
 * Computes the residual of a whole macroblock into x->src_diff by invoking
 * the luma (submby) and chroma (submbuv) subtract routines selected through
 * ENCODEMB_INVOKE. The prediction is read from x->e_mbd.predictor.
 */
static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
  BLOCK *b = &x->block[0];

  ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src),
                                           x->e_mbd.predictor, b->src_stride);
  ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
                                            x->src.v_buffer, x->e_mbd.predictor,
                                            x->src.uv_stride);
}

John Koleszar's avatar
John Koleszar committed
120
121
122
/*
 * Gathers the first coefficient of each of the 16 luma blocks
 * (x->coeff[0], x->coeff[16], ..., x->coeff[240]) into the 16 entries
 * starting at x->src_diff[384], which serve as input to the second-order
 * transform.
 */
static void build_dcblock(MACROBLOCK *x) {
  short *src_diff_ptr = &x->src_diff[384];
  int i;

  for (i = 0; i < 16; i++) {
    src_diff_ptr[i] = x->coeff[i * 16];
  }
}
John Koleszar's avatar
John Koleszar committed
128
129
130
131
132
133
134
135
136
137
/*
 * Builds the second-order input block for the 8x8 transform path.
 *
 * The 16 entries starting at x->src_diff[384] are cleared, then positions
 * 0, 1, 4 and 8 are loaded from x->coeff[0], x->coeff[64], x->coeff[128]
 * and x->coeff[192] (the first coefficient of blocks 0, 4, 8 and 12).
 */
void vp8_build_dcblock_8x8(MACROBLOCK *x) {
  short *src_diff_ptr = &x->src_diff[384];
  int i;

  for (i = 0; i < 16; i++) {
    src_diff_ptr[i] = 0;
  }
  src_diff_ptr[0] = x->coeff[0 * 16];
  src_diff_ptr[1] = x->coeff[4 * 16];
  src_diff_ptr[4] = x->coeff[8 * 16];
  src_diff_ptr[8] = x->coeff[12 * 16];
}
Yaowu Xu's avatar
Yaowu Xu committed
139

John Koleszar's avatar
John Koleszar committed
140
141
/*
 * Forward-transforms the chroma residual: calls the x->vp8_short_fdct8x4
 * hook on blocks 16, 18, 20 and 22, with 16 as its third argument
 * (presumably the residual pitch -- confirm against the hook's prototype).
 */
void vp8_transform_mbuv(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 2) {
    x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
149

John Koleszar's avatar
John Koleszar committed
150
151
/*
 * Forward-transforms the luma residual of an intra macroblock:
 *   1. x->vp8_short_fdct8x4 on blocks 0, 2, ..., 14;
 *   2. build_dcblock() collects the 16 luma DC values;
 *   3. x->short_walsh4x4 runs the second-order transform on block 24.
 */
void vp8_transform_intra_mby(MACROBLOCK *x) {
  int i;

  for (i = 0; i < 16; i += 2) {
    x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 32);
  }

  // build dc block from 16 y dc values
  build_dcblock(x);

  // do 2nd order transform on the dc block
  x->short_walsh4x4(&x->block[24].src_diff[0],
                    &x->block[24].coeff[0], 8);
}


John Koleszar's avatar
John Koleszar committed
168
169
/*
 * Forward-transforms a whole macroblock with the 4x4 transform set.
 *
 * Luma blocks 0..14 and chroma blocks 16..22 go through
 * x->vp8_short_fdct8x4, two blocks per call. Unless the macroblock mode is
 * SPLITMV, the luma DC values are also gathered by build_dcblock() and
 * passed through x->short_walsh4x4 on block 24.
 */
static void transform_mb(MACROBLOCK *x) {
  int i;
  MB_PREDICTION_MODE mode = x->e_mbd.mode_info_context->mbmi.mode;

  for (i = 0; i < 16; i += 2) {
    x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 32);
  }

  // build dc block from 16 y dc values
  if (mode != SPLITMV)
    build_dcblock(x);

  for (i = 16; i < 24; i += 2) {
    x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }

  // do 2nd order transform on the dc block
  if (mode != SPLITMV)
    x->short_walsh4x4(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
}

193

John Koleszar's avatar
John Koleszar committed
194
195
/*
 * Forward-transforms only the luma part of a macroblock with the 4x4
 * transform set. Blocks 0..14 go through x->vp8_short_fdct8x4; unless the
 * mode is SPLITMV the DC block is then built and transformed with
 * x->short_walsh4x4 on block 24.
 */
static void transform_mby(MACROBLOCK *x) {
  int i;

  for (i = 0; i < 16; i += 2) {
    x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 32);
  }

  // build dc block from 16 y dc values
  if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
    build_dcblock(x);
    x->short_walsh4x4(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
  }
}

John Koleszar's avatar
John Koleszar committed
210
211
/*
 * Forward-transforms the chroma residual with the 8x8 transform: calls
 * x->vp8_short_fdct8x8 on blocks 16 and 20, with 16 as its third argument.
 */
void vp8_transform_mbuv_8x8(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 4) {
    x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }
}


John Koleszar's avatar
John Koleszar committed
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
/*
 * Forward-transforms the luma residual of an intra macroblock with the 8x8
 * transform.
 *
 * The residual is read from blocks 0, 8, 2 and 10, and the coefficients are
 * written to blocks 0, 8, 4 and 12 respectively. The "+ 2" on the output
 * index is deliberate: vp8_build_dcblock_8x8() reads its DC values from
 * blocks 0, 4, 8 and 12. The DC block is then passed to x->short_fhaar2x2
 * on block 24.
 */
void vp8_transform_intra_mby_8x8(MACROBLOCK *x) {
  int i;

  for (i = 0; i < 9; i += 8) {
    x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 32);
  }
  for (i = 2; i < 11; i += 8) {
    x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i + 2].coeff[0], 32);
  }

  // build dc block from the 8x8 luma dc values
  vp8_build_dcblock_8x8(x);

  // do 2nd order transform on the dc block
  x->short_fhaar2x2(&x->block[24].src_diff[0],
                    &x->block[24].coeff[0], 8);
}


John Koleszar's avatar
John Koleszar committed
241
242
/*
 * Forward-transforms a whole macroblock with the 8x8 transform.
 *
 * Luma: residual is read from blocks 0, 8, 2 and 10, coefficients are
 * written to blocks 0, 8, 4 and 12 (the layout vp8_build_dcblock_8x8()
 * reads from). Chroma: blocks 16 and 20. Unless the mode is B_PRED or
 * SPLITMV, the DC block is built and passed through x->short_fhaar2x2 on
 * block 24.
 */
void vp8_transform_mb_8x8(MACROBLOCK *x) {
  int i;
  MB_PREDICTION_MODE mode = x->e_mbd.mode_info_context->mbmi.mode;

  for (i = 0; i < 9; i += 8) {
    x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 32);
  }
  for (i = 2; i < 11; i += 8) {
    x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i + 2].coeff[0], 32);
  }

  // build dc block from the 8x8 luma dc values
  if (mode != B_PRED && mode != SPLITMV)
    vp8_build_dcblock_8x8(x);

  for (i = 16; i < 24; i += 4) {
    x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 16);
  }

  // do 2nd order transform on the dc block
  if (mode != B_PRED && mode != SPLITMV)
    x->short_fhaar2x2(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
}

John Koleszar's avatar
John Koleszar committed
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
/*
 * Forward-transforms only the luma part of a macroblock with the 8x8
 * transform. Residual is read from blocks 0, 8, 2 and 10; coefficients are
 * written to blocks 0, 8, 4 and 12. Unless the mode is SPLITMV the DC block
 * is built and passed through x->short_fhaar2x2 on block 24.
 */
void vp8_transform_mby_8x8(MACROBLOCK *x) {
  int i;

  for (i = 0; i < 9; i += 8) {
    x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i].coeff[0], 32);
  }
  for (i = 2; i < 11; i += 8) {
    x->vp8_short_fdct8x8(&x->block[i].src_diff[0],
                         &x->block[i + 2].coeff[0], 32);
  }

  // build dc block from the 8x8 luma dc values
  if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
    vp8_build_dcblock_8x8(x);
    x->short_fhaar2x2(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
  }
}

Daniel Kang's avatar
Daniel Kang committed
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
#if CONFIG_TX16X16
/*
 * Chroma transform for the 16x16 path. There is no dedicated chroma
 * transform here: blocks 16 and 20 are run through x->vp8_short_fdct8x8.
 */
void vp8_transform_mbuv_16x16(MACROBLOCK *x) {
  int blk = 16;

  vp8_clear_system_state();

  /* Default to the 8x8 */
  while (blk < 24) {
    x->vp8_short_fdct8x8(x->block[blk].src_diff, x->block[blk].coeff, 16);
    blk += 4;
  }
}


/*
 * Luma transform of an intra macroblock on the 16x16 path: a single
 * x->vp8_short_fdct16x16 call from block 0's residual into block 0's
 * coefficients.
 */
void vp8_transform_intra_mby_16x16(MACROBLOCK *x) {
  vp8_clear_system_state();
  x->vp8_short_fdct16x16(x->block[0].src_diff, x->block[0].coeff, 32);
}


/*
 * Whole-macroblock transform on the 16x16 path: one
 * x->vp8_short_fdct16x16 call for luma (block 0), then
 * x->vp8_short_fdct8x8 on chroma blocks 16 and 20.
 */
void vp8_transform_mb_16x16(MACROBLOCK *x) {
  int blk;

  vp8_clear_system_state();

  /* luma */
  x->vp8_short_fdct16x16(x->block[0].src_diff, x->block[0].coeff, 32);

  /* chroma */
  blk = 16;
  while (blk < 24) {
    x->vp8_short_fdct8x8(x->block[blk].src_diff, x->block[blk].coeff, 16);
    blk += 4;
  }
}

/*
 * Luma-only transform on the 16x16 path: a single x->vp8_short_fdct16x16
 * call from block 0's residual into block 0's coefficients.
 */
void vp8_transform_mby_16x16(MACROBLOCK *x) {
  vp8_clear_system_state();
  x->vp8_short_fdct16x16(x->block[0].src_diff,
                         x->block[0].coeff,
                         32);
}
#endif
324
325
326

/* Tie-breakers used by the trellis code when two RDCOST values compare
 * equal: the low 8 bits of (128 + R * RM). The DM and D arguments are
 * accepted but not used.
 */
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
327
328
typedef struct vp8_token_state vp8_token_state;

/* One node of the coefficient trellis used by the optimize_b routines. */
struct vp8_token_state {
  int           rate;   /* accumulated rate from this node to the end */
  int           error;  /* accumulated squared error from this node on */
  int           next;   /* scan index of the next trellis node */
  signed char   token;  /* token chosen for this position */
  short         qc;     /* quantized coefficient value for this candidate */
};

337
// TODO: experiments to find optimal multiple numbers
/* Per-plane scale factors applied to the rate-distortion multiplier. */
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 4
341

John Koleszar's avatar
John Koleszar committed
342
343
344
345
346
/* RD multiplier scale, indexed by the plane `type` passed to the
 * optimize_b routines (presumably in PLANE_TYPE_* enum order -- confirm
 * against the enum definition).
 */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  Y2_RD_MULT,
  UV_RD_MULT,
  Y1_RD_MULT
};

349
350
static void optimize_b(MACROBLOCK *mb, int ib, int type,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
John Koleszar's avatar
John Koleszar committed
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
                       const VP8_ENCODER_RTCD *rtcd) {
  BLOCK *b;
  BLOCKD *d;
  vp8_token_state tokens[17][2];
  unsigned best_mask[2];
  const short *dequant_ptr;
  const short *coeff_ptr;
  short *qcoeff_ptr;
  short *dqcoeff_ptr;
  int eob;
  int i0;
  int rc;
  int x;
  int sz = 0;
  int next;
  int rdmult;
  int rddiv;
  int final_eob;
369
370
  int64_t rd_cost0;
  int64_t rd_cost1;
John Koleszar's avatar
John Koleszar committed
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
  int rate0;
  int rate1;
  int error0;
  int error1;
  int t0;
  int t1;
  int best;
  int band;
  int pt;
  int i;
  int err_mult = plane_rd_mult[type];

  b = &mb->block[ib];
  d = &mb->e_mbd.block[ib];

  dequant_ptr = d->dequant;
  coeff_ptr = b->coeff;
  qcoeff_ptr = d->qcoeff;
  dqcoeff_ptr = d->dqcoeff;
  i0 = !type;
  eob = d->eob;

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;

  rddiv = mb->rddiv;
  best_mask[0] = best_mask[1] = 0;
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = 16;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  for (i = eob; i-- > i0;) {
    int base_bits;
    int d2;
    int dx;

    rc = vp8_default_zig_zag1d[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp8_dct_value_tokens_ptr + x)->Token;
      /* Consider both possible successor states. */
      if (next < 16) {
        band = vp8_coef_bands[i + 1];
        pt = vp8_prev_token_class[t0];
        rate0 +=
429
          mb->token_costs[TX_4X4][type][band][pt][tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
430
        rate1 +=
431
          mb->token_costs[TX_4X4][type][band][pt][tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
      }
      rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
      rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
      if (rd_cost0 == rd_cost1) {
        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
      }
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);
      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_mask[0] |= best << i;
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      if ((abs(x)*dequant_ptr[rc] > abs(coeff_ptr[rc])) &&
          (abs(x)*dequant_ptr[rc] < abs(coeff_ptr[rc]) + dequant_ptr[rc]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
469
         */
John Koleszar's avatar
John Koleszar committed
470
471
472
473
474
475
476
477
478
479
480
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
      }
      if (next < 16) {
        band = vp8_coef_bands[i + 1];
        if (t0 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t0];
481
482
          rate0 += mb->token_costs[TX_4X4][type][band][pt]
              [tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
483
        }
John Koleszar's avatar
John Koleszar committed
484
485
        if (t1 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t1];
486
487
          rate1 += mb->token_costs[TX_4X4][type][band][pt]
              [tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
488
489
        }
      }
John Koleszar's avatar
John Koleszar committed
490

John Koleszar's avatar
John Koleszar committed
491
492
493
      rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
      rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
      if (rd_cost0 == rd_cost1) {
494
495
        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
John Koleszar's avatar
John Koleszar committed
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
      }
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);

      if (shortcut) {
        dx -= (dequant_ptr[rc] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_mask[1] |= best << i;
      /* Finally, make this the new head of the trellis. */
      next = i;
513
    }
John Koleszar's avatar
John Koleszar committed
514
515
516
517
518
519
520
521
522
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
      band = vp8_coef_bands[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
523
        tokens[next][0].rate += mb->token_costs[TX_4X4][type][band][0][t0];
John Koleszar's avatar
John Koleszar committed
524
525
526
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
527
        tokens[next][1].rate += mb->token_costs[TX_4X4][type][band][0][t1];
John Koleszar's avatar
John Koleszar committed
528
529
530
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
531
    }
John Koleszar's avatar
John Koleszar committed
532
533
534
535
536
537
538
539
540
541
542
  }

  /* Now pick the best path through the whole trellis. */
  band = vp8_coef_bands[i + 1];
  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
543
544
  rate0 += mb->token_costs[TX_4X4][type][band][pt][t0];
  rate1 += mb->token_costs[TX_4X4][type][band][pt][t1];
John Koleszar's avatar
John Koleszar committed
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
  if (rd_cost0 == rd_cost1) {
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
  }
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x)
      final_eob = i;
    rc = vp8_default_zig_zag1d[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = x * dequant_ptr[rc];
    next = tokens[i][best].next;
    best = (best_mask[best] >> i) & 1;
  }
  final_eob++;

  d->eob = final_eob;
  *a = *l = (d->eob != !type);
John Koleszar's avatar
John Koleszar committed
567
568
}

John Koleszar's avatar
John Koleszar committed
569
570
571
572
573
574
575
576
/**************************************************************************
our inverse hadamard transform effectively is weighted sum of all 16 inputs
with weight either 1 or -1. It has a last stage scaling of (sum+1)>>2. And
dc only idct is (dc+16)>>5. So if all the sums are between -65 and 63 the
output after inverse wht and idct will be all zero. A sum of absolute value
smaller than 65 guarantees all 16 different (+1/-1) weighted sums in wht
fall between -65 and +65.
**************************************************************************/
577
578
#define SUM_2ND_COEFF_THRESH 65

579
/*
 * Zeroes the second-order block (block 24) when the sum of the absolute
 * values of its dequantized coefficients is below SUM_2ND_COEFF_THRESH.
 *
 *   x    - macroblock descriptor that owns block 24.
 *   type - plane type, used only to recompute the entropy contexts.
 *   a, l - above/left entropy contexts; both set to (eob != !type) when the
 *          block is cleared, otherwise left untouched.
 *
 * Nothing is done when both dequant[0] and dequant[1] are already at or
 * above the threshold.
 */
static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type,
                                   ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
  int sum = 0;
  int i;
  BLOCKD *bd = &x->block[24];

  if (bd->dequant[0] >= SUM_2ND_COEFF_THRESH
      && bd->dequant[1] >= SUM_2ND_COEFF_THRESH)
    return;

  for (i = 0; i < bd->eob; i++) {
    int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]];
    sum += (coef >= 0) ? coef : -coef;
    if (sum >= SUM_2ND_COEFF_THRESH)
      return;
  }

  /* The loop returns as soon as the threshold is reached, so getting here
   * means sum < SUM_2ND_COEFF_THRESH: clear every coded coefficient.
   */
  for (i = 0; i < bd->eob; i++) {
    int rc = vp8_default_zig_zag1d[i];
    bd->qcoeff[rc] = 0;
    bd->dqcoeff[rc] = 0;
  }
  bd->eob = 0;
  *a = *l = (bd->eob != !type);
}
605
/* Threshold on the summed absolute second-order coefficients below which
 * check_reset_8x8_2nd_coeffs() clears the block.
 */
#define SUM_2ND_COEFF_THRESH_8X8 32
606
/*
 * 8x8-path counterpart of check_reset_2nd_coeffs().
 *
 * Only positions 0, 1, 4 and 8 of block 24 are examined. If the sum of
 * their absolute dequantized values is below SUM_2ND_COEFF_THRESH_8X8,
 * those four qcoeff/dqcoeff entries are zeroed, eob is set to 0 and both
 * entropy contexts are set to (eob != !type).
 */
static void check_reset_8x8_2nd_coeffs(MACROBLOCKD *x, int type,
                                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
  int sum = 0;
  BLOCKD *bd = &x->block[24];
  int coef;

  coef = bd->dqcoeff[0];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[1];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[4];
  sum += (coef >= 0) ? coef : -coef;
  coef = bd->dqcoeff[8];
  sum += (coef >= 0) ? coef : -coef;

  if (sum < SUM_2ND_COEFF_THRESH_8X8) {
    bd->qcoeff[0] = 0;
    bd->dqcoeff[0] = 0;
    bd->qcoeff[1] = 0;
    bd->dqcoeff[1] = 0;
    bd->qcoeff[4] = 0;
    bd->dqcoeff[4] = 0;
    bd->qcoeff[8] = 0;
    bd->dqcoeff[8] = 0;
    bd->eob = 0;
    *a = *l = (bd->eob != !type);
  }
}



John Koleszar's avatar
John Koleszar committed
637
638
639
640
641
642
643
/*
 * Runs the 4x4 trellis optimization over every block of a macroblock.
 *
 * The above/left entropy contexts are copied into local scratch copies,
 * so context updates made by optimize_b() never reach the real contexts.
 * Luma blocks 0..15 are optimized first, then chroma blocks 16..23. When
 * the mode has a second-order block (not B_PRED, I8X8_PRED or SPLITMV),
 * block 24 is optimized too and then passed to check_reset_2nd_coeffs().
 */
static void optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
  int b;
  int type;
  int has_2nd_order;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;
  MB_PREDICTION_MODE mode = x->e_mbd.mode_info_context->mbmi.mode;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  has_2nd_order = (mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV);
  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;

  for (b = 0; b < 16; b++) {
    optimize_b(x, b, type,
               ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
  }

  for (b = 16; b < 24; b++) {
    optimize_b(x, b, PLANE_TYPE_UV,
               ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
  }

  if (has_2nd_order) {
    b = 24;
    optimize_b(x, b, PLANE_TYPE_Y2,
               ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
                           ta + vp8_block2above[b], tl + vp8_block2left[b]);
  }
}


John Koleszar's avatar
John Koleszar committed
675
676
677
678
/*
 * Runs the 4x4 trellis optimization over the luma blocks of a macroblock.
 *
 * Returns without doing anything if either entropy context pointer is
 * NULL. Otherwise the contexts are copied into local scratch copies (the
 * real ones are never modified), luma blocks 0..15 are optimized, and when
 * the mode has a second-order block (not B_PRED, I8X8_PRED or SPLITMV),
 * block 24 is optimized and passed to check_reset_2nd_coeffs().
 */
void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
  int b;
  int type;
  int has_2nd_order;

  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;
  MB_PREDICTION_MODE mode = x->e_mbd.mode_info_context->mbmi.mode;

  if (!x->e_mbd.above_context)
    return;

  if (!x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  has_2nd_order = (mode != B_PRED && mode != I8X8_PRED && mode != SPLITMV);
  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;

  for (b = 0; b < 16; b++) {
    optimize_b(x, b, type,
               ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
  }

  if (has_2nd_order) {
    b = 24;
    optimize_b(x, b, PLANE_TYPE_Y2,
               ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
                           ta + vp8_block2above[b], tl + vp8_block2left[b]);
  }
}

John Koleszar's avatar
John Koleszar committed
715
716
717
718
719
/*
 * Runs the 4x4 trellis optimization over the chroma blocks (16..23) of a
 * macroblock.
 *
 * Returns without doing anything if either entropy context pointer is
 * NULL. The contexts are copied into local scratch copies, so the real
 * ones are never modified.
 */
void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
  int b;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (!x->e_mbd.above_context)
    return;

  if (!x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  for (b = 16; b < 24; b++) {
    optimize_b(x, b, PLANE_TYPE_UV,
               ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
  }
}

739
740
void optimize_b_8x8(MACROBLOCK *mb, int i, int type,
                    ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
John Koleszar's avatar
John Koleszar committed
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
                    const VP8_ENCODER_RTCD *rtcd) {
  BLOCK *b;
  BLOCKD *d;
  vp8_token_state tokens[65][2];
  unsigned best_mask[2];
  const short *dequant_ptr;
  const short *coeff_ptr;
  short *qcoeff_ptr;
  short *dqcoeff_ptr;
  int eob;
  int i0;
  int rc;
  int x;
  int sz = 0;
  int next;
  int rdmult;
  int rddiv;
  int final_eob;
759
760
  int64_t rd_cost0;
  int64_t rd_cost1;
John Koleszar's avatar
John Koleszar committed
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
  int rate0;
  int rate1;
  int error0;
  int error1;
  int t0;
  int t1;
  int best;
  int band;
  int pt;
  int err_mult = plane_rd_mult[type];

  b = &mb->block[i];
  d = &mb->e_mbd.block[i];

  dequant_ptr = d->dequant;
  coeff_ptr = b->coeff;
  qcoeff_ptr = d->qcoeff;
  dqcoeff_ptr = d->dqcoeff;
  i0 = !type;
  eob = d->eob;

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
  best_mask[0] = best_mask[1] = 0;
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = 64;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  for (i = eob; i-- > i0;) {
    int base_bits;
    int d2;
    int dx;

    rc = vp8_default_zig_zag1d_8x8[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp8_dct_value_tokens_ptr + x)->Token;
      /* Consider both possible successor states. */
      if (next < 64) {
        band = vp8_coef_bands_8x8[i + 1];
        pt = vp8_prev_token_class[t0];
        rate0 +=
817
          mb->token_costs[TX_8X8][type][band][pt][tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
818
        rate1 +=
819
          mb->token_costs[TX_8X8][type][band][pt][tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
      }
      rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0);
      rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1);
      if (rd_cost0 == rd_cost1) {
        rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1);
      }
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);
      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_mask[0] |= best << i;
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
857
         */
John Koleszar's avatar
John Koleszar committed
858
859
860
861
862
863
864
865
866
867
868
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
      }
      if (next < 64) {
        band = vp8_coef_bands_8x8[i + 1];
        if (t0 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t0];
869
870
          rate0 += mb->token_costs[TX_8X8][type][band][pt][
              tokens[next][0].token];
871
        }
John Koleszar's avatar
John Koleszar committed
872
873
        if (t1 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t1];
874
875
          rate1 += mb->token_costs[TX_8X8][type][band][pt][
              tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
876
877
        }
      }
878

John Koleszar's avatar
John Koleszar committed
879
880
881
      rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0);
      rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1);
      if (rd_cost0 == rd_cost1) {
882
883
        rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1);
John Koleszar's avatar
John Koleszar committed
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
      }
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);

      if (shortcut) {
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_mask[1] |= best << i;
      /* Finally, make this the new head of the trellis. */
      next = i;
901
    }
John Koleszar's avatar
John Koleszar committed
902
903
904
905
906
907
908
909
910
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
      band = vp8_coef_bands_8x8[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
911
        tokens[next][0].rate += mb->token_costs[TX_8X8][type][band][0][t0];
John Koleszar's avatar
John Koleszar committed
912
913
914
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
915
        tokens[next][1].rate += mb->token_costs[TX_8X8][type][band][0][t1];
John Koleszar's avatar
John Koleszar committed
916
917
918
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
919
    }
John Koleszar's avatar
John Koleszar committed
920
921
922
923
924
925
926
927
928
929
930
  }

  /* Now pick the best path through the whole trellis. */
  band = vp8_coef_bands_8x8[i + 1];
  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
931
932
  rate0 += mb->token_costs[TX_8X8][type][band][pt][t0];
  rate1 += mb->token_costs[TX_8X8][type][band][pt][t1];
John Koleszar's avatar
John Koleszar committed
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
  rd_cost0 = RDCOST_8x8(rdmult, rddiv, rate0, error0);
  rd_cost1 = RDCOST_8x8(rdmult, rddiv, rate1, error1);
  if (rd_cost0 == rd_cost1) {
    rd_cost0 = RDTRUNC_8x8(rdmult, rddiv, rate0, error0);
    rd_cost1 = RDTRUNC_8x8(rdmult, rddiv, rate1, error1);
  }
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x)
      final_eob = i;
    rc = vp8_default_zig_zag1d_8x8[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]);

    next = tokens[i][best].next;
    best = (best_mask[best] >> i) & 1;
  }
  final_eob++;

  d->eob = final_eob;
  *a = *l = (d->eob != !type);
956
957
958

}

John Koleszar's avatar
John Koleszar committed
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
/* Run optimize_b_8x8() on blocks 0, 4, 8, 12 (type 0) and then on blocks
 * 16 and 20 (PLANE_TYPE_UV), in that order, and finally call
 * check_reset_8x8_2nd_coeffs() for block 24 with PLANE_TYPE_Y2.
 *
 * All context updates happen on local copies of the above/left entropy
 * contexts. After each block the context entry that was used is duplicated
 * into the entry immediately after it.
 *
 * Unlike the vp8_optimize_*_8x8 entry points, this function does not check
 * x->e_mbd.above_context / left_context for NULL before copying from them.
 */
void optimize_mb_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)&left_copy;
  int blk;

  vpx_memcpy(&above_copy, x->e_mbd.above_context,
             sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&left_copy, x->e_mbd.left_context,
             sizeof(ENTROPY_CONTEXT_PLANES));

  for (blk = 0; blk < 24; blk += 4) {
    /* Blocks below 16 use type 0; blocks 16 and 20 use PLANE_TYPE_UV. */
    const int plane_type = (blk < 16) ? 0 : PLANE_TYPE_UV;
    ENTROPY_CONTEXT *const a = above + vp8_block2above_8x8[blk];
    ENTROPY_CONTEXT *const l = left + vp8_block2left_8x8[blk];

    optimize_b_8x8(x, blk, plane_type, a, l, rtcd);
    a[1] = a[0];
    l[1] = l[0];
  }

  /* 8x8 always has a 2nd order Haar block. */
  check_reset_8x8_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
                             above + vp8_block2above_8x8[24],
                             left + vp8_block2left_8x8[24]);
}

John Koleszar's avatar
John Koleszar committed
995
996
997
/* Run optimize_b_8x8() with type 0 on blocks 0, 4, 8 and 12, then call
 * check_reset_8x8_2nd_coeffs() for block 24 with PLANE_TYPE_Y2.
 *
 * All context updates happen on local copies of the above/left entropy
 * contexts. Returns without doing anything if either
 * x->e_mbd.above_context or x->e_mbd.left_context is NULL.
 *
 * The context handed to optimize_b_8x8() is looked up through the
 * vp8_block2above_8x8 / vp8_block2left_8x8 tables, the same tables used for
 * the propagation step that follows the call. Indexing the call with the
 * 4x4 tables would make the optimizer read and write one context slot while
 * a different slot gets propagated.
 */
void vp8_optimize_mby_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
  int b;
  const int type = 0;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (!x->e_mbd.above_context)
    return;

  if (!x->e_mbd.left_context)
    return;

  vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  for (b = 0; b < 16; b += 4) {
    ENTROPY_CONTEXT *const a = ta + vp8_block2above_8x8[b];
    ENTROPY_CONTEXT *const l = tl + vp8_block2left_8x8[b];

    optimize_b_8x8(x, b, type, a, l, rtcd);
    /* Duplicate the updated context into the entry right after it. */
    a[1] = a[0];
    l[1] = l[0];
  }

  /* 8x8 always has a 2nd order Haar block. */
  check_reset_8x8_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2,
                             ta + vp8_block2above_8x8[24],
                             tl + vp8_block2left_8x8[24]);
}

John Koleszar's avatar
John Koleszar committed
1029
1030
1031
1032
1033
/* Run optimize_b_8x8() with PLANE_TYPE_UV on blocks 16 and 20.
 *
 * All context updates happen on local copies of the above/left entropy
 * contexts; after each block the context entry that was used is duplicated
 * into the entry immediately after it. Returns without doing anything if
 * either x->e_mbd.above_context or x->e_mbd.left_context is NULL.
 */
void vp8_optimize_mbuv_8x8(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)&left_copy;
  int blk;

  if (!x->e_mbd.above_context || !x->e_mbd.left_context)
    return;

  vpx_memcpy(&above_copy, x->e_mbd.above_context,
             sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&left_copy, x->e_mbd.left_context,
             sizeof(ENTROPY_CONTEXT_PLANES));

  for (blk = 16; blk < 24; blk += 4) {
    ENTROPY_CONTEXT *const a = above + vp8_block2above_8x8[blk];
    ENTROPY_CONTEXT *const l = left + vp8_block2left_8x8[blk];

    optimize_b_8x8(x, blk, PLANE_TYPE_UV, a, l, rtcd);
    a[1] = a[0];
    l[1] = l[0];
  }
}

Daniel Kang's avatar
Daniel Kang committed
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118


#if CONFIG_TX16X16
/* Recompute rd_cost0 / rd_cost1 from the expanding scope's locals rdmult,
 * rddiv, rate0/error0 and rate1/error1 using RDCOST; when the two costs
 * tie, both are recomputed with RDTRUNC instead.
 *
 * Wrapped in do { } while (0) so that "UPDATE_RD_COST();" is exactly one
 * statement and stays valid inside an unbraced if/else.
 */
#define UPDATE_RD_COST()                                  \
  do {                                                    \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);      \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);      \
    if (rd_cost0 == rd_cost1) {                           \
      rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);   \
      rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);   \
    }                                                     \
  } while (0)

/* Trellis (Viterbi) optimization of the quantized coefficients of one
 * 16x16 transform block.
 *
 * Walking the zig-zag scan backwards from the end-of-block position, each
 * non-zero quantized coefficient gets two trellis states:
 *   state 0 - the coefficient exactly as quantized;
 *   state 1 - the coefficient moved one step toward zero, but only when
 *             |qcoeff| * dequant lies strictly between |coeff| and
 *             |coeff| + dequant; otherwise state 1 carries the same value
 *             as state 0.
 * For each state the cheaper successor state (per UPDATE_RD_COST) is stored
 * in best_index[position][state]. The cheapest path is then traced forward
 * and written back to d->qcoeff / d->dqcoeff, and d->eob is updated.
 *
 * mb   - macroblock owning the block; supplies rdmult, rddiv, token_costs.
 * i    - index into mb->block[] / mb->e_mbd.block[] on entry; reused as the
 *        scan position inside the function.
 * type - plane type; indexes plane_rd_mult[] and token_costs[].
 * a, l - above/left entropy contexts: read to cost the first token and both
 *        overwritten with (eob != !type) on return.
 * rtcd - unused here; kept for signature parity with the other optimizers.
 */
void optimize_b_16x16(MACROBLOCK *mb, int i, int type,
                      ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                      const VP8_ENCODER_RTCD *rtcd) {
  BLOCK *b = &mb->block[i];
  BLOCKD *d = &mb->e_mbd.block[i];
  vp8_token_state tokens[257][2];
  unsigned best_index[257][2];
  const short *dequant_ptr = d->dequant, *coeff_ptr = b->coeff;
  short *qcoeff_ptr = d->qcoeff;
  short *dqcoeff_ptr = d->dqcoeff;
  int eob = d->eob, final_eob, sz = 0;
  int rc, x, next;
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
  int err_mult = plane_rd_mult[type];

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
  memset(best_index, 0, sizeof(best_index));
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = 256;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  for (i = eob; i-- > 0;) {
    int base_bits, d2, dx;

    rc = vp8_default_zig_zag1d_16x16[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp8_dct_value_tokens_ptr + x)->Token;
      /* Consider both possible successor states. */
      if (next < 256) {
        band = vp8_coef_bands_16x16[i + 1];
        pt = vp8_prev_token_class[t0];
        rate0 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][0].token];
        rate1 += mb->token_costs[TX_16X16][type][band][pt][tokens[next][1].token];
      }
      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);
      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_index[i][0] = best;
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      /* The lower magnitude is only tried when the dequantized value
       * overshoots the original coefficient by less than one step.
       */
      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff_ptr[rc])) &&
          (abs(x) * dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) + dequant_ptr[rc != 0]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        /* sz is -1 for negative x and 0 otherwise, so this moves x one
         * step toward zero.
         */
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
      }
      if (next < 256) {
        band = vp8_coef_bands_16x16[i + 1];
        if (t0 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t0];
          rate0 += mb->token_costs[TX_16X16][type][band][pt]
              [tokens[next][0].token];
        }
        if (t1 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t1];
          rate1 += mb->token_costs[TX_16X16][type][band][pt]
              [tokens[next][1].token];
        }
      }
      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);

      if (shortcut) {
        /* Adjust the reconstruction error for the reduced magnitude. */
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_index[i][1] = best;
      /* Finally, make this the new head of the trellis. */
      next = i;
    }
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
      band = vp8_coef_bands_16x16[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
        tokens[next][0].rate += mb->token_costs[TX_16X16][type][band][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
        tokens[next][1].rate += mb->token_costs[TX_16X16][type][band][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
    }
  }

  /* Now pick the best path through the whole trellis. */
  band = vp8_coef_bands_16x16[i + 1];
  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[TX_16X16][type][band][pt][t0];
  rate1 += mb->token_costs[TX_16X16][type][band][pt][t1];
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
  final_eob = -1;

  /* Follow the chosen path from the head node to the sentinel, writing the
   * selected quantized values back into the block.
   */
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x)
      final_eob = i;
    rc = vp8_default_zig_zag1d_16x16[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]);

    next = tokens[i][best].next;
    best = best_index[i][best];
  }
  final_eob++;

  d->eob = final_eob;
  *a = *l = (d->eob != !type);
}

/* Run optimize_b_16x16() on block 0 with PLANE_TYPE_Y_WITH_DC.
 *
 * Works on local copies of the above/left entropy contexts; after the call
 * the first entry of each copy is duplicated into the second. Returns
 * without doing anything if either x->e_mbd.above_context or
 * x->e_mbd.left_context is NULL.
 */
void vp8_optimize_mby_16x16(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)&left_copy;

  if (!x->e_mbd.above_context || !x->e_mbd.left_context)
    return;

  vpx_memcpy(&above_copy, x->e_mbd.above_context,
             sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&left_copy, x->e_mbd.left_context,
             sizeof(ENTROPY_CONTEXT_PLANES));

  optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, above, left, rtcd);
  above[1] = above[0];
  left[1] = left[0];
}

/* Run optimize_b_16x16() on block 0 with PLANE_TYPE_Y_WITH_DC, then
 * optimize_b_8x8() with PLANE_TYPE_UV on blocks 16 and 20.
 *
 * All context updates happen on local copies of the above/left entropy
 * contexts; after each block the context entry that was used is duplicated
 * into the entry immediately after it.
 *
 * Unlike vp8_optimize_mby_16x16(), this function does not check
 * x->e_mbd.above_context / left_context for NULL before copying from them.
 */
void optimize_mb_16x16(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) {
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *const above = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left = (ENTROPY_CONTEXT *)&left_copy;
  int blk;

  vpx_memcpy(&above_copy, x->e_mbd.above_context,
             sizeof(ENTROPY_CONTEXT_PLANES));
  vpx_memcpy(&left_copy, x->e_mbd.left_context,
             sizeof(ENTROPY_CONTEXT_PLANES));

  /* Block 0 with the 16x16 optimizer. */
  optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, above, left, rtcd);
  above[1] = above[0];
  left[1] = left[0];

  /* Blocks 16 and 20 with the 8x8 optimizer. */
  for (blk = 16; blk < 24; blk += 4) {
    ENTROPY_CONTEXT *const a = above + vp8_block2above_8x8[blk];
    ENTROPY_CONTEXT *const l = left + vp8_block2left_8x8[blk];

    optimize_b_8x8(x, blk, PLANE_TYPE_UV, a, l, rtcd);
    a[1] = a[0];
    l[1] = l[0];
  }
}
#endif

John Koleszar's avatar
John Koleszar committed
1288
1289
1290
/* Encode one inter-predicted macroblock.
 *
 * Steps, in order: build the inter predictors, subtract them from the
 * source, forward transform, quantize, optionally run the coefficient
 * optimizer (when x->optimize is set), inverse transform, and finally
 * invoke recon_mb. The transform size stored in
 * mode_info_context->mbmi.txfm_size selects the 16x16 (only when
 * CONFIG_TX16X16 is enabled), 8x8, or default variant of each step.
 *
 * With ENC_DEBUG defined and enc_debug non-zero, coefficient and
 * reconstruction dumps are printed to stdout.
 */
void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
  const int tx_size = x->e_mbd.mode_info_context->mbmi.txfm_size;

  /* Prediction and residual. */
  vp8_build_inter_predictors_mb(&x->e_mbd);
  vp8_subtract_mb(rtcd, x);

  /* Forward transform. */
#if CONFIG_TX16X16
  if (tx_size == TX_16X16) {
    vp8_transform_mb_16x16(x);
  } else
#endif
  if (tx_size == TX_8X8) {
    vp8_transform_mb_8x8(x);
  } else {
    transform_mb(x);
  }

  /* Quantization. */
#if CONFIG_TX16X16
  if (tx_size == TX_16X16) {
    vp8_quantize_mb_16x16(x);
  } else
#endif
  if (tx_size == TX_8X8) {
    vp8_quantize_mb_8x8(x);
  } else {
    vp8_quantize_mb(x);
  }

  /* Optional coefficient optimization. */
  if (x->optimize) {
#if CONFIG_TX16X16
    if (tx_size == TX_16X16) {
      optimize_mb_16x16(x, rtcd);
    } else
#endif
    if (tx_size == TX_8X8) {
      optimize_mb_8x8(x, rtcd);
    } else {
      optimize_mb(x, rtcd);
    }
  }

  /* Inverse transform. */
#if CONFIG_TX16X16
  if (tx_size == TX_16X16) {
    vp8_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
  } else
#endif
  if (tx_size == TX_8X8) {
    vp8_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
  } else {
    vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
  }

#ifdef ENC_DEBUG
  /* Debug dump, only for the 8x8 transform size. */
  if (tx_size == TX_8X8 && enc_debug) {
    int n;

    printf("qcoeff:\n");
    printf("%d %d:\n", x->e_mbd.mb_to_left_edge, x->e_mbd.mb_to_top_edge);
    for (n = 0; n < 400; n++) {
      printf("%3d ", x->e_mbd.qcoeff[n]);
      if (n % 16 == 15) printf("\n");
    }
    printf("dqcoeff:\n");
    for (n = 0; n < 400; n++) {
      printf("%3d ", x->e_mbd.dqcoeff[n]);
      if (n % 16 == 15) printf("\n");
    }
    printf("diff:\n");
    for (n = 0; n < 400; n++) {
      printf("%3d ", x->e_mbd.diff[n]);
      if (n % 16 == 15) printf("\n");
    }
    printf("predictor:\n");
    /* NOTE(review): reads 400 predictor entries; confirm the array is at
     * least that large.
     */
    for (n = 0; n < 400; n++) {
      printf("%3d ", x->e_mbd.predictor[n]);
      if (n % 16 == 15) printf("\n");
    }
    printf("\n");
  }
#endif

  /* Reconstruction. */
  RECON_INVOKE(&rtcd->common->recon, recon_mb)
  (IF_RTCD(&rtcd->common->recon), &x->e_mbd);

#ifdef ENC_DEBUG
  if (enc_debug) {
    int blk, row, col;

    printf("Final Reconstruction\n");
    for (blk = 0; blk < 16; blk += 4) {
      BLOCKD *bd = &x->e_mbd.block[blk];
      unsigned char *dst = *(bd->base_dst) + bd->dst;

      /* Four rows of sixteen pixels starting at this block's destination. */
      for (row = 0; row < 4; row++) {
        for (col = 0; col < 16; col++)
          printf("%3d ", dst[col]);
        printf("\n");
        dst += bd->dst_stride;
      }
    }
  }
#endif
}


1387
/* this function is used by first pass only */
John Koleszar's avatar
John Koleszar committed
1388
1389
void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
  int tx_type = x->e_mbd.mode_info_context->mbmi.txfm_size;
Paul Wilkins's avatar
Paul Wilkins committed
1390

John Koleszar's avatar
John Koleszar committed
1391
  BLOCK *b = &x->block[0];
1392

1393
#if CONFIG_PRED_FILTER
John Koleszar's avatar
John Koleszar committed
1394
1395
  // Disable the prediction filter for firstpass
  x->e_mbd.mode_info_context->mbmi.pred_filter_enabled = 0;
1396
1397
#endif

1398
  vp8_build_1st_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16);
John Koleszar's avatar
John Koleszar committed
1399

John Koleszar's avatar
John Koleszar committed
1400
  ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, *(b->base_src), x->e_mbd.predictor, b->src_stride);
John Koleszar's avatar
John Koleszar committed
1401

Daniel Kang's avatar
Daniel Kang committed
1402
1403
1404
1405
1406
#if CONFIG_TX16X16
  if (tx_type == TX_16X16)
    vp8_transform_mby_16x16(x);
  else
#endif
John Koleszar's avatar
John Koleszar committed
1407
1408
1409
1410
  if (tx_type == TX_8X8)
    vp8_transform_mby_8x8(x);
  else
    transform_mby(x);