/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include "./vpx_config.h"
12
#include "vp9/encoder/vp9_encodemb.h"
13
#include "vp9/common/vp9_reconinter.h"
14
15
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_tokenize.h"
16
17
#include "vp9/common/vp9_invtrans.h"
#include "vp9/common/vp9_reconintra.h"
John Koleszar's avatar
John Koleszar committed
18
#include "vpx_mem/vpx_mem.h"
19
#include "vp9/encoder/vp9_rdopt.h"
20
#include "vp9/common/vp9_systemdependent.h"
21
#include "vp9_rtcd.h"
John Koleszar's avatar
John Koleszar committed
22

23
/* Computes the residual of one 4x4 block: diff = source - predictor.
 *
 * be:    supplies the source pixels (*be->base_src + be->src), the source
 *        stride (be->src_stride) and the output buffer (be->src_diff).
 * bd:    supplies the predictor pixels (bd->predictor).
 * pitch: per-row advance, in elements, applied to both the diff and the
 *        predictor pointers.
 */
void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
  uint8_t *src_ptr = (*(be->base_src) + be->src);
  int16_t *diff_ptr = be->src_diff;
  uint8_t *pred_ptr = bd->predictor;
  int src_stride = be->src_stride;
  int r, c;

  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++)
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];

    diff_ptr += pitch;
    pred_ptr += pitch;
    src_ptr  += src_stride;
  }
}

41
/* Computes the residual of an 8x8 area (four 4x4 blocks):
 * diff = source - predictor.
 *
 * be:    supplies the source pixels (*be->base_src + be->src), the source
 *        stride (be->src_stride) and the output buffer (be->src_diff).
 * bd:    supplies the predictor pixels (bd->predictor).
 * pitch: per-row advance, in elements, applied to both the diff and the
 *        predictor pointers.
 */
void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
  uint8_t *src_ptr = (*(be->base_src) + be->src);
  int16_t *diff_ptr = be->src_diff;
  uint8_t *pred_ptr = bd->predictor;
  int src_stride = be->src_stride;
  int r, c;

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++)
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];

    diff_ptr += pitch;
    pred_ptr += pitch;
    src_ptr  += src_stride;
  }
}

58
59
60
61
62
63
/* Computes the two 8x8 chroma residuals of a macroblock.
 *
 * diff:       base of the residual buffer; U is written at diff + 256 and
 *             V at diff + 320, each as 8 rows of 8 contiguous values.
 * usrc/vsrc:  source U and V pixels, rows src_stride apart.
 * upred/vpred: predicted U and V pixels, rows dst_stride apart.
 */
void vp9_subtract_mbuv_s_c(int16_t *diff, const uint8_t *usrc,
                           const uint8_t *vsrc, int src_stride,
                           const uint8_t *upred,
                           const uint8_t *vpred, int dst_stride) {
  int16_t *udiff = diff + 256;
  int16_t *vdiff = diff + 320;
  int r, c;

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++)
      udiff[c] = usrc[c] - upred[c];

    udiff += 8;
    upred += dst_stride;
    usrc  += src_stride;
  }

  for (r = 0; r < 8; r++) {
    for (c = 0; c < 8; c++) {
      vdiff[c] = vsrc[c] - vpred[c];
    }

    vdiff += 8;
    vpred += dst_stride;
    vsrc  += src_stride;
  }
}
John Koleszar's avatar
John Koleszar committed
85

86
87
88
89
/* Chroma residual for a macroblock whose U and V predictors live in one
 * buffer: U at pred + 256 and V at pred + 320, both read with a row stride
 * of 8. `stride` is the row stride of the usrc/vsrc source planes.
 */
void vp9_subtract_mbuv_c(int16_t *diff, uint8_t *usrc,
                         uint8_t *vsrc, uint8_t *pred, int stride) {
  uint8_t *upred = pred + 256;
  uint8_t *vpred = pred + 320;

  vp9_subtract_mbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8);
}

94
95
/* Computes the 16x16 luma residual: diff = src - pred.
 *
 * diff: output, written as 16 rows of 16 contiguous values.
 * src:  source pixels, rows src_stride apart.
 * pred: predicted pixels, rows dst_stride apart.
 */
void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
                          const uint8_t *pred, int dst_stride) {
  int r, c;

  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++)
      diff[c] = src[c] - pred[c];

    diff += 16;
    pred += dst_stride;
    src  += src_stride;
  }
}

108
109
/* Computes the 32x32 luma residual: diff = src - pred.
 *
 * diff: output, written as 32 rows of 32 contiguous values.
 * src:  source pixels, rows src_stride apart.
 * pred: predicted pixels, rows dst_stride apart.
 */
void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
                          const uint8_t *pred, int dst_stride) {
  int r, c;

  for (r = 0; r < 32; r++) {
    for (c = 0; c < 32; c++)
      diff[c] = src[c] - pred[c];

    diff += 32;
    pred += dst_stride;
    src  += src_stride;
  }
}

122
123
124
125
126
127
/* Computes the two 16x16 chroma residuals that accompany a 32x32 luma block.
 *
 * diff:       base of the residual buffer; U is written at diff + 1024 and
 *             V at diff + 1024 + 256, each as 16 rows of 16 contiguous values.
 * usrc/vsrc:  source U and V pixels, rows src_stride apart.
 * upred/vpred: predicted U and V pixels, rows dst_stride apart.
 */
void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
                           const uint8_t *vsrc, int src_stride,
                           const uint8_t *upred,
                           const uint8_t *vpred, int dst_stride) {
  int16_t *udiff = diff + 1024;
  int16_t *vdiff = diff + 1024 + 256;
  int r, c;

  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++)
      udiff[c] = usrc[c] - upred[c];

    udiff += 16;
    upred += dst_stride;
    usrc  += src_stride;
  }

  for (r = 0; r < 16; r++) {
    for (c = 0; c < 16; c++)
      vdiff[c] = vsrc[c] - vpred[c];

    vdiff += 16;
    vpred += dst_stride;
    vsrc  += src_stride;
  }
}

149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/* Computes the 64x64 luma residual: diff = src - pred.
 *
 * diff: output, written as 64 rows of 64 contiguous values.
 * src:  source pixels, rows src_stride apart.
 * pred: predicted pixels, rows dst_stride apart.
 */
void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride,
                            const uint8_t *pred, int dst_stride) {
  int row;

  for (row = 0; row < 64; ++row) {
    int16_t *const out = diff + row * 64;
    const uint8_t *const s = src + row * src_stride;
    const uint8_t *const p = pred + row * dst_stride;
    int col;

    for (col = 0; col < 64; ++col)
      out[col] = s[col] - p[col];
  }
}

/* Computes the two 32x32 chroma residuals that accompany a 64x64 luma block.
 *
 * diff:       base of the residual buffer; U is written at diff + 4096 and
 *             V at diff + 4096 + 1024, each as 32 rows of 32 contiguous values.
 * usrc/vsrc:  source U and V pixels, rows src_stride apart.
 * upred/vpred: predicted U and V pixels, rows dst_stride apart.
 */
void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc,
                             const uint8_t *vsrc, int src_stride,
                             const uint8_t *upred,
                             const uint8_t *vpred, int dst_stride) {
  int16_t *const u_out = diff + 4096;
  int16_t *const v_out = diff + 4096 + 1024;
  int row, col;

  /* U plane. */
  for (row = 0; row < 32; ++row) {
    int16_t *const out = u_out + row * 32;
    const uint8_t *const s = usrc + row * src_stride;
    const uint8_t *const p = upred + row * dst_stride;

    for (col = 0; col < 32; ++col)
      out[col] = s[col] - p[col];
  }

  /* V plane. */
  for (row = 0; row < 32; ++row) {
    int16_t *const out = v_out + row * 32;
    const uint8_t *const s = vsrc + row * src_stride;
    const uint8_t *const p = vpred + row * dst_stride;

    for (col = 0; col < 32; ++col)
      out[col] = s[col] - p[col];
  }
}

193
194
/* 16x16 luma residual for a macroblock whose predictor rows are 16 apart.
 * `stride` is the row stride of the source plane `src`.
 */
void vp9_subtract_mby_c(int16_t *diff, uint8_t *src,
                        uint8_t *pred, int stride) {
  vp9_subtract_mby_s_c(diff, src, stride, pred, 16);
}

198
/* Fills x->src_diff with the luma and chroma residuals of the macroblock,
 * using x->e_mbd.predictor as the prediction for both calls.
 */
static void subtract_mb(MACROBLOCK *x) {
  BLOCK *b = &x->block[0];

  /* Luma: source pointer and stride are taken from block 0. */
  vp9_subtract_mby(x->src_diff, *(b->base_src), x->e_mbd.predictor,
                   b->src_stride);
  /* Chroma: source planes and stride are taken from x->src. */
  vp9_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->e_mbd.predictor, x->src.uv_stride);
}

207
/* Forward-transforms the 16 luma 4x4 blocks of a macroblock.
 *
 * Per block: a non-DCT_DCT transform type goes through vp9_short_fht4x4.
 * Otherwise, an even-indexed block whose right neighbour (i + 1) is also
 * DCT_DCT is transformed together with that neighbour by fwd_txm8x4, and the
 * neighbour is skipped; any remaining block uses fwd_txm4x4.
 */
void vp9_transform_mby_4x4(MACROBLOCK *x) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;

  for (i = 0; i < 16; i++) {
    BLOCK *b = &x->block[i];
    TX_TYPE tx_type = get_tx_type_4x4(xd, i);
    if (tx_type != DCT_DCT) {
      vp9_short_fht4x4(b->src_diff, b->coeff, 16, tx_type);
    } else if (!(i & 1) && get_tx_type_4x4(xd, i + 1) == DCT_DCT) {
      x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 32);
      i++;  /* block i + 1 was covered by the 8x4 call */
    } else {
      x->fwd_txm4x4(x->block[i].src_diff, x->block[i].coeff, 32);
    }
  }
}

225
/* Forward-transforms the chroma blocks (indices 16..23) of a macroblock,
 * two adjacent 4x4 blocks per fwd_txm8x4 call.
 */
void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 2)
    x->fwd_txm8x4(x->block[i].src_diff, x->block[i].coeff, 16);
}

232
/* Runs the 4x4 forward transform on luma, then on chroma. */
static void transform_mb_4x4(MACROBLOCK *x) {
  vp9_transform_mby_4x4(x);
  vp9_transform_mbuv_4x4(x);
}

237
/* Forward-transforms the luma plane of a macroblock as four 8x8 blocks,
 * addressed through the 4x4 block indices 0, 8, 2 and 10.
 *
 * A non-DCT_DCT transform type goes through vp9_short_fht8x8; otherwise
 * fwd_txm8x8 is used. For indices 2 and 10 the coefficients are written to
 * the coeff buffer of block i + 2 rather than block i.
 */
void vp9_transform_mby_8x8(MACROBLOCK *x) {
  int i;
  MACROBLOCKD *xd = &x->e_mbd;
  TX_TYPE tx_type;

  /* Blocks 0 and 8: output goes to the block's own coeff buffer. */
  for (i = 0; i < 9; i += 8) {
    BLOCK *b = &x->block[i];
    tx_type = get_tx_type_8x8(xd, i);
    if (tx_type != DCT_DCT) {
      vp9_short_fht8x8(b->src_diff, b->coeff, 16, tx_type);
    } else {
      x->fwd_txm8x8(x->block[i].src_diff, x->block[i].coeff, 32);
    }
  }
  /* Blocks 2 and 10: output goes to the coeff buffer of block i + 2. */
  for (i = 2; i < 11; i += 8) {
    BLOCK *b = &x->block[i];
    tx_type = get_tx_type_8x8(xd, i);
    if (tx_type != DCT_DCT) {
      vp9_short_fht8x8(b->src_diff, (b + 2)->coeff, 16, tx_type);
    } else {
      x->fwd_txm8x8(x->block[i].src_diff, x->block[i + 2].coeff, 32);
    }
  }
}

262
/* Forward-transforms the chroma of a macroblock with fwd_txm8x8, once
 * starting at block 16 and once starting at block 20.
 */
void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
  int i;

  for (i = 16; i < 24; i += 4)
    x->fwd_txm8x8(x->block[i].src_diff, x->block[i].coeff, 16);
}

269
270
271
/* Runs the 8x8 forward transform on luma, then on chroma. */
void vp9_transform_mb_8x8(MACROBLOCK *x) {
  vp9_transform_mby_8x8(x);
  vp9_transform_mbuv_8x8(x);
}

274
/* Forward-transforms the luma plane of a macroblock as a single 16x16 block.
 * A non-DCT_DCT transform type goes through vp9_short_fht16x16; otherwise
 * fwd_txm16x16 is used. vp9_clear_system_state() is called before the
 * transform.
 */
void vp9_transform_mby_16x16(MACROBLOCK *x) {
  MACROBLOCKD *xd = &x->e_mbd;
  BLOCK *b = &x->block[0];
  TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
  vp9_clear_system_state();
  if (tx_type != DCT_DCT) {
    vp9_short_fht16x16(b->src_diff, b->coeff, 16, tx_type);
  } else {
    x->fwd_txm16x16(x->block[0].src_diff, x->block[0].coeff, 32);
  }
}

286
287
288
/* Runs the 16x16 forward transform on luma and the 8x8 one on chroma. */
void vp9_transform_mb_16x16(MACROBLOCK *x) {
  vp9_transform_mby_16x16(x);
  vp9_transform_mbuv_8x8(x);
}
290

291
/* Forward-transforms the luma residual at x->src_diff into x->coeff with a
 * single 32x32 DCT.
 */
void vp9_transform_sby_32x32(MACROBLOCK *x) {
  vp9_short_fdct32x32(x->src_diff, x->coeff, 64);
}

/* Forward-transforms a 32x32 luma residual as four 16x16 blocks.
 *
 * Block n sits at column (n & 1), row (n >> 1) of a 32-wide residual and
 * writes 256 coefficients at x->coeff + n * 256. A non-DCT_DCT transform type
 * goes through vp9_short_fht16x16; otherwise fwd_txm16x16 is used.
 */
void vp9_transform_sby_16x16(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < 4; n++) {
    const int x_idx = n & 1, y_idx = n >> 1;
    const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);

    if (tx_type != DCT_DCT) {
      vp9_short_fht16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
                         x->coeff + n * 256, 32, tx_type);
    } else {
      x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
                      x->coeff + n * 256, 64);
    }
  }
}

/* Forward-transforms a 32x32 luma residual as sixteen 8x8 blocks.
 *
 * Block n sits at column (n & 3), row (n >> 2) of a 32-wide residual and
 * writes 64 coefficients at x->coeff + n * 64. A non-DCT_DCT transform type
 * goes through vp9_short_fht8x8; otherwise fwd_txm8x8 is used.
 */
void vp9_transform_sby_8x8(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < 16; n++) {
    const int x_idx = n & 3, y_idx = n >> 2;
    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);

    if (tx_type != DCT_DCT) {
      vp9_short_fht8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
                       x->coeff + n * 64, 32, tx_type);
    } else {
      x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
                    x->coeff + n * 64, 64);
    }
  }
}

/* Forward-transforms a 32x32 luma residual as sixty-four 4x4 blocks.
 *
 * Block n sits at column (n & 7), row (n >> 3) of a 32-wide residual and
 * writes 16 coefficients at x->coeff + n * 16. A non-DCT_DCT transform type
 * goes through vp9_short_fht4x4; otherwise fwd_txm4x4 is used.
 */
void vp9_transform_sby_4x4(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < 64; n++) {
    const int x_idx = n & 7, y_idx = n >> 3;
    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);

    if (tx_type != DCT_DCT) {
      vp9_short_fht4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
                       x->coeff + n * 16, 32, tx_type);
    } else {
      x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
                    x->coeff + n * 16, 64);
    }
  }
}

/* Forward-transforms the two 16x16 chroma residuals stored at offsets 1024
 * and 1280 of x->src_diff, writing coefficients at the same offsets of
 * x->coeff.
 */
void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
  vp9_clear_system_state();
  x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32);
  x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32);
}

/* Forward-transforms the two 16x16 chroma residuals (at offsets 1024 and
 * 1280 of x->src_diff) as four 8x8 blocks each. Block `blk` of each plane
 * writes 64 coefficients at offset blk * 64 within that plane's coeff area.
 */
void vp9_transform_sbuv_8x8(MACROBLOCK *x) {
  int blk;

  vp9_clear_system_state();
  for (blk = 0; blk < 4; ++blk) {
    const int col = blk & 1;
    const int row = blk >> 1;
    const int src_off = row * 16 * 8 + col * 8;
    const int coeff_off = blk * 64;

    x->fwd_txm8x8(x->src_diff + 1024 + src_off,
                  x->coeff + 1024 + coeff_off, 32);
    x->fwd_txm8x8(x->src_diff + 1280 + src_off,
                  x->coeff + 1280 + coeff_off, 32);
  }
}

/* Forward-transforms the two 16x16 chroma residuals (at offsets 1024 and
 * 1280 of x->src_diff) as sixteen 4x4 blocks each. Block `blk` of each plane
 * writes 16 coefficients at offset blk * 16 within that plane's coeff area.
 */
void vp9_transform_sbuv_4x4(MACROBLOCK *x) {
  int blk;

  vp9_clear_system_state();
  for (blk = 0; blk < 16; ++blk) {
    const int col = blk & 3;
    const int row = blk >> 2;
    const int src_off = row * 16 * 4 + col * 4;
    const int coeff_off = blk * 16;

    x->fwd_txm4x4(x->src_diff + 1024 + src_off,
                  x->coeff + 1024 + coeff_off, 32);
    x->fwd_txm4x4(x->src_diff + 1280 + src_off,
                  x->coeff + 1280 + coeff_off, 32);
  }
}

/* Forward-transforms a 64x64 luma residual as four 32x32 DCT blocks.
 * Block `blk` sits at column (blk & 1), row (blk >> 1) of a 64-wide residual
 * and writes 1024 coefficients at x->coeff + blk * 1024.
 */
void vp9_transform_sb64y_32x32(MACROBLOCK *x) {
  int blk;

  for (blk = 0; blk < 4; ++blk) {
    const int col = blk & 1;
    const int row = blk >> 1;
    const int src_off = row * 64 * 32 + col * 32;

    vp9_short_fdct32x32(x->src_diff + src_off, x->coeff + blk * 1024, 128);
  }
}

/* Forward-transforms a 64x64 luma residual as sixteen 16x16 blocks.
 *
 * Block n sits at column (n & 3), row (n >> 2) of a 64-wide residual and
 * writes 256 coefficients at x->coeff + n * 256. A non-DCT_DCT transform type
 * goes through vp9_short_fht16x16; otherwise fwd_txm16x16 is used.
 */
void vp9_transform_sb64y_16x16(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < 16; n++) {
    const int x_idx = n & 3, y_idx = n >> 2;
    const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);

    if (tx_type != DCT_DCT) {
      vp9_short_fht16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
                         x->coeff + n * 256, 64, tx_type);
    } else {
      x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
                      x->coeff + n * 256, 128);
    }
  }
}

/* Forward-transforms a 64x64 luma residual as sixty-four 8x8 blocks.
 *
 * Block n sits at column (n & 7), row (n >> 3) of a 64-wide residual and
 * writes 64 coefficients at x->coeff + n * 64. A non-DCT_DCT transform type
 * goes through vp9_short_fht8x8; otherwise fwd_txm8x8 is used.
 */
void vp9_transform_sb64y_8x8(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < 64; n++) {
    const int x_idx = n & 7, y_idx = n >> 3;
    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);

    if (tx_type != DCT_DCT) {
      vp9_short_fht8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
                       x->coeff + n * 64, 64, tx_type);
    } else {
      x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
                    x->coeff + n * 64, 128);
    }
  }
}

/* Forward-transforms a 64x64 luma residual as 256 4x4 blocks.
 *
 * Block n sits at column (n & 15), row (n >> 4) of a 64-wide residual and
 * writes 16 coefficients at x->coeff + n * 16. A non-DCT_DCT transform type
 * goes through vp9_short_fht4x4; otherwise fwd_txm4x4 is used.
 */
void vp9_transform_sb64y_4x4(MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < 256; n++) {
    const int x_idx = n & 15, y_idx = n >> 4;
    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);

    if (tx_type != DCT_DCT) {
      /* Must be the 4x4 hybrid transform: only 16 coefficients are reserved
       * per block here, so an 8x8 transform would read and write beyond this
       * block's area. */
      vp9_short_fht4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
                       x->coeff + n * 16, 64, tx_type);
    } else {
      x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
                    x->coeff + n * 16, 128);
    }
  }
}

/* Forward-transforms the two 32x32 chroma residuals stored at offsets 4096
 * and 4096 + 1024 of x->src_diff, writing coefficients at the same offsets
 * of x->coeff.
 */
void vp9_transform_sb64uv_32x32(MACROBLOCK *x) {
  const int u_off = 4096;
  const int v_off = 4096 + 1024;

  vp9_clear_system_state();
  vp9_short_fdct32x32(x->src_diff + u_off, x->coeff + u_off, 64);
  vp9_short_fdct32x32(x->src_diff + v_off, x->coeff + v_off, 64);
}

/* Forward-transforms the two 32x32 chroma residuals (at offsets 4096 and
 * 4096 + 1024 of x->src_diff) as four 16x16 blocks each. Block `blk` of each
 * plane writes 256 coefficients at offset blk * 256 within that plane's
 * coeff area.
 */
void vp9_transform_sb64uv_16x16(MACROBLOCK *x) {
  int blk;

  vp9_clear_system_state();
  for (blk = 0; blk < 4; ++blk) {
    const int col = blk & 1;
    const int row = blk >> 1;
    const int src_off = row * 32 * 16 + col * 16;
    const int coeff_off = blk * 256;

    x->fwd_txm16x16(x->src_diff + 4096 + src_off,
                    x->coeff + 4096 + coeff_off, 64);
    x->fwd_txm16x16(x->src_diff + 4096 + 1024 + src_off,
                    x->coeff + 4096 + 1024 + coeff_off, 64);
  }
}

/* Forward-transforms the two 32x32 chroma residuals (at offsets 4096 and
 * 4096 + 1024 of x->src_diff) as sixteen 8x8 blocks each. Block `blk` of
 * each plane writes 64 coefficients at offset blk * 64 within that plane's
 * coeff area.
 */
void vp9_transform_sb64uv_8x8(MACROBLOCK *x) {
  int blk;

  vp9_clear_system_state();
  for (blk = 0; blk < 16; ++blk) {
    const int col = blk & 3;
    const int row = blk >> 2;
    const int src_off = row * 32 * 8 + col * 8;
    const int coeff_off = blk * 64;

    x->fwd_txm8x8(x->src_diff + 4096 + src_off,
                  x->coeff + 4096 + coeff_off, 64);
    x->fwd_txm8x8(x->src_diff + 4096 + 1024 + src_off,
                  x->coeff + 4096 + 1024 + coeff_off, 64);
  }
}

/* Forward-transforms the two 32x32 chroma residuals (at offsets 4096 and
 * 4096 + 1024 of x->src_diff) as sixty-four 4x4 blocks each. Block n of each
 * plane writes 16 coefficients at offset n * 16 within that plane's coeff
 * area.
 */
void vp9_transform_sb64uv_4x4(MACROBLOCK *x) {
  int n;

  vp9_clear_system_state();
  for (n = 0; n < 64; n++) {
    const int x_idx = n & 7, y_idx = n >> 3;

    x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4,
                  x->coeff + 4096 + n * 16, 64);
    x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4,
                  x->coeff + 4096 + 1024 + n * 16, 64);
  }
}

498
499
/* Low 8 bits of (128 + R * RM). The DM and D arguments are accepted but not
 * used by the expansion. */
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
#define RDTRUNC_8x8(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
500
typedef struct vp9_token_state vp9_token_state;

/* One node of the trellis built by optimize_b(). */
struct vp9_token_state {
  int           rate;   /* accumulated rate of the path through this node */
  int           error;  /* accumulated squared error of that path */
  int           next;   /* index of the following node in the trellis */
  signed char   token;  /* token recorded for this node */
  short         qc;     /* quantized coefficient value chosen at this node */
};

510
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2

/* Multiplier applied to the RD multiplier in optimize_b(), indexed by plane
 * type. Only the first two entries are given explicitly; the remaining two
 * are zero-initialized. */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  UV_RD_MULT,
};

519
520
521
522
523
524
525
526
527
528
/* Recomputes rd_cost0 and rd_cost1 from (rate0, error0) and (rate1, error1)
 * using RDCOST with the rdmult and rddiv in scope at the expansion site.
 * When the two costs tie, both are replaced by their RDTRUNC values so the
 * caller's comparison can still tell them apart. All names used here must be
 * declared by the enclosing function. */
#define UPDATE_RD_COST()\
{\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
}

529
530
531
532
533
534
535
// Placeholder for now: always starts from a zero "recent energy" value.
// It may ultimately need to scan previous tokens to work out the correct
// context.
static int trellis_get_coeff_context(int token) {
  int energy = 0;

  return vp9_get_coef_context(&energy, token);
}

536
537
/* Trellis (Viterbi) optimization of the quantized coefficients of one block.
 *
 * Walking the scan order backwards from the current end-of-block, every
 * non-zero quantized coefficient gets a trellis node with two states: state 0
 * keeps the value, state 1 holds the value moved one step towards zero when
 * the "shortcut" condition below allows it (otherwise it also keeps the
 * value). The cheapest path according to RDCOST is then written back.
 *
 * cm:          only used when CONFIG_CODE_NONZEROCOUNT is enabled.
 * mb:          supplies coefficients, token costs and RD parameters.
 * ib:          block index; coefficient buffers are addressed at ib * 16.
 * type:        plane type; indexes plane_rd_mult and the cost tables.
 * dequant_ptr: dequantizer values; entry 0 is used for scan position rc == 0
 *              and entry 1 for every other position.
 * a, l:        above/left entropy contexts; read to form the context of the
 *              first token and both overwritten with (final_eob > 0).
 * tx_size:     selects scan order and coefficient count; any value other
 *              than TX_8X8, TX_16X16 or TX_32X32 is handled as TX_4X4.
 *
 * Outputs: xd->qcoeff and xd->dqcoeff for this block, xd->eobs[ib], *a and *l
 * (plus xd->nzcs[ib] when CONFIG_CODE_NONZEROCOUNT is enabled).
 */
static void optimize_b(VP9_COMMON *const cm,
                       MACROBLOCK *mb, int ib, PLANE_TYPE type,
                       const int16_t *dequant_ptr,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                       int tx_size) {
  const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
  MACROBLOCKD *const xd = &mb->e_mbd;
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
  const int16_t *coeff_ptr = mb->coeff + ib * 16;
  int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
  int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16;
  int eob = xd->eobs[ib], final_eob, sz = 0;
  const int i0 = 0;
  int rc, x, next, i;
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
  int err_mult = plane_rd_mult[type];
  int default_eob;
  int const *scan;
  /* Scale factor of 2 for 32x32 transforms, 1 otherwise. */
  const int mul = 1 + (tx_size == TX_32X32);
#if CONFIG_CODE_NONZEROCOUNT
  // TODO(debargha): the dynamic programming approach used in this function
  // is not compatible with the true rate cost when nzcs are used. Note
  // the total rate is the sum of the nzc rate and the individual token
  // rates. The latter part can be optimized in this function, but because
  // the nzc rate is a function of all the other tokens without a Markov
  // relationship this rate cannot be considered correctly.
  // The current implementation uses a suboptimal approach to account for
  // the nzc rates somewhat, but in reality the optimization approach needs
  // to change substantially.
  uint16_t nzc = xd->nzcs[ib];
  uint16_t nzc0, nzc1;
  uint16_t final_nzc = 0, final_nzc_exp;
  int nzc_context = vp9_get_nzc_context(cm, xd, ib);
  unsigned int *nzc_cost;
  nzc0 = nzc1 = nzc;
#endif

  /* Pick the scan order and the coefficient count for this transform size. */
  switch (tx_size) {
    default:
    case TX_4X4: {
      const TX_TYPE tx_type = get_tx_type_4x4(xd, ib);
      default_eob = 16;
#if CONFIG_CODE_NONZEROCOUNT
      nzc_cost = mb->nzc_costs_4x4[nzc_context][ref][type];
#endif
      if (tx_type == DCT_ADST) {
        scan = vp9_col_scan_4x4;
      } else if (tx_type == ADST_DCT) {
        scan = vp9_row_scan_4x4;
      } else {
        scan = vp9_default_zig_zag1d_4x4;
      }
      break;
    }
    case TX_8X8:
      scan = vp9_default_zig_zag1d_8x8;
      default_eob = 64;
#if CONFIG_CODE_NONZEROCOUNT
      nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
#endif
      break;
    case TX_16X16:
      scan = vp9_default_zig_zag1d_16x16;
      default_eob = 256;
#if CONFIG_CODE_NONZEROCOUNT
      nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
#endif
      break;
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
      default_eob = 1024;
#if CONFIG_CODE_NONZEROCOUNT
      nzc_cost = mb->nzc_costs_32x32[nzc_context][ref][type];
#endif
      break;
  }

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
  memset(best_index, 0, sizeof(best_index));
  /* Initialize the sentinel node of the trellis. */
#if CONFIG_CODE_NONZEROCOUNT
  tokens[eob][0].rate = nzc_cost[nzc];
#else
  tokens[eob][0].rate = 0;
#endif
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  /* Visit scan positions eob - 1 down to i0. */
  for (i = eob; i-- > i0;) {
    int base_bits, d2, dx;
#if CONFIG_CODE_NONZEROCOUNT
    int new_nzc0, new_nzc1;
#endif

    rc = scan[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp9_dct_value_tokens_ptr + x)->Token;
      /* Consider both possible successor states. */
      if (next < default_eob) {
        band = get_coef_band(tx_size, i + 1);
        pt = trellis_get_coeff_context(t0);
        rate0 +=
          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
        rate1 +=
          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
      }
      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);
      dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_index[i][0] = best;
#if CONFIG_CODE_NONZEROCOUNT
      new_nzc0 = (best ? nzc1 : nzc0);
#endif

      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

      /* The smaller magnitude is only tried when the dequantized magnitude
       * exceeds the scaled original by less than one dequantizer step. */
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
                                         dequant_ptr[rc != 0]))
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        /* sz is -1 for negative x and 0 otherwise, so this moves x one step
         * towards zero. */
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
#if CONFIG_CODE_NONZEROCOUNT
        // Account for rate drop because of the nzc change.
        // TODO(debargha): Find a better solution
        rate0 -= nzc_cost[nzc0] - nzc_cost[nzc0 - 1];
        rate1 -= nzc_cost[nzc1] - nzc_cost[nzc1 - 1];
#endif
      } else {
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->Token;
      }
      if (next < default_eob) {
        band = get_coef_band(tx_size, i + 1);
        if (t0 != DCT_EOB_TOKEN) {
          pt = trellis_get_coeff_context(t0);
          rate0 += mb->token_costs[tx_size][type][ref][band][pt][
              tokens[next][0].token];
        }
        if (t1 != DCT_EOB_TOKEN) {
          pt = trellis_get_coeff_context(t1);
          rate1 += mb->token_costs[tx_size][type][ref][band][pt][
              tokens[next][1].token];
        }
      }

      UPDATE_RD_COST();
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);

      if (shortcut) {
        /* (v + sz) ^ sz yields v when sz == 0 and -v when sz == -1, so the
         * dequantizer step is applied with the sign of the coefficient.
         * Without the shortcut, d2 from the first possibility is reused. */
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_index[i][1] = best;
#if CONFIG_CODE_NONZEROCOUNT
      new_nzc1 = (best ? nzc1 : nzc0) - (!x);
      nzc0 = new_nzc0;
      nzc1 = new_nzc1;
#endif
      /* Finally, make this the new head of the trellis. */
      next = i;
    }
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
      band = get_coef_band(tx_size, i + 1);
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
    }
  }

  /* Now pick the best path through the whole trellis. */
  /* The loop above leaves i == i0 - 1, so this is the band of position i0. */
  band = get_coef_band(tx_size, i + 1);
  VP9_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
#if CONFIG_CODE_NONZEROCOUNT
  final_nzc_exp = (best ? nzc1 : nzc0);
#endif
  /* Follow the chosen path forwards and write the coefficients back. */
  final_eob = i0 - 1;
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x) {
      final_eob = i;
#if CONFIG_CODE_NONZEROCOUNT
      ++final_nzc;
#endif
    }
    rc = scan[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;

    next = tokens[i][best].next;
    best = best_index[i][best];
  }
  final_eob++;

  xd->eobs[ib] = final_eob;
  *a = *l = (final_eob > 0);
#if CONFIG_CODE_NONZEROCOUNT
  assert(final_nzc == final_nzc_exp);
  xd->nzcs[ib] = final_nzc;
#endif
}

811
/* Calls optimize_b() with TX_4X4 on blocks 0..15 (PLANE_TYPE_Y_WITH_DC).
 *
 * The above/left entropy contexts are copied into locals first.  optimize_b()
 * stores the new context through the pointers it is given, so those updates
 * are seen by later blocks of this loop but land only in the local copies.
 * Returns without doing anything when either context pointer is NULL.
 */
void vp9_optimize_mby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *const above_ctx = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left_ctx = (ENTROPY_CONTEXT *)&left_copy;
  int blk;

  if (!(x->e_mbd.above_context && x->e_mbd.left_context))
    return;

  vpx_memcpy(&above_copy, x->e_mbd.above_context, sizeof(above_copy));
  vpx_memcpy(&left_copy, x->e_mbd.left_context, sizeof(left_copy));

  for (blk = 0; blk < 16; ++blk) {
    /* Context slots this block reads and then overwrites. */
    ENTROPY_CONTEXT *const a = above_ctx + vp9_block2above[TX_4X4][blk];
    ENTROPY_CONTEXT *const l = left_ctx + vp9_block2left[TX_4X4][blk];

    optimize_b(cm, x, blk, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[blk].dequant,
               a, l, TX_4X4);
  }
}

833
/* Calls optimize_b() with TX_4X4 on blocks 16..23 (PLANE_TYPE_UV).
 *
 * Works on local copies of the above/left entropy contexts, so the context
 * values optimize_b() stores through its pointers affect later blocks of
 * this loop but land only in the local copies.
 * Returns without doing anything when either context pointer is NULL.
 */
void vp9_optimize_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *const above_ctx = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left_ctx = (ENTROPY_CONTEXT *)&left_copy;
  int blk;

  if (!(x->e_mbd.above_context && x->e_mbd.left_context))
    return;

  vpx_memcpy(&above_copy, x->e_mbd.above_context, sizeof(above_copy));
  vpx_memcpy(&left_copy, x->e_mbd.left_context, sizeof(left_copy));

  for (blk = 16; blk < 24; ++blk) {
    /* Context slots this block reads and then overwrites. */
    ENTROPY_CONTEXT *const a = above_ctx + vp9_block2above[TX_4X4][blk];
    ENTROPY_CONTEXT *const l = left_ctx + vp9_block2left[TX_4X4][blk];

    optimize_b(cm, x, blk, PLANE_TYPE_UV, x->e_mbd.block[blk].dequant,
               a, l, TX_4X4);
  }
}

855
856
857
/* Optimizes a whole macroblock with 4x4 transforms: the Y blocks first,
 * then the UV blocks. */
static void optimize_mb_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
  vp9_optimize_mby_4x4(cm, x);
  vp9_optimize_mbuv_4x4(cm, x);
}

860
/* Calls optimize_b() with TX_8X8 on blocks 0, 4, 8 and 12
 * (PLANE_TYPE_Y_WITH_DC).
 *
 * Each 8x8 block covers two adjacent context entries.  They are merged into
 * a single flag for optimize_b(), and the flag it leaves behind is written
 * back to both entries.  All of this happens on local copies of the
 * contexts.
 * Returns without doing anything when either context pointer is NULL.
 */
void vp9_optimize_mby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
  ENTROPY_CONTEXT_PLANES above_copy, left_copy;
  ENTROPY_CONTEXT *const above_ctx = (ENTROPY_CONTEXT *)&above_copy;
  ENTROPY_CONTEXT *const left_ctx = (ENTROPY_CONTEXT *)&left_copy;
  int blk;

  if (!(x->e_mbd.above_context && x->e_mbd.left_context))
    return;

  vpx_memcpy(&above_copy, x->e_mbd.above_context, sizeof(above_copy));
  vpx_memcpy(&left_copy, x->e_mbd.left_context, sizeof(left_copy));

  for (blk = 0; blk < 16; blk += 4) {
    ENTROPY_CONTEXT *const a = above_ctx + vp9_block2above[TX_8X8][blk];
    ENTROPY_CONTEXT *const l = left_ctx + vp9_block2left[TX_8X8][blk];
    /* Merged flag: set when the two covered entries sum to non-zero. */
    ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
    ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;

    optimize_b(cm, x, blk, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[blk].dequant,
               &above_ec, &left_ec, TX_8X8);

    /* Propagate the updated flag to both covered entries. */
    a[0] = above_ec;
    a[1] = above_ec;
    l[0] = left_ec;
    l[1] = left_ec;
  }
}

886
/* Calls optimize_b() with TX_8X8 on blocks 16 and 20 (PLANE_TYPE_UV).
 *
 * The contexts in x->e_mbd are read in place.  optimize_b() is handed
 * per-block temporaries holding the merged flag of the two covered entries,
 * and this function does not write the temporaries back.
 * Returns without doing anything when either context pointer is NULL.
 */
void vp9_optimize_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
  ENTROPY_CONTEXT *const above_base = (ENTROPY_CONTEXT *)x->e_mbd.above_context;
  ENTROPY_CONTEXT *const left_base = (ENTROPY_CONTEXT *)x->e_mbd.left_context;
  int blk;

  if (!(above_base && left_base))
    return;

  for (blk = 16; blk < 24; blk += 4) {
    const ENTROPY_CONTEXT *const a = above_base + vp9_block2above[TX_8X8][blk];
    const ENTROPY_CONTEXT *const l = left_base + vp9_block2left[TX_8X8][blk];
    /* Merged flag: set when the two covered entries sum to non-zero. */
    ENTROPY_CONTEXT above_ec = (a[0] + a[1]) != 0;
    ENTROPY_CONTEXT left_ec = (l[0] + l[1]) != 0;

    optimize_b(cm, x, blk, PLANE_TYPE_UV, x->e_mbd.block[blk].dequant,
               &above_ec, &left_ec, TX_8X8);
  }
}

904
905
906
/* Optimizes a whole macroblock with 8x8 transforms: the Y blocks first,
 * then the UV blocks. */
static void optimize_mb_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
  vp9_optimize_mby_8x8(cm, x);
  vp9_optimize_mbuv_8x8(cm, x);
}

909
/* Calls optimize_b() once with TX_16X16 on block 0 (PLANE_TYPE_Y_WITH_DC).
 *
 * The four y1 entries of each context are merged into one flag, which is
 * non-zero when their sum is non-zero.  optimize_b() receives local
 * temporaries, and this function does not write them back.
 * Returns without doing anything when either context pointer is NULL.
 */
void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
  ENTROPY_CONTEXT_PLANES *const above = x->e_mbd.above_context;
  ENTROPY_CONTEXT_PLANES *const left = x->e_mbd.left_context;
  ENTROPY_CONTEXT above_ec, left_ec;
  int above_sum = 0, left_sum = 0;
  int i;

  if (!(above && left))
    return;

  for (i = 0; i < 4; ++i) {
    above_sum += above->y1[i];
    left_sum += left->y1[i];
  }
  above_ec = above_sum != 0;
  left_ec = left_sum != 0;

  optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
             &above_ec, &left_ec, TX_16X16);
}

923
924
925
/* Optimizes a whole macroblock whose Y plane uses the 16x16 transform.
 * The UV blocks go through the 8x8 path. */
static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
  vp9_optimize_mby_16x16(cm, x);
  vp9_optimize_mbuv_8x8(cm, x);
}

928
/* Calls optimize_b() once with TX_32X32 on block 0 (PLANE_TYPE_Y_WITH_DC).
 *
 * The context flag for each direction merges the first four ENTROPY_CONTEXT
 * entries of two consecutive ENTROPY_CONTEXT_PLANES elements (index 0 and
 * index 1).  It is non-zero when those eight entries sum to non-zero.
 * optimize_b() receives local temporaries, and this function does not write
 * them back.  The context pointers are not checked for NULL here.
 */
void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) {
  const ENTROPY_CONTEXT *const above0 =
      (ENTROPY_CONTEXT *)x->e_mbd.above_context;
  const ENTROPY_CONTEXT *const above1 =
      (ENTROPY_CONTEXT *)(x->e_mbd.above_context + 1);
  const ENTROPY_CONTEXT *const left0 =
      (ENTROPY_CONTEXT *)x->e_mbd.left_context;
  const ENTROPY_CONTEXT *const left1 =
      (ENTROPY_CONTEXT *)(x->e_mbd.left_context + 1);
  ENTROPY_CONTEXT above_ec, left_ec;
  int above_sum = 0, left_sum = 0;
  int i;

  for (i = 0; i < 4; ++i) {
    above_sum += above0[i] + above1[i];
    left_sum += left0[i] + left1[i];
  }
  above_ec = above_sum != 0;
  left_ec = left_sum != 0;

  optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
             &above_ec, &left_ec, TX_32X32);
}

941
/* Calls optimize_b() with TX_16X16 on four blocks laid out 2x2, using block
 * indices 0, 16, 32 and 48 (PLANE_TYPE_Y_WITH_DC).
 *
 * One context flag is kept per column (above) and per row (left).  Each is
 * seeded from the first four ENTROPY_CONTEXT entries of the matching
 * ENTROPY_CONTEXT_PLANES element, and is non-zero when they sum to non-zero.
 * optimize_b() updates the flags in the local arrays, so later blocks in the
 * same row or column see the result of earlier ones.  This function does not
 * write the flags back to x->e_mbd.  The dequant table is always taken from
 * block[0].  The context pointers are not checked for NULL here.
 */
void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) {
  const ENTROPY_CONTEXT *const above0 =
      (ENTROPY_CONTEXT *)x->e_mbd.above_context;
  const ENTROPY_CONTEXT *const above1 =
      (ENTROPY_CONTEXT *)(x->e_mbd.above_context + 1);
  const ENTROPY_CONTEXT *const left0 =
      (ENTROPY_CONTEXT *)x->e_mbd.left_context;
  const ENTROPY_CONTEXT *const left1 =
      (ENTROPY_CONTEXT *)(x->e_mbd.left_context + 1);
  ENTROPY_CONTEXT above_ec[2], left_ec[2];
  int row, col;

  above_ec[0] = (above0[0] + above0[1] + above0[2] + above0[3]) != 0;
  above_ec[1] = (above1[0] + above1[1] + above1[2] + above1[3]) != 0;
  left_ec[0] = (left0[0] + left0[1] + left0[2] + left0[3]) != 0;
  left_ec[1] = (left1[0] + left1[1] + left1[2] + left1[3]) != 0;

  /* Raster order: the column index varies fastest. */
  for (row = 0; row < 2; ++row) {
    for (col = 0; col < 2; ++col) {
      const int blk = (row * 2 + col) * 16;

      optimize_b(cm, x, blk, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant,
                 above_ec + col, left_ec + row, TX_16X16);
    }
  }
}

961
void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) {
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
  ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context;
  ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1);
  ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context;
  ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1);
  ENTROPY_CONTEXT ta[4], tl[4];
  int n;

  ta[0] = (a[0] + a[1]) != 0;
  ta[1] = (a[2] + a[3]) != 0;
  ta[2] = (a1[0] + a1[1]) != 0;
  ta[3] = (a1[2] + a1[3]) != 0;
  tl[0] = (l[0] + l[1]) != 0;
  tl[1] = (l[2] + l[3]) != 0;
  tl[2] = (l1[0] + l1[1]) != 0;
  tl[3] = (l1[2] + l1[3]) != 0;
  for (n = 0; n < 16; n++) {
    const int x_idx = n & 3, y_idx = n >> 2;

Deb Mukherjee's avatar