vp9_encodemb.c 23.1 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include "./vpx_config.h"
12
#include "vp9/encoder/vp9_encodemb.h"
13
#include "vp9/common/vp9_reconinter.h"
14
15
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_tokenize.h"
16
17
#include "vp9/common/vp9_invtrans.h"
#include "vp9/common/vp9_reconintra.h"
John Koleszar's avatar
John Koleszar committed
18
#include "vpx_mem/vpx_mem.h"
19
#include "vp9/encoder/vp9_rdopt.h"
20
#include "vp9/common/vp9_systemdependent.h"
21
#include "vp9_rtcd.h"
John Koleszar's avatar
John Koleszar committed
22

23
24
25
26
/* Writes the sample-wise residual (source minus prediction) of a
 * rows x cols region into diff_ptr.
 *
 * Each of the three buffers is walked row by row using its own stride
 * (expressed in elements of that buffer). Only the first `cols` entries of
 * each destination row are written.
 */
void vp9_subtract_block(int rows, int cols,
                        int16_t *diff_ptr, int diff_stride,
                        const uint8_t *src_ptr, int src_stride,
                        const uint8_t *pred_ptr, int pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++) {
      /* Difference of two 8-bit samples lies in [-255, 255]; fits int16_t. */
      diff_ptr[c] = (int16_t)(src_ptr[c] - pred_ptr[c]);
    }

    diff_ptr += diff_stride;
    pred_ptr += pred_stride;
    src_ptr  += src_stride;
  }
}

39

40
41
42
43
/* Computes the residual for one plane of the current block.
 *
 * The plane's width and height in samples are derived from bsize and the
 * plane's subsampling shifts. The residual (source minus the contents of the
 * plane's dst buffer) is stored in x->plane[plane].src_diff with a row
 * stride equal to the plane width.
 */
static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
  const MACROBLOCKD * const xd = &x->e_mbd;
  const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
  const int bh = 4 << (b_height_log2(bsize) - xd->plane[plane].subsampling_y);
  const uint8_t *src = x->plane[plane].src.buf;
  const int src_stride = x->plane[plane].src.stride;

  assert(plane < 3);
  vp9_subtract_block(bh, bw,
                     x->plane[plane].src_diff, bw, src, src_stride,
                     xd->plane[plane].dst.buf, xd->plane[plane].dst.stride);
}

53
54
/* Computes the residual for plane 0 of the current block. */
void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  subtract_plane(x, bsize, 0);
}

57
58
/* Computes the residual for every plane after plane 0
 * (indices 1 .. MAX_MB_PLANE - 1).
 */
void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  int i;

  for (i = 1; i < MAX_MB_PLANE; i++) {
    subtract_plane(x, bsize, i);
  }
}

64
65
66
/* Computes the residual for all planes: plane 0 first, then the rest. */
void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  vp9_subtract_sby(x, bsize);
  vp9_subtract_sbuv(x, bsize);
}

69

70
/* Applies the forward 32x32 DCT to every 32x32 tile of the plane-0 residual.
 *
 * Tiles are visited in raster order. Tile n reads from
 * src_diff + (row * stride + col) * 32 and writes 1024 coefficients at
 * coeff + n * 1024.
 */
void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  /* bw/bh: number of 32-sample tiles per row / per column. */
  const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 3);
  /* Row stride of the residual buffer, in samples. */
  const int stride = 32 << bwl;
  int n;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;

    /* NOTE(review): the pitch argument is stride * 2, presumably a byte
     * pitch for int16_t rows -- confirm against vp9_short_fdct32x32. */
    vp9_short_fdct32x32(x->plane[0].src_diff + y_idx * stride * 32 + x_idx * 32,
                        x->plane[0].coeff + n * 1024, stride * 2);
  }
}

84
/* Forward-transforms every 16x16 tile of the plane-0 residual.
 *
 * Tiles are visited in raster order. For each tile the transform type is
 * queried with get_tx_type_16x16(); DCT_DCT tiles go through the
 * x->fwd_txm16x16 function pointer, all other types through
 * vp9_short_fht16x16(). Tile n writes 256 coefficients at coeff + n * 256.
 */
void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  /* bw/bh: number of 16-sample tiles per row / per column. */
  const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 2);
  /* stride: residual row stride in samples; bstride: the same in 4-sample
   * units, used to form the block index passed to get_tx_type_16x16(). */
  const int stride = 16 << bwl, bstride = 4 << bwl;
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
    const TX_TYPE tx_type = get_tx_type_16x16(xd,
                                              (y_idx * bstride + x_idx) * 4);
    int16_t *const tile = x->plane[0].src_diff +
                          y_idx * stride * 16 + x_idx * 16;

    /* NOTE(review): the two paths are given different pitch values
     * (stride vs. stride * 2); presumably elements vs. bytes -- confirm. */
    if (tx_type != DCT_DCT) {
      vp9_short_fht16x16(tile, x->plane[0].coeff + n * 256, stride, tx_type);
    } else {
      x->fwd_txm16x16(tile, x->plane[0].coeff + n * 256, stride * 2);
    }
  }
}

107
/* Forward-transforms every 8x8 tile of the plane-0 residual.
 *
 * Tiles are visited in raster order. For each tile the transform type is
 * queried with get_tx_type_8x8(); DCT_DCT tiles go through the
 * x->fwd_txm8x8 function pointer, all other types through
 * vp9_short_fht8x8(). Tile n writes 64 coefficients at coeff + n * 64.
 */
void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  /* bw/bh: number of 8-sample tiles per row / per column. */
  const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 1);
  /* stride: residual row stride in samples; bstride: the same in 4-sample
   * units, used to form the block index passed to get_tx_type_8x8(). */
  const int stride = 8 << bwl, bstride = 2 << bwl;
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2);
    int16_t *const tile = x->plane[0].src_diff +
                          y_idx * stride * 8 + x_idx * 8;

    /* NOTE(review): the two paths are given different pitch values
     * (stride vs. stride * 2); presumably elements vs. bytes -- confirm. */
    if (tx_type != DCT_DCT) {
      vp9_short_fht8x8(tile, x->plane[0].coeff + n * 64, stride, tx_type);
    } else {
      x->fwd_txm8x8(tile, x->plane[0].coeff + n * 64, stride * 2);
    }
  }
}

128
/* Forward-transforms every 4x4 tile of the plane-0 residual.
 *
 * Tiles are visited in raster order. For each tile the transform type is
 * queried with get_tx_type_4x4() using the tile index itself; DCT_DCT tiles
 * go through the x->fwd_txm4x4 function pointer, all other types through
 * vp9_short_fht4x4(). Tile n writes 16 coefficients at coeff + n * 16.
 */
void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  /* bw/bh: number of 4-sample tiles per row / per column. */
  const int bwl = b_width_log2(bsize), bw = 1 << bwl;
  const int bh = 1 << b_height_log2(bsize);
  /* Row stride of the residual buffer, in samples. */
  const int stride = 4 << bwl;
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
    const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
    int16_t *const tile = x->plane[0].src_diff +
                          y_idx * stride * 4 + x_idx * 4;

    /* NOTE(review): the two paths are given different pitch values
     * (stride vs. stride * 2); presumably elements vs. bytes -- confirm. */
    if (tx_type != DCT_DCT) {
      vp9_short_fht4x4(tile, x->plane[0].coeff + n * 16, stride, tx_type);
    } else {
      x->fwd_txm4x4(tile, x->plane[0].coeff + n * 16, stride * 2);
    }
  }
}

149
150
/* Applies the forward 32x32 DCT to planes 1 and 2.
 *
 * Only valid for bsize == BLOCK_SIZE_SB64X64 (asserted). Each plane is
 * transformed as a single 32x32 tile with a hard-coded pitch of 64.
 */
void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  assert(bsize == BLOCK_SIZE_SB64X64);
  (void)bsize;  /* Only read by the assert; avoids a warning under NDEBUG. */
  vp9_clear_system_state();
  vp9_short_fdct32x32(x->plane[1].src_diff, x->plane[1].coeff, 64);
  vp9_short_fdct32x32(x->plane[2].src_diff, x->plane[2].coeff, 64);
}

156
/* Applies x->fwd_txm16x16 to every 16x16 tile of planes 1 and 2.
 *
 * The tile grid is half the plane-0 grid in each direction (bwl - 1,
 * bhl - 1); presumably this reflects 2x chroma subsampling -- confirm.
 * Tile n of each plane writes 256 coefficients at coeff + n * 256.
 *
 * NOTE(review): the shifts by (bwl - 1) / (bhl - 1) require bsize to be at
 * least two 16-sample tiles wide and tall; smaller sizes would shift by a
 * negative count.
 */
void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2;
  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
  /* Row stride of each residual buffer, in samples. */
  const int stride = 16 << (bwl - 1);
  int n;

  vp9_clear_system_state();
  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);

    x->fwd_txm16x16(x->plane[1].src_diff + y_idx * stride * 16 + x_idx * 16,
                    x->plane[1].coeff + n * 256, stride * 2);
    x->fwd_txm16x16(x->plane[2].src_diff + y_idx * stride * 16 + x_idx * 16,
                    x->plane[2].coeff + n * 256, stride * 2);
  }
}

173
/* Applies x->fwd_txm8x8 to every 8x8 tile of planes 1 and 2.
 *
 * The tile grid is half the plane-0 grid in each direction (bwl - 1,
 * bhl - 1); presumably this reflects 2x chroma subsampling -- confirm.
 * Tile n of each plane writes 64 coefficients at coeff + n * 64.
 *
 * NOTE(review): the shifts by (bwl - 1) / (bhl - 1) require bsize to be at
 * least two 8-sample tiles wide and tall; smaller sizes would shift by a
 * negative count.
 */
void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1;
  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
  /* Row stride of each residual buffer, in samples. */
  const int stride = 8 << (bwl - 1);
  int n;

  vp9_clear_system_state();
  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);

    x->fwd_txm8x8(x->plane[1].src_diff + y_idx * stride * 8 + x_idx * 8,
                  x->plane[1].coeff + n * 64, stride * 2);
    x->fwd_txm8x8(x->plane[2].src_diff + y_idx * stride * 8 + x_idx * 8,
                  x->plane[2].coeff + n * 64, stride * 2);
  }
}

190
/* Applies x->fwd_txm4x4 to every 4x4 tile of planes 1 and 2.
 *
 * The tile grid is half the plane-0 grid in each direction (bwl - 1,
 * bhl - 1); presumably this reflects 2x chroma subsampling -- confirm.
 * Tile n of each plane writes 16 coefficients at coeff + n * 16.
 *
 * NOTE(review): the shifts by (bwl - 1) / (bhl - 1) require
 * b_width_log2(bsize) and b_height_log2(bsize) to be at least 1.
 */
void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
  /* Row stride of each residual buffer, in samples. */
  const int stride = 4 << (bwl - 1);
  int n;

  vp9_clear_system_state();
  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);

    x->fwd_txm4x4(x->plane[1].src_diff + y_idx * stride * 4 + x_idx * 4,
                  x->plane[1].coeff + n * 16, stride * 2);
    x->fwd_txm4x4(x->plane[2].src_diff + y_idx * stride * 4 + x_idx * 4,
                  x->plane[2].coeff + n * 16, stride * 2);
  }
}

207
208
/* Low 8 bits of (128 + R * RM). UPDATE_RD_COST() uses RDTRUNC as a
 * tie-breaker when two RDCOST values compare equal. The DM and D arguments
 * are accepted but not used by either macro. */
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
209
typedef struct vp9_token_state vp9_token_state;

/* One node of the trellis built by optimize_b(). */
struct vp9_token_state {
  int           rate;   /* Accumulated rate (token cost) from this node on. */
  int           error;  /* Accumulated squared error from this node on. */
  int           next;   /* Scan index of the successor node. */
  signed char   token;  /* Token associated with this node. */
  short         qc;     /* Quantized coefficient value for this node. */
};

219
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2

/* Per-plane-type multiplier applied to mb->rdmult in optimize_b(); indexed
 * by PLANE_TYPE. Only the first two entries are set explicitly; the
 * remaining entries are zero-initialized. */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  UV_RD_MULT,
};

228
229
230
231
232
233
234
235
236
237
/* Recomputes rd_cost0 / rd_cost1 from the caller's rdmult, rddiv,
 * rate0/rate1 and error0/error1 locals. If the two RDCOST values tie, both
 * are replaced by their RDTRUNC values so the comparison can still
 * discriminate. Wrapped in do { } while (0) so that `UPDATE_RD_COST();`
 * behaves as a single statement, including inside if/else. */
#define UPDATE_RD_COST()\
do {\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
} while (0)

238
239
/* Returns the coefficient context for scan position idx + 1 under the
 * hypothesis that position idx carries `token`.
 *
 * token_cache[scan[idx]] is temporarily overwritten with `token`,
 * vp9_get_coef_context() is queried for position idx + 1, and the original
 * cache entry is restored before returning, so the cache is unchanged on
 * exit. scan, nb, pad and l are forwarded to vp9_get_coef_context() as-is.
 */
static int trellis_get_coeff_context(const int *scan,
                                     const int *nb,
                                     int idx, int token,
                                     uint8_t *token_cache,
                                     int pad, int l) {
  int bak = token_cache[scan[idx]], pt;
  token_cache[scan[idx]] = token;
  pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
  token_cache[scan[idx]] = bak;
  return pt;
}

John Koleszar's avatar
John Koleszar committed
252
253
static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
                       int plane, int block, BLOCK_SIZE_TYPE bsize,
254
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
John Koleszar's avatar
John Koleszar committed
255
                       TX_SIZE tx_size) {
256
  const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
257
  MACROBLOCKD *const xd = &mb->e_mbd;
258
259
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
John Koleszar's avatar
John Koleszar committed
260
261
  const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff,
                                          block, 16);
262
263
  int16_t *qcoeff_ptr;
  int16_t *dqcoeff_ptr;
John Koleszar's avatar
John Koleszar committed
264
  int eob = xd->plane[plane].eobs[block], final_eob, sz = 0;
265
  const int i0 = 0;
266
  int rc, x, next, i;
267
268
269
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
John Koleszar's avatar
John Koleszar committed
270
  PLANE_TYPE type = xd->plane[plane].plane_type;
John Koleszar's avatar
John Koleszar committed
271
  int err_mult = plane_rd_mult[type];
272
273
  int default_eob, pad;
  int const *scan, *nb;
274
  const int mul = 1 + (tx_size == TX_32X32);
275
  uint8_t token_cache[1024];
John Koleszar's avatar
John Koleszar committed
276
277
278
  const int ib = txfrm_block_to_raster_block(xd, bsize, plane,
                                             block, 2 * tx_size);
  const int16_t *dequant_ptr = xd->plane[plane].dequant;
John Koleszar's avatar
John Koleszar committed
279

John Koleszar's avatar
John Koleszar committed
280
281
282
  assert((!type && !plane) || (type && plane));
  dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16);
  qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Deb Mukherjee's avatar
Deb Mukherjee committed
283
  switch (tx_size) {
284
    default:
285
    case TX_4X4: {
John Koleszar's avatar
John Koleszar committed
286
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT;
287
      default_eob = 16;
288
      scan = get_scan_4x4(tx_type);
289
      break;
290
    }
291
    case TX_8X8: {
John Koleszar's avatar
John Koleszar committed
292
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT;
293
      scan = get_scan_8x8(tx_type);
294
295
      default_eob = 64;
      break;
296
297
    }
    case TX_16X16: {
John Koleszar's avatar
John Koleszar committed
298
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT;
299
      scan = get_scan_16x16(tx_type);
300
301
      default_eob = 256;
      break;
302
    }
303
304
305
306
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
      default_eob = 1024;
      break;
307
  }
John Koleszar's avatar
John Koleszar committed
308
  assert(eob <= default_eob);
John Koleszar's avatar
John Koleszar committed
309
310
311
312
313
314

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
315
  memset(best_index, 0, sizeof(best_index));
John Koleszar's avatar
John Koleszar committed
316
317
318
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
319
  tokens[eob][0].next = default_eob;
John Koleszar's avatar
John Koleszar committed
320
321
322
323
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
324
  for (i = 0; i < eob; i++)
325
    token_cache[scan[i]] = vp9_dct_value_tokens_ptr[qcoeff_ptr[scan[i]]].token;
326
327
  nb = vp9_get_coef_neighbors_handle(scan, &pad);

John Koleszar's avatar
John Koleszar committed
328
  for (i = eob; i-- > i0;) {
329
    int base_bits, d2, dx;
John Koleszar's avatar
John Koleszar committed
330

331
    rc = scan[i];
John Koleszar's avatar
John Koleszar committed
332
333
334
335
336
337
338
339
340
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
341
      t0 = (vp9_dct_value_tokens_ptr + x)->token;
John Koleszar's avatar
John Koleszar committed
342
      /* Consider both possible successor states. */
343
      if (next < default_eob) {
344
        band = get_coef_band(scan, tx_size, i + 1);
345
346
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                       pad, default_eob);
John Koleszar's avatar
John Koleszar committed
347
        rate0 +=
348
          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
349
        rate1 +=
350
          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
351
      }
352
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
353
354
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
355
      base_bits = *(vp9_dct_value_cost_ptr + x);
356
      dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
John Koleszar's avatar
John Koleszar committed
357
358
359
360
361
362
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
363
      best_index[i][0] = best;
364

John Koleszar's avatar
John Koleszar committed
365
366
367
368
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

369
370
371
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
                                         dequant_ptr[rc != 0]))
John Koleszar's avatar
John Koleszar committed
372
373
374
375
376
377
378
379
380
381
382
383
384
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
385
         */
John Koleszar's avatar
John Koleszar committed
386
387
388
389
390
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
391
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
John Koleszar's avatar
John Koleszar committed
392
      }
393
      if (next < default_eob) {
394
        band = get_coef_band(scan, tx_size, i + 1);
John Koleszar's avatar
John Koleszar committed
395
        if (t0 != DCT_EOB_TOKEN) {
396
397
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                         pad, default_eob);
398
          rate0 += mb->token_costs[tx_size][type][ref][band][pt][
399
              tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
400
        }
John Koleszar's avatar
John Koleszar committed
401
        if (t1 != DCT_EOB_TOKEN) {
402
403
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
                                         pad, default_eob);
404
          rate1 += mb->token_costs[tx_size][type][ref][band][pt][
405
              tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
406
407
        }
      }
John Koleszar's avatar
John Koleszar committed
408

409
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
410
411
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
412
      base_bits = *(vp9_dct_value_cost_ptr + x);
John Koleszar's avatar
John Koleszar committed
413
414

      if (shortcut) {
415
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
John Koleszar's avatar
John Koleszar committed
416
417
418
419
420
421
422
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
423
      best_index[i][1] = best;
John Koleszar's avatar
John Koleszar committed
424
425
      /* Finally, make this the new head of the trellis. */
      next = i;
426
    }
John Koleszar's avatar
John Koleszar committed
427
428
429
430
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
431
      band = get_coef_band(scan, tx_size, i + 1);
John Koleszar's avatar
John Koleszar committed
432
433
434
435
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
436
437
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t0];
John Koleszar's avatar
John Koleszar committed
438
439
440
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
441
442
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t1];
John Koleszar's avatar
John Koleszar committed
443
444
445
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
446
    }
John Koleszar's avatar
John Koleszar committed
447
448
449
  }

  /* Now pick the best path through the whole trellis. */
450
  band = get_coef_band(scan, tx_size, i + 1);
451
  pt = combine_entropy_contexts(*a, *l);
John Koleszar's avatar
John Koleszar committed
452
453
454
455
456
457
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
458
459
  rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
460
  UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
461
462
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
463
464
  vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
  vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
John Koleszar's avatar
John Koleszar committed
465
466
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
467
    if (x) {
John Koleszar's avatar
John Koleszar committed
468
      final_eob = i;
469
    }
470
    rc = scan[i];
John Koleszar's avatar
John Koleszar committed
471
    qcoeff_ptr[rc] = x;
472
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
473

John Koleszar's avatar
John Koleszar committed
474
    next = tokens[i][best].next;
475
    best = best_index[i][best];
John Koleszar's avatar
John Koleszar committed
476
477
478
  }
  final_eob++;

John Koleszar's avatar
John Koleszar committed
479
  xd->plane[plane].eobs[block] = final_eob;
480
  *a = *l = (final_eob > 0);
481
482
}

John Koleszar's avatar
John Koleszar committed
483
484
485
486
487
/* Argument bundle handed to optimize_block() through the void* parameter of
 * the foreach_transformed_block_* iterators. Field order matters: callers
 * initialize it positionally as {cm, x, &ctx}. */
struct optimize_block_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  struct optimize_ctx *ctx;
};
488

489
490
491
492
/* Runs the trellis optimizer on one transform block.
 *
 * The block's raster position (x, y) is obtained from
 * txfrm_block_to_raster_xy() and used to select the above/left entropy
 * context entries in ctx (ctx->ta[plane][x], ctx->tl[plane][y]);
 * optimize_b() updates those entries in place. The transform size passed
 * down is ss_txfrm_size / 2.
 */
void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
                    int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb,
                    struct optimize_ctx *ctx) {
  MACROBLOCKD* const xd = &mb->e_mbd;
  int x, y;

  // find current entropy context
  txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);

  optimize_b(cm, mb, plane, block, bsize,
             &ctx->ta[plane][x], &ctx->tl[plane][y],
             ss_txfrm_size / 2);
}

503
504
505
506
507
508
509
/* Per-block callback for the foreach_transformed_block_* iterators: unpacks
 * the optimize_block_args bundle carried in arg and forwards everything to
 * vp9_optimize_b(). */
static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  const struct optimize_block_args *const params = arg;
  VP9_COMMON *const common = params->cm;
  MACROBLOCK *const mb = params->x;
  struct optimize_ctx *const opt_ctx = params->ctx;

  vp9_optimize_b(plane, block, bsize, ss_txfrm_size, common, mb, opt_ctx);
}

John Koleszar's avatar
John Koleszar committed
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
/* Fills ctx->ta / ctx->tl with per-transform-block entropy contexts.
 *
 * For every plane, the above and left context arrays are walked in steps of
 * (1 << tx_size) entries. Each group's entries are OR-ed together into the
 * ctx slot at the group's first index; the inner scan stops as soon as the
 * accumulated value becomes non-zero. Slots at other indices are not
 * written.
 */
void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
                       struct optimize_ctx *ctx) {
  int p;

  for (p = 0; p < MAX_MB_PLANE; p++) {
    const struct macroblockd_plane* const plane = &xd->plane[p];
    const int bwl = b_width_log2(bsize) - plane->subsampling_x;
    const int bhl = b_height_log2(bsize) - plane->subsampling_y;
    const TX_SIZE tx_size = tx_size_for_plane(xd, bsize, p);
    int i, j;

    /* Above contexts, one slot per transform-block column. */
    for (i = 0; i < 1 << bwl; i += 1 << tx_size) {
      int c = 0;
      ctx->ta[p][i] = 0;
      for (j = 0; j < 1 << tx_size && !c; j++) {
        c = ctx->ta[p][i] |= plane->above_context[i + j];
      }
    }
    /* Left contexts, one slot per transform-block row. */
    for (i = 0; i < 1 << bhl; i += 1 << tx_size) {
      int c = 0;
      ctx->tl[p][i] = 0;
      for (j = 0; j < 1 << tx_size && !c; j++) {
        c = ctx->tl[p][i] |= plane->left_context[i + j];
      }
    }
  }
}

John Koleszar's avatar
John Koleszar committed
538
539
540
541
542
543
544
545
546
547
/* Trellis-optimizes every transform block of plane 0.
 *
 * A fresh optimize_ctx is initialized from the current entropy contexts and
 * optimize_block() is invoked for each transformed block of plane 0. When
 * CONFIG_SB8X8 is disabled the iterator takes one extra argument, passed
 * as 0.
 */
void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x,
                      BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx ctx;
  struct optimize_block_args arg = {cm, x, &ctx};
  vp9_optimize_init(&x->e_mbd, bsize, &ctx);
  foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
#if !CONFIG_SB8X8
  0,
#endif
                                     optimize_block, &arg);
}

John Koleszar's avatar
John Koleszar committed
550
551
552
553
554
555
/* Trellis-optimizes the transform blocks visited by
 * foreach_transformed_block_uv(), using a fresh optimize_ctx initialized
 * from the current entropy contexts.
 */
void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
                       BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx ctx;
  struct optimize_block_args arg = {cm, x, &ctx};
  vp9_optimize_init(&x->e_mbd, bsize, &ctx);
  foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg);
}

558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
/* Argument bundle handed to encode_block() through the void* parameter of
 * foreach_transformed_block(). Field order matters: vp9_encode_sb()
 * initializes it positionally as {cm, x, &ctx}. */
struct encode_b_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  struct optimize_ctx *ctx;
};

/* Per-block callback for foreach_transformed_block(): runs the full
 * forward path for one transform block and reconstructs its residual.
 *
 * Steps, in order:
 *   1. forward transform of the block's src_diff into coeff (the transform
 *      type is looked up only for plane 0; other planes use DCT_DCT),
 *   2. quantization via vp9_quantize(),
 *   3. optional trellis optimization when x->optimize is set,
 *   4. inverse transform of dqcoeff into the plane's diff buffer.
 *
 * arg must point to a struct encode_b_args. ss_txfrm_size / 2 is the
 * transform size.
 */
static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                         int ss_txfrm_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  /* Plane width in samples; also the row stride of src_diff / diff. */
  const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
  const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
                                                       block, ss_txfrm_size);
  int16_t* const src_diff = raster_block_offset_int16(xd, bsize, plane,
                                                      raster_block,
                                                      x->plane[plane].src_diff);
  int16_t* const diff = raster_block_offset_int16(xd, bsize, plane,
                                                  raster_block,
                                                  xd->plane[plane].diff);
  TX_TYPE tx_type = DCT_DCT;

  /* Forward transform. NOTE(review): DCT paths are given bw * 2 as pitch
   * and the hybrid-transform paths bw; presumably bytes vs. elements. */
  switch (ss_txfrm_size / 2) {
    case TX_32X32:
      vp9_short_fdct32x32(src_diff,
                          BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                          bw * 2);
      break;
    case TX_16X16:
      tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht16x16(src_diff,
                           BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                           bw, tx_type);
      } else {
        x->fwd_txm16x16(src_diff,
                        BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                        bw * 2);
      }
      break;
    case TX_8X8:
      tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht8x8(src_diff,
                         BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                         bw, tx_type);
      } else {
        x->fwd_txm8x8(src_diff,
                      BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                      bw * 2);
      }
      break;
    case TX_4X4:
      tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht4x4(src_diff,
                         BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                         bw, tx_type);
      } else {
        x->fwd_txm4x4(src_diff,
                      BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                      bw * 2);
      }
      break;
    default:
      assert(0);
      break;
  }

  /* 16 << ss_txfrm_size is the number of coefficients in the block. */
  vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type);
  if (x->optimize)
    vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx);

  /* Inverse transform of the dequantized coefficients. */
  switch (ss_txfrm_size / 2) {
    case TX_32X32:
      vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
                          diff, bw * 2);
      break;
    case TX_16X16:
      if (tx_type == DCT_DCT) {
        vp9_short_idct16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
                            diff, bw * 2);
      } else {
        vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
                           diff, bw, tx_type);
      }
      break;
    case TX_8X8:
      if (tx_type == DCT_DCT) {
        vp9_short_idct8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
                          diff, bw * 2);
      } else {
        vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
                         diff, bw, tx_type);
      }
      break;
    case TX_4X4:
      if (tx_type == DCT_DCT) {
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        vp9_inverse_transform_b_4x4(xd, xd->plane[plane].eobs[block],
            BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), diff, bw * 2);
      } else {
        vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
                         diff, bw, tx_type);
      }
      break;
    default:
      /* Unknown sizes were already flagged by the assert above. */
      break;
  }
}

668
669
670
671
672
/* Encodes one block: residual computation, per-transform-block
 * transform / quantize / (optional) optimize / inverse transform via
 * encode_block(), then reconstruction with vp9_recon_sb().
 *
 * ctx is only initialized when x->optimize is set; encode_block() only
 * passes it on to vp9_optimize_b() under the same condition.
 */
void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x,
                   BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {cm, x, &ctx};

  vp9_subtract_sb(x, bsize);
  if (x->optimize)
    vp9_optimize_init(xd, bsize, &ctx);

  foreach_transformed_block(xd, bsize, encode_block, &arg);

  vp9_recon_sb(xd, bsize);
}