vp9_encodemb.c 21.3 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10
 */

11
#include "./vpx_config.h"
12
#include "vp9/encoder/vp9_encodemb.h"
13
#include "vp9/common/vp9_reconinter.h"
14 15
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_tokenize.h"
16 17
#include "vp9/common/vp9_invtrans.h"
#include "vp9/common/vp9_reconintra.h"
John Koleszar's avatar
John Koleszar committed
18
#include "vpx_mem/vpx_mem.h"
19
#include "vp9/encoder/vp9_rdopt.h"
20
#include "vp9/common/vp9_systemdependent.h"
21
#include "vp9_rtcd.h"
John Koleszar's avatar
John Koleszar committed
22

23 24 25 26
/* Computes the per-sample residual diff = src - pred over a rows x cols
 * region.
 *
 *   rows, cols   - dimensions of the region; nothing is written when either
 *                  is <= 0.
 *   diff_ptr     - output residual; advanced by diff_stride elements per row.
 *   src_ptr      - source samples; advanced by src_stride per row.
 *   pred_ptr     - prediction samples; advanced by pred_stride per row.
 *
 * Only the first `cols` entries of each output row are written.
 */
void vp9_subtract_block(int rows, int cols,
                        int16_t *diff_ptr, int diff_stride,
                        const uint8_t *src_ptr, int src_stride,
                        const uint8_t *pred_ptr, int pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++) {
      /* Both operands promote to int, so the difference lies in
       * [-255, 255] and always fits in int16_t. */
      diff_ptr[c] = (int16_t)(src_ptr[c] - pred_ptr[c]);
    }

    diff_ptr += diff_stride;
    pred_ptr += pred_stride;
    src_ptr  += src_stride;
  }
}

39

40 41 42 43
/* Writes the residual (source minus prediction) for one plane of a block
 * into x->plane[plane].src_diff.
 *
 * The block dimensions in samples are derived from bsize and reduced by the
 * plane's subsampling shifts. The residual buffer is written densely, i.e.
 * with a row stride equal to the plane's block width. The prediction is read
 * from the plane's dst buffer.
 */
static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
  const MACROBLOCKD * const xd = &x->e_mbd;
  const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
  const int bh = 4 << (b_height_log2(bsize) - xd->plane[plane].subsampling_y);
  const uint8_t *src = x->plane[plane].src.buf;
  const int src_stride = x->plane[plane].src.stride;

  assert(plane < 3);
  vp9_subtract_block(bh, bw,
                     x->plane[plane].src_diff, bw, src, src_stride,
                     xd->plane[plane].dst.buf, xd->plane[plane].dst.stride);
}

53 54
/* Computes the residual for plane 0 of the block. */
void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  subtract_plane(x, bsize, 0);
}

57 58
/* Computes the residual for every plane after plane 0
 * (planes 1 .. MAX_MB_PLANE - 1). */
void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  int i;

  for (i = 1; i < MAX_MB_PLANE; i++)
    subtract_plane(x, bsize, i);
}

64 65 66
/* Computes the residual for all planes: plane 0 first, then the rest. */
void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  vp9_subtract_sby(x, bsize);
  vp9_subtract_sbuv(x, bsize);
}

69

70
/* Forward-transforms the plane 0 residual of a block in 32x32 units.
 *
 * The block is walked in raster order over a bw x bh grid of 32x32
 * transform blocks. Transform block n reads from src_diff at its
 * (x_idx, y_idx) position and writes 1024 coefficients at coeff + n * 1024.
 */
void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  /* Width/height of the block counted in 32x32 transform blocks. */
  const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 3);
  /* Row stride of the residual buffer, in samples. */
  const int stride = 32 << bwl;
  int n;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;

    /* NOTE(review): the DCT is handed stride * 2; presumably a pitch in
     * bytes for 16-bit samples - confirm against vp9_short_fdct32x32. */
    vp9_short_fdct32x32(x->plane[0].src_diff + y_idx * stride * 32 + x_idx * 32,
                        x->plane[0].coeff + n * 1024, stride * 2);
  }
}

84
/* Forward-transforms the plane 0 residual of a block in 16x16 units.
 *
 * For each 16x16 transform block (raster order) the transform type is
 * queried; anything other than DCT_DCT goes through vp9_short_fht16x16,
 * otherwise the x->fwd_txm16x16 function pointer is used. Transform block n
 * writes 256 coefficients at coeff + n * 256.
 */
void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  /* Width/height of the block counted in 16x16 transform blocks. */
  const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 2);
  /* stride: residual row stride in samples; bstride: the same width in
   * units of 4 samples, used to form the index given to the tx-type lookup. */
  const int stride = 16 << bwl, bstride = 4 << bwl;
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
    const TX_TYPE tx_type = get_tx_type_16x16(xd,
                                              (y_idx * bstride + x_idx) * 4);
    /* Offsets shared by both transform paths. */
    const int src_off = y_idx * stride * 16 + x_idx * 16;
    const int dst_off = n * 256;

    /* NOTE(review): the hybrid transform receives `stride` while the DCT
     * path receives `stride * 2`; presumably samples vs. bytes - confirm. */
    if (tx_type != DCT_DCT) {
      vp9_short_fht16x16(x->plane[0].src_diff + src_off,
                         x->plane[0].coeff + dst_off, stride, tx_type);
    } else {
      x->fwd_txm16x16(x->plane[0].src_diff + src_off,
                      x->plane[0].coeff + dst_off, stride * 2);
    }
  }
}

107
/* Forward-transforms the plane 0 residual of a block in 8x8 units.
 *
 * For each 8x8 transform block (raster order) the transform type is queried;
 * anything other than DCT_DCT goes through vp9_short_fht8x8, otherwise the
 * x->fwd_txm8x8 function pointer is used. Transform block n writes 64
 * coefficients at coeff + n * 64.
 */
void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  /* Width/height of the block counted in 8x8 transform blocks. */
  const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl;
  const int bh = 1 << (b_height_log2(bsize) - 1);
  /* stride: residual row stride in samples; bstride: the same width in
   * units of 4 samples, used to form the index given to the tx-type lookup. */
  const int stride = 8 << bwl, bstride = 2 << bwl;
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2);
    /* Offsets shared by both transform paths. */
    const int src_off = y_idx * stride * 8 + x_idx * 8;
    const int dst_off = n * 64;

    /* NOTE(review): the hybrid transform receives `stride` while the DCT
     * path receives `stride * 2`; presumably samples vs. bytes - confirm. */
    if (tx_type != DCT_DCT) {
      vp9_short_fht8x8(x->plane[0].src_diff + src_off,
                       x->plane[0].coeff + dst_off, stride, tx_type);
    } else {
      x->fwd_txm8x8(x->plane[0].src_diff + src_off,
                    x->plane[0].coeff + dst_off, stride * 2);
    }
  }
}

128
/* Forward-transforms the plane 0 residual of a block in 4x4 units.
 *
 * For each 4x4 transform block (raster order) the transform type is queried
 * with the raster index n; anything other than DCT_DCT goes through
 * vp9_short_fht4x4, otherwise the x->fwd_txm4x4 function pointer is used.
 * Transform block n writes 16 coefficients at coeff + n * 16.
 */
void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  /* Width/height of the block counted in 4x4 transform blocks. */
  const int bwl = b_width_log2(bsize), bw = 1 << bwl;
  const int bh = 1 << b_height_log2(bsize);
  /* Row stride of the residual buffer, in samples. */
  const int stride = 4 << bwl;
  MACROBLOCKD *const xd = &x->e_mbd;
  int n;

  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
    const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
    /* Offsets shared by both transform paths. */
    const int src_off = y_idx * stride * 4 + x_idx * 4;
    const int dst_off = n * 16;

    /* NOTE(review): the hybrid transform receives `stride` while the DCT
     * path receives `stride * 2`; presumably samples vs. bytes - confirm. */
    if (tx_type != DCT_DCT) {
      vp9_short_fht4x4(x->plane[0].src_diff + src_off,
                       x->plane[0].coeff + dst_off, stride, tx_type);
    } else {
      x->fwd_txm4x4(x->plane[0].src_diff + src_off,
                    x->plane[0].coeff + dst_off, stride * 2);
    }
  }
}

149 150
/* Applies a single 32x32 forward DCT to each of planes 1 and 2.
 *
 * Only valid for BLOCK_SIZE_SB64X64 (asserted). Each plane's residual is
 * transformed from the start of its src_diff buffer into the start of its
 * coeff buffer, with 64 passed as the stride argument.
 */
void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  assert(bsize == BLOCK_SIZE_SB64X64);
  vp9_clear_system_state();
  vp9_short_fdct32x32(x->plane[1].src_diff, x->plane[1].coeff, 64);
  vp9_short_fdct32x32(x->plane[2].src_diff, x->plane[2].coeff, 64);
}

156
/* Forward-transforms the residual of planes 1 and 2 in 16x16 units using
 * x->fwd_txm16x16.
 *
 * The `- 1` adjustments halve the plane 0 dimensions in both directions,
 * i.e. the subsampling of these planes is hard-coded here rather than read
 * from xd->plane[].subsampling_x/y. Transform block n of each plane writes
 * 256 coefficients at coeff + n * 256.
 */
void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2;
  /* Width/height of each plane counted in 16x16 transform blocks. */
  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
  /* Row stride of the residual buffers, in samples. */
  const int stride = 16 << (bwl - 1);
  int n;

  vp9_clear_system_state();
  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
    /* Both planes use the same offsets. */
    const int src_off = y_idx * stride * 16 + x_idx * 16;
    const int dst_off = n * 256;

    x->fwd_txm16x16(x->plane[1].src_diff + src_off,
                    x->plane[1].coeff + dst_off, stride * 2);
    x->fwd_txm16x16(x->plane[2].src_diff + src_off,
                    x->plane[2].coeff + dst_off, stride * 2);
  }
}

173
/* Forward-transforms the residual of planes 1 and 2 in 8x8 units using
 * x->fwd_txm8x8.
 *
 * The `- 1` adjustments halve the plane 0 dimensions in both directions,
 * i.e. the subsampling of these planes is hard-coded here rather than read
 * from xd->plane[].subsampling_x/y. Transform block n of each plane writes
 * 64 coefficients at coeff + n * 64.
 */
void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1;
  /* Width/height of each plane counted in 8x8 transform blocks. */
  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
  /* Row stride of the residual buffers, in samples. */
  const int stride = 8 << (bwl - 1);
  int n;

  vp9_clear_system_state();
  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
    /* Both planes use the same offsets. */
    const int src_off = y_idx * stride * 8 + x_idx * 8;
    const int dst_off = n * 64;

    x->fwd_txm8x8(x->plane[1].src_diff + src_off,
                  x->plane[1].coeff + dst_off, stride * 2);
    x->fwd_txm8x8(x->plane[2].src_diff + src_off,
                  x->plane[2].coeff + dst_off, stride * 2);
  }
}

190
/* Forward-transforms the residual of planes 1 and 2 in 4x4 units using
 * x->fwd_txm4x4.
 *
 * The `- 1` adjustments halve the plane 0 dimensions in both directions,
 * i.e. the subsampling of these planes is hard-coded here rather than read
 * from xd->plane[].subsampling_x/y. Transform block n of each plane writes
 * 16 coefficients at coeff + n * 16.
 */
void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
  /* Width/height of each plane counted in 4x4 transform blocks. */
  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
  /* Row stride of the residual buffers, in samples. */
  const int stride = 4 << (bwl - 1);
  int n;

  vp9_clear_system_state();
  for (n = 0; n < bw * bh; n++) {
    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
    /* Both planes use the same offsets. */
    const int src_off = y_idx * stride * 4 + x_idx * 4;
    const int dst_off = n * 16;

    x->fwd_txm4x4(x->plane[1].src_diff + src_off,
                  x->plane[1].coeff + dst_off, stride * 2);
    x->fwd_txm4x4(x->plane[2].src_diff + src_off,
                  x->plane[2].coeff + dst_off, stride * 2);
  }
}

207 208
/* Tie-break value used by UPDATE_RD_COST() when two RD costs compare equal:
 * the low 8 bits of (128 + R * RM). The DM and D arguments are accepted but
 * ignored. Both macros expand to the same expression. */
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp9_token_state vp9_token_state;

/* One node of the trellis built by optimize_b(). */
struct vp9_token_state {
  int           rate;   /* accumulated rate from this node onwards */
  int           error;  /* accumulated squared error from this node onwards */
  int           next;   /* scan index of the next trellis node */
  signed char   token;  /* token emitted for this node */
  short         qc;     /* quantized coefficient value chosen for this node */
};

219
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2

/* Multiplier applied to the RD multiplier in optimize_b(), indexed by the
 * plane's PLANE_TYPE. Only the first two entries are given explicitly; the
 * remaining two are implicitly zero-initialized. */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  UV_RD_MULT,
};

228 229 230 231 232 233 234 235 236 237
/* Recomputes rd_cost0/rd_cost1 from (rate0, error0) and (rate1, error1)
 * using the caller's rdmult/rddiv. When the two costs tie, they are replaced
 * by the RDTRUNC() tie-break values. All of these names must be variables in
 * the calling scope.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * including when used as the body of an if/else. */
#define UPDATE_RD_COST()\
do {\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
} while (0)

238 239
// This function is a place holder for now but may ultimately need
// to scan previous tokens to work out the correct context.
//
// Returns the coefficient context for scan position idx + 1 as computed by
// vp9_get_coef_context(), evaluated as if the token at scan position idx
// were `token`. The token_cache entry for scan[idx] is overwritten for the
// duration of the call and restored before returning.
static int trellis_get_coeff_context(const int *scan,
                                     const int *nb,
                                     int idx, int token,
                                     uint8_t *token_cache,
                                     int pad, int l) {
  int bak = token_cache[scan[idx]], pt;
  token_cache[scan[idx]] = token;
  pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
  token_cache[scan[idx]] = bak;
  return pt;
}

John Koleszar's avatar
John Koleszar committed
252 253
static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
                       int plane, int block, BLOCK_SIZE_TYPE bsize,
254
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
John Koleszar's avatar
John Koleszar committed
255
                       TX_SIZE tx_size) {
256
  const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
257
  MACROBLOCKD *const xd = &mb->e_mbd;
258 259
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
John Koleszar's avatar
John Koleszar committed
260 261
  const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff,
                                          block, 16);
262 263
  int16_t *qcoeff_ptr;
  int16_t *dqcoeff_ptr;
John Koleszar's avatar
John Koleszar committed
264
  int eob = xd->plane[plane].eobs[block], final_eob, sz = 0;
265
  const int i0 = 0;
266
  int rc, x, next, i;
267 268 269
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
John Koleszar's avatar
John Koleszar committed
270
  PLANE_TYPE type = xd->plane[plane].plane_type;
John Koleszar's avatar
John Koleszar committed
271
  int err_mult = plane_rd_mult[type];
272 273
  int default_eob, pad;
  int const *scan, *nb;
274
  const int mul = 1 + (tx_size == TX_32X32);
275
  uint8_t token_cache[1024];
John Koleszar's avatar
John Koleszar committed
276 277 278
  const int ib = txfrm_block_to_raster_block(xd, bsize, plane,
                                             block, 2 * tx_size);
  const int16_t *dequant_ptr = xd->plane[plane].dequant;
John Koleszar's avatar
John Koleszar committed
279

John Koleszar's avatar
John Koleszar committed
280 281 282
  assert((!type && !plane) || (type && plane));
  dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16);
  qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Deb Mukherjee's avatar
Deb Mukherjee committed
283
  switch (tx_size) {
284
    default:
285
    case TX_4X4: {
John Koleszar's avatar
John Koleszar committed
286
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT;
287
      default_eob = 16;
288
      scan = get_scan_4x4(tx_type);
289
      break;
290
    }
291
    case TX_8X8: {
John Koleszar's avatar
John Koleszar committed
292
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT;
293
      scan = get_scan_8x8(tx_type);
294 295
      default_eob = 64;
      break;
296 297
    }
    case TX_16X16: {
John Koleszar's avatar
John Koleszar committed
298
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT;
299
      scan = get_scan_16x16(tx_type);
300 301
      default_eob = 256;
      break;
302
    }
303 304 305 306
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
      default_eob = 1024;
      break;
307
  }
John Koleszar's avatar
John Koleszar committed
308
  assert(eob <= default_eob);
John Koleszar's avatar
John Koleszar committed
309 310 311 312 313 314

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
315
  memset(best_index, 0, sizeof(best_index));
John Koleszar's avatar
John Koleszar committed
316 317 318
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
319
  tokens[eob][0].next = default_eob;
John Koleszar's avatar
John Koleszar committed
320 321 322 323
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
324
  for (i = 0; i < eob; i++)
325
    token_cache[scan[i]] = vp9_dct_value_tokens_ptr[qcoeff_ptr[scan[i]]].token;
326 327
  nb = vp9_get_coef_neighbors_handle(scan, &pad);

John Koleszar's avatar
John Koleszar committed
328
  for (i = eob; i-- > i0;) {
329
    int base_bits, d2, dx;
John Koleszar's avatar
John Koleszar committed
330

331
    rc = scan[i];
John Koleszar's avatar
John Koleszar committed
332 333 334 335 336 337 338 339 340
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
341
      t0 = (vp9_dct_value_tokens_ptr + x)->token;
John Koleszar's avatar
John Koleszar committed
342
      /* Consider both possible successor states. */
343
      if (next < default_eob) {
344
        band = get_coef_band(scan, tx_size, i + 1);
345 346
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                       pad, default_eob);
John Koleszar's avatar
John Koleszar committed
347
        rate0 +=
348
          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
349
        rate1 +=
350
          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
351
      }
352
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
353 354
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
355
      base_bits = *(vp9_dct_value_cost_ptr + x);
356
      dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
John Koleszar's avatar
John Koleszar committed
357 358 359 360 361 362
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
363
      best_index[i][0] = best;
364

John Koleszar's avatar
John Koleszar committed
365 366 367 368
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

369 370 371
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
                                         dequant_ptr[rc != 0]))
John Koleszar's avatar
John Koleszar committed
372 373 374 375 376 377 378 379 380 381 382 383 384
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
385
         */
John Koleszar's avatar
John Koleszar committed
386 387 388 389 390
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
391
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
John Koleszar's avatar
John Koleszar committed
392
      }
393
      if (next < default_eob) {
394
        band = get_coef_band(scan, tx_size, i + 1);
John Koleszar's avatar
John Koleszar committed
395
        if (t0 != DCT_EOB_TOKEN) {
396 397
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                         pad, default_eob);
398
          rate0 += mb->token_costs[tx_size][type][ref][band][pt][
399
              tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
400
        }
John Koleszar's avatar
John Koleszar committed
401
        if (t1 != DCT_EOB_TOKEN) {
402 403
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
                                         pad, default_eob);
404
          rate1 += mb->token_costs[tx_size][type][ref][band][pt][
405
              tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
406 407
        }
      }
John Koleszar's avatar
John Koleszar committed
408

409
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
410 411
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
412
      base_bits = *(vp9_dct_value_cost_ptr + x);
John Koleszar's avatar
John Koleszar committed
413 414

      if (shortcut) {
415
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
John Koleszar's avatar
John Koleszar committed
416 417 418 419 420 421 422
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
423
      best_index[i][1] = best;
John Koleszar's avatar
John Koleszar committed
424 425
      /* Finally, make this the new head of the trellis. */
      next = i;
426
    }
John Koleszar's avatar
John Koleszar committed
427 428 429 430
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
431
      band = get_coef_band(scan, tx_size, i + 1);
John Koleszar's avatar
John Koleszar committed
432 433 434 435
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
436 437
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t0];
John Koleszar's avatar
John Koleszar committed
438 439 440
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
441 442
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t1];
John Koleszar's avatar
John Koleszar committed
443 444 445
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
446
    }
John Koleszar's avatar
John Koleszar committed
447 448 449
  }

  /* Now pick the best path through the whole trellis. */
450
  band = get_coef_band(scan, tx_size, i + 1);
451
  pt = combine_entropy_contexts(*a, *l);
John Koleszar's avatar
John Koleszar committed
452 453 454 455 456 457
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
458 459
  rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
460
  UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
461 462
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
463 464
  vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
  vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
John Koleszar's avatar
John Koleszar committed
465 466
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
467
    if (x) {
John Koleszar's avatar
John Koleszar committed
468
      final_eob = i;
469
    }
470
    rc = scan[i];
John Koleszar's avatar
John Koleszar committed
471
    qcoeff_ptr[rc] = x;
472
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
473

John Koleszar's avatar
John Koleszar committed
474
    next = tokens[i][best].next;
475
    best = best_index[i][best];
John Koleszar's avatar
John Koleszar committed
476 477 478
  }
  final_eob++;

John Koleszar's avatar
John Koleszar committed
479
  xd->plane[plane].eobs[block] = final_eob;
480
  *a = *l = (final_eob > 0);
481 482
}

John Koleszar's avatar
John Koleszar committed
483 484 485 486
/* Per-plane working copies of the above (ta) and left (tl) entropy contexts
 * used while optimizing a block. Filled in by vp9_optimize_init() and
 * updated in place by optimize_b() through optimize_block(). */
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};
487

John Koleszar's avatar
John Koleszar committed
488 489 490 491 492
/* Argument bundle passed through the `void *arg` parameter of
 * optimize_block(). */
struct optimize_block_args {
  VP9_COMMON *cm;            /* forwarded to optimize_b() */
  MACROBLOCK *x;             /* macroblock being optimized */
  struct optimize_ctx *ctx;  /* above/left entropy context working copies */
};
493

John Koleszar's avatar
John Koleszar committed
494 495 496 497 498
/* Per-transform-block callback: optimizes one block via optimize_b().
 *
 *   plane, block, bsize - identify the transform block.
 *   ss_txfrm_size       - transform size in the visitor's units; halved to
 *                         obtain the TX_SIZE passed to optimize_b().
 *   arg                 - pointer to a struct optimize_block_args.
 */
static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  const struct optimize_block_args* const args = arg;
  MACROBLOCKD* const xd = &args->x->e_mbd;
  int x, y;

  // find current entropy context
  txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);

  optimize_b(args->cm, args->x, plane, block, bsize,
             &args->ctx->ta[plane][x], &args->ctx->tl[plane][y],
             ss_txfrm_size / 2);
}

John Koleszar's avatar
John Koleszar committed
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
/* Initializes ctx from the above/left entropy contexts of every plane.
 *
 * For each plane the context arrays are walked in steps of the plane's
 * transform size (1 << tx_size entries). The entry at the start of each step
 * is set to the OR of the plane's context entries covered by that transform,
 * stopping at the first non-zero accumulated value. Entries between steps
 * are left untouched.
 */
void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
                       struct optimize_ctx *ctx) {
  int p;

  for (p = 0; p < MAX_MB_PLANE; p++) {
    const struct macroblockd_plane* const plane = &xd->plane[p];
    const int bwl = b_width_log2(bsize) - plane->subsampling_x;
    const int bhl = b_height_log2(bsize) - plane->subsampling_y;
    const TX_SIZE tx_size = tx_size_for_plane(xd, bsize, p);
    int i, j;

    /* Above contexts, one per transform column. */
    for (i = 0; i < 1 << bwl; i += 1 << tx_size) {
      int c = 0;
      ctx->ta[p][i] = 0;
      for (j = 0; j < 1 << tx_size && !c; j++) {
        c = ctx->ta[p][i] |= plane->above_context[i + j];
      }
    }
    /* Left contexts, one per transform row. */
    for (i = 0; i < 1 << bhl; i += 1 << tx_size) {
      int c = 0;
      ctx->tl[p][i] = 0;
      for (j = 0; j < 1 << tx_size && !c; j++) {
        c = ctx->tl[p][i] |= plane->left_context[i + j];
      }
    }
  }
}

John Koleszar's avatar
John Koleszar committed
536 537 538 539 540 541 542 543 544 545
/* Runs the coefficient optimization on every transform block of plane 0.
 *
 * A fresh optimize_ctx is initialized from the current entropy contexts and
 * optimize_block() is invoked for each transform block in the plane. */
void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x,
                      BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx ctx;
  struct optimize_block_args arg = {cm, x, &ctx};
  vp9_optimize_init(&x->e_mbd, bsize, &ctx);
  foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
#if !CONFIG_SB8X8
  0,
#endif
                                     optimize_block, &arg);
}

John Koleszar's avatar
John Koleszar committed
548 549 550 551 552 553
/* Runs the coefficient optimization on the transform blocks visited by
 * foreach_transformed_block_uv().
 *
 * A fresh optimize_ctx is initialized from the current entropy contexts and
 * optimize_block() is invoked for each visited transform block. */
void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
                       BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx ctx;
  struct optimize_block_args arg = {cm, x, &ctx};
  vp9_optimize_init(&x->e_mbd, bsize, &ctx);
  foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg);
}

556
#if !CONFIG_SB8X8
557
/* Forward transform, quantize, optionally optimize, and inverse transform a
 * 16x16 macroblock, dispatching on the macroblock's transform size.
 *
 *   TX_16X16: 16x16 for plane 0, 8x8 for the remaining planes.
 *   TX_8X8:   8x8 for plane 0; the remaining planes use 4x4 when the mode is
 *             SPLITMV and 8x8 otherwise.
 *   other:    4x4 for all planes.
 *
 * The optimization step runs only when x->optimize is set.
 */
void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;

  if (tx_size == TX_16X16) {
    vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16);
    vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
    vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16);
    vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
    if (x->optimize) {
      vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16);
      vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16);
    }
    vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16);
    vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16);
  } else if (tx_size == TX_8X8) {
    vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16);
    vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16);
    if (x->optimize)
      vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16);
    vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16);
    if (xd->mode_info_context->mbmi.mode == SPLITMV) {
      /* 4x4 partitioning is not expected together with TX_8X8. */
      assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
      vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
      vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
      if (x->optimize)
        vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16);
      vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16);
    } else {
      vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
      vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
      if (x->optimize)
        vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16);
      vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16);
    }
  } else {
    vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16);
    vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
    vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16);
    vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
    if (x->optimize) {
      vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16);
      vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16);
    }
    vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16);
    vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16);
  }
}

606
/* Encodes a 16x16 inter macroblock: builds the inter prediction for all
 * planes, computes the residual, runs vp9_fidct_mb() on it, and calls
 * vp9_recon_sb() to reconstruct the block. */
void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
                           int mi_row, int mi_col) {
  MACROBLOCKD *const xd = &x->e_mbd;

  vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16);
  vp9_subtract_sb(x, BLOCK_SIZE_MB16X16);
  vp9_fidct_mb(cm, x);
  vp9_recon_sb(xd, BLOCK_SIZE_MB16X16);
}
615
#endif
John Koleszar's avatar
John Koleszar committed
616

617
/* this function is used by first pass only */
618
void vp9_encode_inter16x16y(MACROBLOCK *x, int mi_row, int mi_col) {
619
  MACROBLOCKD *xd = &x->e_mbd;
John Koleszar's avatar
John Koleszar committed
620

621
  vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16);
622
  vp9_subtract_sby(x, BLOCK_SIZE_MB16X16);
John Koleszar's avatar
John Koleszar committed
623

624 625 626
  vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16);
  vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16);
  vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16);
Yaowu Xu's avatar
Yaowu Xu committed
627

John Koleszar's avatar
John Koleszar committed
628
  vp9_recon_sby(xd, BLOCK_SIZE_MB16X16);
John Koleszar's avatar
John Koleszar committed
629
}