/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include "./vpx_config.h"
12
#include "vp9/encoder/vp9_encodemb.h"
13
#include "vp9/common/vp9_reconinter.h"
14
15
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_tokenize.h"
16
17
#include "vp9/common/vp9_invtrans.h"
#include "vp9/common/vp9_reconintra.h"
John Koleszar's avatar
John Koleszar committed
18
#include "vpx_mem/vpx_mem.h"
19
#include "vp9/encoder/vp9_rdopt.h"
20
#include "vp9/common/vp9_systemdependent.h"
21
#include "vp9_rtcd.h"
John Koleszar's avatar
John Koleszar committed
22

23
24
25
26
/* Computes the residual diff = src - pred over a rows x cols region.
 *
 * Each of the three buffers is walked row by row using its own stride
 * (expressed in elements of that buffer).  The 8-bit samples are promoted
 * to int before the subtraction, so the stored difference may be negative.
 * Nothing is written when rows or cols is not positive.
 */
void vp9_subtract_block(int rows, int cols,
                        int16_t *diff_ptr, int diff_stride,
                        const uint8_t *src_ptr, int src_stride,
                        const uint8_t *pred_ptr, int pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++)
      diff_ptr[c] = src_ptr[c] - pred_ptr[c];

    /* Advance every buffer to the start of its next row. */
    diff_ptr += diff_stride;
    pred_ptr += pred_stride;
    src_ptr  += src_stride;
  }
}

39

40
41
42
43
/* Writes the residual (source minus prediction) of one plane of a block
 * into x->plane[plane].src_diff.
 *
 * The plane dimensions are derived from bsize and reduced by the plane's
 * subsampling shifts.  The residual buffer is written with a stride equal
 * to the plane width (bw); the prediction is read from the plane's dst
 * buffer in the MACROBLOCKD.
 */
static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
  const MACROBLOCKD * const xd = &x->e_mbd;
  const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
  const int bh = 4 << (b_height_log2(bsize) - xd->plane[plane].subsampling_y);
  const uint8_t *src = x->plane[plane].src.buf;
  const int src_stride = x->plane[plane].src.stride;

  assert(plane < 3);
  vp9_subtract_block(bh, bw,
                     x->plane[plane].src_diff, bw, src, src_stride,
                     xd->plane[plane].dst.buf, xd->plane[plane].dst.stride);
}

53
54
/* Computes the residual for plane 0 of the block. */
void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  subtract_plane(x, bsize, 0);
}

57
58
/* Computes the residual for every plane after plane 0
 * (planes 1 .. MAX_MB_PLANE - 1).
 */
void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  int i;

  for (i = 1; i < MAX_MB_PLANE; i++)
    subtract_plane(x, bsize, i);
}

64
65
66
/* Computes the residual for all planes: plane 0 first, then the rest. */
void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  vp9_subtract_sby(x, bsize);
  vp9_subtract_sbuv(x, bsize);
}

69

70
71
/* Low 8 bits of (128 + R * RM).  UPDATE_RD_COST() uses this as a
 * tie-breaker when the two RDCOST() values compare equal.  The DM and D
 * arguments are accepted but do not take part in the result.
 */
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
/* Same expression as RDTRUNC. */
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
72
typedef struct vp9_token_state vp9_token_state;

/* One candidate state of the coefficient trellis built by optimize_b(). */
struct vp9_token_state {
  int           rate;   /* accumulated rate from this node onwards */
  int           error;  /* accumulated squared error from this node onwards */
  int           next;   /* scan index of the following trellis node */
  signed char   token;  /* token recorded for this state */
  short         qc;     /* quantized coefficient value for this state */
};

82
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2

/* Multiplier applied to the macroblock rdmult in optimize_b(), indexed by
 * the plane's PLANE_TYPE.  Only the first two entries are given explicit
 * values; the remaining entries are zero-initialized.
 */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  UV_RD_MULT,
};

91
92
93
94
95
96
97
98
99
100
/* Recomputes rd_cost0 / rd_cost1 from (rate0, error0) and (rate1, error1).
 *
 * Relies on these names being in scope at the point of use: rdmult, rddiv,
 * rate0, rate1, error0, error1, rd_cost0, rd_cost1.  When both RDCOST()
 * values are equal, the RDTRUNC() values are stored instead so the caller's
 * comparison can still break the tie.
 *
 * Wrapped in do { } while (0) so that "UPDATE_RD_COST();" behaves as a
 * single statement, including inside an unbraced if / else.
 */
#define UPDATE_RD_COST()\
do {\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
} while (0)

101
102
// Returns the coefficient context for scan position idx + 1 under the
// assumption that position idx is coded with `token`.
//
// The token is written into token_cache at scan[idx] only for the duration
// of the vp9_get_coef_context() call; the previous cache entry is restored
// before returning.
//
// This function is a place holder for now but may ultimately need
// to scan previous tokens to work out the correct context.
static int trellis_get_coeff_context(const int *scan,
                                     const int *nb,
                                     int idx, int token,
                                     uint8_t *token_cache,
                                     int pad, int l) {
  int bak = token_cache[scan[idx]], pt;
  token_cache[scan[idx]] = token;
  pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
  token_cache[scan[idx]] = bak;
  return pt;
}

John Koleszar's avatar
John Koleszar committed
115
116
static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
                       int plane, int block, BLOCK_SIZE_TYPE bsize,
117
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
John Koleszar's avatar
John Koleszar committed
118
                       TX_SIZE tx_size) {
119
  const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
120
  MACROBLOCKD *const xd = &mb->e_mbd;
121
122
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
John Koleszar's avatar
John Koleszar committed
123
124
  const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff,
                                          block, 16);
125
126
  int16_t *qcoeff_ptr;
  int16_t *dqcoeff_ptr;
John Koleszar's avatar
John Koleszar committed
127
  int eob = xd->plane[plane].eobs[block], final_eob, sz = 0;
128
  const int i0 = 0;
129
  int rc, x, next, i;
130
131
132
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
John Koleszar's avatar
John Koleszar committed
133
  PLANE_TYPE type = xd->plane[plane].plane_type;
John Koleszar's avatar
John Koleszar committed
134
  int err_mult = plane_rd_mult[type];
135
136
  int default_eob, pad;
  int const *scan, *nb;
137
  const int mul = 1 + (tx_size == TX_32X32);
138
  uint8_t token_cache[1024];
John Koleszar's avatar
John Koleszar committed
139
140
141
  const int ib = txfrm_block_to_raster_block(xd, bsize, plane,
                                             block, 2 * tx_size);
  const int16_t *dequant_ptr = xd->plane[plane].dequant;
Paul Wilkins's avatar
Paul Wilkins committed
142
  const uint8_t * band_translate;
John Koleszar's avatar
John Koleszar committed
143

John Koleszar's avatar
John Koleszar committed
144
145
146
  assert((!type && !plane) || (type && plane));
  dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16);
  qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Deb Mukherjee's avatar
Deb Mukherjee committed
147
  switch (tx_size) {
148
    default:
149
    case TX_4X4: {
John Koleszar's avatar
John Koleszar committed
150
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT;
151
      default_eob = 16;
152
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
153
      band_translate = vp9_coefband_trans_4x4;
154
      break;
155
    }
156
    case TX_8X8: {
John Koleszar's avatar
John Koleszar committed
157
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT;
158
      scan = get_scan_8x8(tx_type);
159
      default_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
160
      band_translate = vp9_coefband_trans_8x8plus;
161
      break;
162
163
    }
    case TX_16X16: {
John Koleszar's avatar
John Koleszar committed
164
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT;
165
      scan = get_scan_16x16(tx_type);
166
      default_eob = 256;
Paul Wilkins's avatar
Paul Wilkins committed
167
      band_translate = vp9_coefband_trans_8x8plus;
168
      break;
169
    }
170
171
172
    case TX_32X32:
      scan = vp9_default_zig_zag1d_32x32;
      default_eob = 1024;
Paul Wilkins's avatar
Paul Wilkins committed
173
      band_translate = vp9_coefband_trans_8x8plus;
174
      break;
175
  }
John Koleszar's avatar
John Koleszar committed
176
  assert(eob <= default_eob);
John Koleszar's avatar
John Koleszar committed
177
178
179
180
181
182

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
183
  memset(best_index, 0, sizeof(best_index));
John Koleszar's avatar
John Koleszar committed
184
185
186
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
187
  tokens[eob][0].next = default_eob;
John Koleszar's avatar
John Koleszar committed
188
189
190
191
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
192
  for (i = 0; i < eob; i++)
193
    token_cache[scan[i]] = vp9_dct_value_tokens_ptr[qcoeff_ptr[scan[i]]].token;
194
195
  nb = vp9_get_coef_neighbors_handle(scan, &pad);

John Koleszar's avatar
John Koleszar committed
196
  for (i = eob; i-- > i0;) {
197
    int base_bits, d2, dx;
John Koleszar's avatar
John Koleszar committed
198

199
    rc = scan[i];
John Koleszar's avatar
John Koleszar committed
200
201
202
203
204
205
206
207
208
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
209
      t0 = (vp9_dct_value_tokens_ptr + x)->token;
John Koleszar's avatar
John Koleszar committed
210
      /* Consider both possible successor states. */
211
      if (next < default_eob) {
Paul Wilkins's avatar
Paul Wilkins committed
212
        band = get_coef_band(band_translate, i + 1);
213
214
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                       pad, default_eob);
John Koleszar's avatar
John Koleszar committed
215
        rate0 +=
216
          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
217
        rate1 +=
218
          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
219
      }
220
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
221
222
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
223
      base_bits = *(vp9_dct_value_cost_ptr + x);
224
      dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
John Koleszar's avatar
John Koleszar committed
225
226
227
228
229
230
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
231
      best_index[i][0] = best;
232

John Koleszar's avatar
John Koleszar committed
233
234
235
236
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

237
238
239
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
                                         dequant_ptr[rc != 0]))
John Koleszar's avatar
John Koleszar committed
240
241
242
243
244
245
246
247
248
249
250
251
252
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
253
         */
John Koleszar's avatar
John Koleszar committed
254
255
256
257
258
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
259
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
John Koleszar's avatar
John Koleszar committed
260
      }
261
      if (next < default_eob) {
Paul Wilkins's avatar
Paul Wilkins committed
262
        band = get_coef_band(band_translate, i + 1);
John Koleszar's avatar
John Koleszar committed
263
        if (t0 != DCT_EOB_TOKEN) {
264
265
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                         pad, default_eob);
266
          rate0 += mb->token_costs[tx_size][type][ref][band][pt][
267
              tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
268
        }
John Koleszar's avatar
John Koleszar committed
269
        if (t1 != DCT_EOB_TOKEN) {
270
271
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
                                         pad, default_eob);
272
          rate1 += mb->token_costs[tx_size][type][ref][band][pt][
273
              tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
274
275
        }
      }
John Koleszar's avatar
John Koleszar committed
276

277
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
278
279
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
280
      base_bits = *(vp9_dct_value_cost_ptr + x);
John Koleszar's avatar
John Koleszar committed
281
282

      if (shortcut) {
283
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
John Koleszar's avatar
John Koleszar committed
284
285
286
287
288
289
290
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
291
      best_index[i][1] = best;
John Koleszar's avatar
John Koleszar committed
292
293
      /* Finally, make this the new head of the trellis. */
      next = i;
294
    }
John Koleszar's avatar
John Koleszar committed
295
296
297
298
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
Paul Wilkins's avatar
Paul Wilkins committed
299
      band = get_coef_band(band_translate, i + 1);
John Koleszar's avatar
John Koleszar committed
300
301
302
303
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
304
305
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t0];
John Koleszar's avatar
John Koleszar committed
306
307
308
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
309
310
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t1];
John Koleszar's avatar
John Koleszar committed
311
312
313
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
314
    }
John Koleszar's avatar
John Koleszar committed
315
316
317
  }

  /* Now pick the best path through the whole trellis. */
Paul Wilkins's avatar
Paul Wilkins committed
318
  band = get_coef_band(band_translate, i + 1);
319
  pt = combine_entropy_contexts(*a, *l);
John Koleszar's avatar
John Koleszar committed
320
321
322
323
324
325
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
326
327
  rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
328
  UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
329
330
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
331
332
  vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
  vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
John Koleszar's avatar
John Koleszar committed
333
334
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
335
    if (x) {
John Koleszar's avatar
John Koleszar committed
336
      final_eob = i;
337
    }
338
    rc = scan[i];
John Koleszar's avatar
John Koleszar committed
339
    qcoeff_ptr[rc] = x;
340
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
341

John Koleszar's avatar
John Koleszar committed
342
    next = tokens[i][best].next;
343
    best = best_index[i][best];
John Koleszar's avatar
John Koleszar committed
344
345
346
  }
  final_eob++;

John Koleszar's avatar
John Koleszar committed
347
  xd->plane[plane].eobs[block] = final_eob;
348
  *a = *l = (final_eob > 0);
349
350
}

John Koleszar's avatar
John Koleszar committed
351
352
353
354
355
/* Argument bundle handed to optimize_block() through the void *arg of the
 * foreach_transformed_block_* iterators; its members are forwarded to
 * vp9_optimize_b().
 */
struct optimize_block_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  struct optimize_ctx *ctx;
};
356

357
358
359
360
/* Runs the trellis optimization on one transform block.
 *
 * The block index is converted to raster (x, y) coordinates, which select
 * the above / left entropy context entries in ctx that optimize_b() reads
 * and updates.  The transform size passed on is ss_txfrm_size / 2.
 */
void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
                    int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb,
                    struct optimize_ctx *ctx) {
  MACROBLOCKD* const xd = &mb->e_mbd;
  int x, y;

  // find current entropy context
  txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);

  optimize_b(cm, mb, plane, block, bsize,
             &ctx->ta[plane][x], &ctx->tl[plane][y],
             ss_txfrm_size / 2);
}

371
372
373
374
375
376
377
/* Per-block callback for the foreach_transformed_block_* iterators:
 * unpacks the optimize_block_args bundle behind `arg` and forwards the
 * block to vp9_optimize_b().
 */
static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  const struct optimize_block_args *const params =
      (const struct optimize_block_args *)arg;

  vp9_optimize_b(plane, block, bsize, ss_txfrm_size,
                 params->cm, params->x, params->ctx);
}

John Koleszar's avatar
John Koleszar committed
378
379
380
381
382
383
384
385
/* Fills ctx->ta / ctx->tl with one entropy context value per transform
 * block for every plane.
 *
 * For each plane the above (resp. left) context array is walked in steps
 * of 1 << tx_size; the entries covered by one transform block are OR-ed
 * into the entry at the block's first position, stopping as soon as the
 * accumulated value becomes non-zero.  Plane 0 uses the mode info's
 * txfm_size, the other planes use get_uv_tx_size().
 */
void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
                       struct optimize_ctx *ctx) {
  int p;

  for (p = 0; p < MAX_MB_PLANE; p++) {
    const struct macroblockd_plane* const plane = &xd->plane[p];
    const int bwl = b_width_log2(bsize) - plane->subsampling_x;
    const int bhl = b_height_log2(bsize) - plane->subsampling_y;
    const TX_SIZE tx_size = p ? get_uv_tx_size(xd)
                              : xd->mode_info_context->mbmi.txfm_size;
    int i, j;

    /* Above contexts. */
    for (i = 0; i < 1 << bwl; i += 1 << tx_size) {
      int c = 0;
      ctx->ta[p][i] = 0;
      for (j = 0; j < 1 << tx_size && !c; j++) {
        c = ctx->ta[p][i] |= plane->above_context[i + j];
      }
    }
    /* Left contexts. */
    for (i = 0; i < 1 << bhl; i += 1 << tx_size) {
      int c = 0;
      ctx->tl[p][i] = 0;
      for (j = 0; j < 1 << tx_size && !c; j++) {
        c = ctx->tl[p][i] |= plane->left_context[i + j];
      }
    }
  }
}

John Koleszar's avatar
John Koleszar committed
407
408
409
410
411
412
413
/* Trellis-optimizes every transform block of plane 0, using a freshly
 * initialized local entropy context.
 */
void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x,
                      BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx ctx;
  struct optimize_block_args arg = {cm, x, &ctx};
  vp9_optimize_init(&x->e_mbd, bsize, &ctx);
  foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
                                     optimize_block, &arg);
}

John Koleszar's avatar
John Koleszar committed
416
417
418
419
420
421
/* Trellis-optimizes every transform block visited by
 * foreach_transformed_block_uv(), using a freshly initialized local
 * entropy context.
 */
void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
                       BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx ctx;
  struct optimize_block_args arg = {cm, x, &ctx};
  vp9_optimize_init(&x->e_mbd, bsize, &ctx);
  foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg);
}

424
425
426
427
428
429
/* Argument bundle handed to xform_quant() and encode_block() through the
 * void *arg of the foreach_transformed_block_* iterators.  ctx is only
 * dereferenced by encode_block() when x->optimize is set; the
 * vp9_xform_quant_* entry points pass NULL for it.
 */
struct encode_b_args {
  VP9_COMMON *cm;
  MACROBLOCK *x;
  struct optimize_ctx *ctx;
};

430
/* Forward-transforms and quantizes one transform block.
 *
 * The residual is read from x->plane[plane].src_diff at the block's raster
 * position and the coefficients are written to the block's slot in
 * x->plane[plane].coeff.  ss_txfrm_size / 2 selects the transform size.
 * For plane 0 the transform type comes from get_tx_type_*(); other planes
 * (and 32x32) always use DCT_DCT.  `arg` points to a struct encode_b_args.
 *
 * NOTE(review): the hybrid transforms are passed `bw` while the DCT entry
 * points are passed `bw * 2` -- presumably element pitch versus byte
 * pitch; confirm against the transform prototypes.
 */
static void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize,
                        int ss_txfrm_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
  const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
                                                       block, ss_txfrm_size);
  int16_t* const src_diff = raster_block_offset_int16(xd, bsize, plane,
                                                      raster_block,
                                                      x->plane[plane].src_diff);
  TX_TYPE tx_type = DCT_DCT;

  switch (ss_txfrm_size / 2) {
    case TX_32X32:
      vp9_short_fdct32x32(src_diff,
                          BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                          bw * 2);
      break;
    case TX_16X16:
      tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht16x16(src_diff,
                           BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                           bw, tx_type);
      } else {
        x->fwd_txm16x16(src_diff,
                        BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                        bw * 2);
      }
      break;
    case TX_8X8:
      tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht8x8(src_diff,
                         BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                         bw, tx_type);
      } else {
        x->fwd_txm8x8(src_diff,
                      BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                      bw * 2);
      }
      break;
    case TX_4X4:
      tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht4x4(src_diff,
                         BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                         bw, tx_type);
      } else {
        x->fwd_txm4x4(src_diff,
                      BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
                      bw * 2);
      }
      break;
    default:
      assert(0);
  }

  vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type);
}

/* Encodes one transform block and reconstructs it in place.
 *
 * Steps: forward transform + quantize (xform_quant), optional trellis
 * optimization when x->optimize is set, then the inverse transform of the
 * block's dqcoeff added onto the plane's dst buffer at the block's raster
 * position.  `arg` points to a struct encode_b_args.
 */
static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                         int ss_txfrm_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
                                                       block, ss_txfrm_size);
  uint8_t* const dst = raster_block_offset_uint8(xd, bsize, plane,
                                                 raster_block,
                                                 xd->plane[plane].dst.buf,
                                                 xd->plane[plane].dst.stride);
  TX_TYPE tx_type = DCT_DCT;

  xform_quant(plane, block, bsize, ss_txfrm_size, arg);

  if (x->optimize)
    vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx);

  switch (ss_txfrm_size / 2) {
    case TX_32X32:
        vp9_short_idct32x32_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
                                block, 16), dst, xd->plane[plane].dst.stride);
      break;
    case TX_16X16:
      tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        vp9_short_idct16x16_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
                                block, 16), dst, xd->plane[plane].dst.stride);
      } else {
        vp9_short_iht16x16_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
                               block, 16), dst, xd->plane[plane].dst.stride,
                               tx_type);
      }
      break;
    case TX_8X8:
      tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        vp9_short_idct8x8_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
                              block, 16), dst, xd->plane[plane].dst.stride);
      } else {
        vp9_short_iht8x8_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
                             block, 16), dst, xd->plane[plane].dst.stride,
                             tx_type);
      }
      break;
    case TX_4X4:
      tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        vp9_inverse_transform_b_4x4_add(xd, xd->plane[plane].eobs[block],
            BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), dst,
            xd->plane[plane].dst.stride);
      } else {
        vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
                             dst, xd->plane[plane].dst.stride, tx_type);
      }
      break;
  }
}

554
555
556
/* Forward-transforms and quantizes every transform block of plane 0.
 * No optimization context is needed, so the ctx member is NULL.
 */
void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x,
                         BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct encode_b_args arg = {cm, x, NULL};

  foreach_transformed_block_in_plane(xd, bsize, 0, xform_quant, &arg);
}

/* Forward-transforms and quantizes every transform block visited by
 * foreach_transformed_block_uv().  No optimization context is needed, so
 * the ctx member is NULL.
 */
void vp9_xform_quant_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct encode_b_args arg = {cm, x, NULL};

  foreach_transformed_block_uv(xd, bsize, xform_quant, &arg);
}

/* Full encode of plane 0: residual computation, then encode_block() on
 * every transform block.  The local entropy context is only initialized
 * when x->optimize is set, which is also the only case in which
 * encode_block() passes it on for use.
 */
void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x,
                    BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {cm, x, &ctx};

  vp9_subtract_sby(x, bsize);
  if (x->optimize)
    vp9_optimize_init(xd, bsize, &ctx);

  foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg);
}

/* Full encode of the planes after plane 0: residual computation, then
 * encode_block() on every block visited by foreach_transformed_block_uv().
 * The local entropy context is only initialized when x->optimize is set,
 * which is also the only case in which encode_block() passes it on for use.
 */
void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {cm, x, &ctx};

  vp9_subtract_sbuv(x, bsize);
  if (x->optimize)
    vp9_optimize_init(xd, bsize, &ctx);

  foreach_transformed_block_uv(xd, bsize, encode_block, &arg);
}

596
597
598
599
/* Full encode of all planes: residual computation, then encode_block() on
 * every transform block.  The local entropy context is only initialized
 * when x->optimize is set, which is also the only case in which
 * encode_block() passes it on for use.
 */
void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x,
                   BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {cm, x, &ctx};

  vp9_subtract_sb(x, bsize);
  if (x->optimize)
    vp9_optimize_init(xd, bsize, &ctx);

  foreach_transformed_block(xd, bsize, encode_block, &arg);
}