/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
12

#include "./vp9_rtcd.h"
13
#include "./vpx_config.h"
14
15
16
17

#include "vpx_mem/vpx_mem.h"

#include "vp9/common/vp9_idct.h"
18
19
20
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_systemdependent.h"
21
22
23

#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
24
#include "vp9/encoder/vp9_rd.h"
25
#include "vp9/encoder/vp9_tokenize.h"
John Koleszar's avatar
John Koleszar committed
26

27
28
29
30
31
32
33
34
// Per-plane entropy context scratch arrays used while encoding blocks.
// encode_block() indexes `ta` with the block's raster column offset and `tl`
// with its raster row offset.
// NOTE(review): ta/tl are presumably the "above"/"left" contexts — confirm.
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};

// Argument bundle passed to the per-transform-block callbacks through their
// `void *arg` parameter (see encode_block()).
struct encode_b_args {
  MACROBLOCK *x;             // macroblock being encoded
  struct optimize_ctx *ctx;  // entropy contexts read and updated per block
  int8_t *skip;              // set to 0 when a block ends with a nonzero eob
};

38
// Computes the residual of a rows x cols block: diff = src - pred.
//
// Each buffer is walked row by row using its own stride (in elements).
// The 8-bit inputs are subtracted as ints and stored as int16_t, so results
// lie in [-255, 255].
void vp9_subtract_block_c(int rows, int cols,
                          int16_t *diff, ptrdiff_t diff_stride,
                          const uint8_t *src, ptrdiff_t src_stride,
                          const uint8_t *pred, ptrdiff_t pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++) {
      diff[c] = src[c] - pred[c];
    }

    diff += diff_stride;
    pred += pred_stride;
    src  += src_stride;
  }
}

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth residual: diff = src - pred over a rows x cols block.
// src8/pred8 are converted to 16-bit sample pointers with
// CONVERT_TO_SHORTPTR before use. `bd` is accepted but unused.
void vp9_high_subtract_block_c(int rows, int cols,
                               int16_t *diff, ptrdiff_t diff_stride,
                               const uint8_t *src8, ptrdiff_t src_stride,
                               const uint8_t *pred8, ptrdiff_t pred_stride,
                               int bd) {
  uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  uint16_t *pred_row = CONVERT_TO_SHORTPTR(pred8);
  int16_t *diff_row = diff;
  int row;
  (void) bd;

  for (row = 0; row < rows; ++row) {
    int col = 0;
    while (col < cols) {
      diff_row[col] = src_row[col] - pred_row[col];
      ++col;
    }

    // Step every buffer to its next row.
    diff_row += diff_stride;
    pred_row += pred_stride;
    src_row += src_stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

77
// Fills p->src_diff for one plane with (source - prediction) over the plane's
// block size. The prediction is read from pd->dst; the residual is stored
// with a stride equal to the block width in pixels.
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  // Lookup tables count 4x4 units; convert to pixels.
  const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_high_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                            pd->dst.buf, pd->dst.stride, x->e_mbd.bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
}

95
// Tie-break value used by UPDATE_RD_COST when two RDCOST results are equal:
// the low 8 bits of (R * RM + 128). DM and D are accepted but not used.
#define RDTRUNC(RM, DM, R, D) (((R) * (RM) + 128) & 0xFF)
96

Dmitry Kovalev's avatar
Dmitry Kovalev committed
97
// One node of the trellis built by optimize_b().
typedef struct vp9_token_state {
  int           rate;   // accumulated rate from this node to the end
  int           error;  // accumulated squared error from this node to the end
  int           next;   // scan index of the following trellis node
  signed char   token;  // token chosen for this node
  short         qc;     // quantized coefficient value for this node
} vp9_token_state;
104

105
// TODO(jimbankoski): experiment to find optimal RD numbers.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
106
// Scale factor applied to the macroblock's rdmult in optimize_b(), indexed by
// the plane's PLANE_TYPE.
static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };
107

108
109
110
111
112
113
114
115
116
117
// Recomputes rd_cost0/rd_cost1 from (rate0, error0) and (rate1, error1).
// If the two RDCOST values are equal, both are replaced by their RDTRUNC
// values so the comparison that follows can still tell them apart.
// Requires rdmult, rddiv, rate0/1, error0/1 and rd_cost0/1 in scope.
// Wrapped in do { } while (0) so the macro acts as a single statement.
#define UPDATE_RD_COST()\
do {\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
} while (0)

118
119
// This function is a place holder for now but may ultimately need
// to scan previous tokens to work out the correct context.
120
121
static int trellis_get_coeff_context(const int16_t *scan,
                                     const int16_t *nb,
122
                                     int idx, int token,
123
                                     uint8_t *token_cache) {
124
  int bak = token_cache[scan[idx]], pt;
125
  token_cache[scan[idx]] = vp9_pt_energy_class[token];
126
  pt = get_coef_context(nb, token_cache, idx + 1);
127
  token_cache[scan[idx]] = bak;
128
  return pt;
129
130
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
131
static int optimize_b(MACROBLOCK *mb, int plane, int block,
Yaowu Xu's avatar
Yaowu Xu committed
132
                      TX_SIZE tx_size, int ctx) {
133
  MACROBLOCKD *const xd = &mb->e_mbd;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
134
135
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
hkuang's avatar
hkuang committed
136
  const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi);
137
138
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
139
  uint8_t token_cache[1024];
140
141
142
  const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
143
144
  const int eob = p->eobs[block];
  const PLANE_TYPE type = pd->plane_type;
145
  const int default_eob = 16 << (tx_size << 1);
146
  const int mul = 1 + (tx_size == TX_32X32);
147
  const int16_t *dequant_ptr = pd->dequant;
148
  const uint8_t *const band_translate = get_band_translate(tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
149
150
151
152
153
154
155
156
  const scan_order *const so = get_scan(xd, tx_size, type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  int next = eob, sz = 0;
  int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt, i, final_eob;
157
158
  const TOKENVALUE *dct_value_tokens;
  const int16_t *dct_value_cost;
John Koleszar's avatar
John Koleszar committed
159

John Koleszar's avatar
John Koleszar committed
160
  assert((!type && !plane) || (type && plane));
John Koleszar's avatar
John Koleszar committed
161
  assert(eob <= default_eob);
John Koleszar's avatar
John Koleszar committed
162
163

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
Dmitry Kovalev's avatar
Dmitry Kovalev committed
164
  if (!ref)
John Koleszar's avatar
John Koleszar committed
165
    rdmult = (rdmult * 9) >> 4;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
166

John Koleszar's avatar
John Koleszar committed
167
168
169
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
170
  tokens[eob][0].next = default_eob;
171
  tokens[eob][0].token = EOB_TOKEN;
John Koleszar's avatar
John Koleszar committed
172
  tokens[eob][0].qc = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
173
174
  tokens[eob][1] = tokens[eob][0];

175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->bd == 12) {
    dct_value_tokens = vp9_dct_value_tokens_high12_ptr;
    dct_value_cost = vp9_dct_value_cost_high12_ptr;
  } else if (xd->bd == 10) {
    dct_value_tokens = vp9_dct_value_tokens_high10_ptr;
    dct_value_cost = vp9_dct_value_cost_high10_ptr;
  } else {
    dct_value_tokens = vp9_dct_value_tokens_ptr;
    dct_value_cost = vp9_dct_value_cost_ptr;
  }
#else
  dct_value_tokens = vp9_dct_value_tokens_ptr;
  dct_value_cost = vp9_dct_value_cost_ptr;
#endif
190
  for (i = 0; i < eob; i++)
Dmitry Kovalev's avatar
Dmitry Kovalev committed
191
    token_cache[scan[i]] =
192
        vp9_pt_energy_class[dct_value_tokens[qcoeff[scan[i]]].token];
193

Dmitry Kovalev's avatar
Dmitry Kovalev committed
194
  for (i = eob; i-- > 0;) {
195
    int base_bits, d2, dx;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
196
197
    const int rc = scan[i];
    int x = qcoeff[rc];
John Koleszar's avatar
John Koleszar committed
198
199
200
201
202
203
204
205
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
206
      t0 = (dct_value_tokens + x)->token;
John Koleszar's avatar
John Koleszar committed
207
      /* Consider both possible successor states. */
208
      if (next < default_eob) {
209
        band = band_translate[i + 1];
210
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
211
212
213
214
        rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
                                [tokens[next][0].token];
        rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
                                [tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
215
      }
216
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
217
218
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
219
      base_bits = dct_value_cost[x];
220
      dx = mul * (dqcoeff[rc] - coeff[rc]);
221
222
223
224
225
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
John Koleszar's avatar
John Koleszar committed
226
227
228
229
230
231
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
232
      best_index[i][0] = best;
233

John Koleszar's avatar
John Koleszar committed
234
235
236
237
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
238
239
240
      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
          (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
                                               dequant_ptr[rc != 0]))
John Koleszar's avatar
John Koleszar committed
241
242
243
244
245
246
247
248
249
250
251
252
253
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
254
         */
255
256
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
John Koleszar's avatar
John Koleszar committed
257
      } else {
258
        t0 = t1 = (dct_value_tokens + x)->token;
John Koleszar's avatar
John Koleszar committed
259
      }
260
      if (next < default_eob) {
261
        band = band_translate[i + 1];
262
        if (t0 != EOB_TOKEN) {
263
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
264
          rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
265
                                  [tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
266
        }
267
        if (t1 != EOB_TOKEN) {
268
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
269
          rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
270
                                  [tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
271
272
        }
      }
John Koleszar's avatar
John Koleszar committed
273

274
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
275
276
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
277
      base_bits = dct_value_cost[x];
John Koleszar's avatar
John Koleszar committed
278
279

      if (shortcut) {
280
281
282
283
284
285
286
287
288
#if CONFIG_VP9_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
        } else {
          dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        }
#else
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
#endif  // CONFIG_VP9_HIGHBITDEPTH
John Koleszar's avatar
John Koleszar committed
289
290
291
292
293
294
295
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
296
      best_index[i][1] = best;
John Koleszar's avatar
John Koleszar committed
297
298
      /* Finally, make this the new head of the trellis. */
      next = i;
299
300
301
302
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       *  add a new trellis node, but we do need to update the costs.
       */
303
      band = band_translate[i + 1];
John Koleszar's avatar
John Koleszar committed
304
305
306
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
307
      if (t0 != EOB_TOKEN) {
308
        tokens[next][0].rate +=
309
            mb->token_costs[tx_size][type][ref][band][1][0][t0];
John Koleszar's avatar
John Koleszar committed
310
311
        tokens[next][0].token = ZERO_TOKEN;
      }
312
      if (t1 != EOB_TOKEN) {
313
        tokens[next][1].rate +=
314
            mb->token_costs[tx_size][type][ref][band][1][0][t1];
John Koleszar's avatar
John Koleszar committed
315
316
        tokens[next][1].token = ZERO_TOKEN;
      }
317
      best_index[i][0] = best_index[i][1] = 0;
John Koleszar's avatar
John Koleszar committed
318
      /* Don't update next, because we didn't add a new node. */
319
    }
John Koleszar's avatar
John Koleszar committed
320
321
322
  }

  /* Now pick the best path through the whole trellis. */
323
  band = band_translate[i + 1];
John Koleszar's avatar
John Koleszar committed
324
325
326
327
328
329
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
330
331
  rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
332
  UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
333
  best = rd_cost1 < rd_cost0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
334
  final_eob = -1;
335
336
  vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
  vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
John Koleszar's avatar
John Koleszar committed
337
  for (i = next; i < eob; i = next) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
338
339
    const int x = tokens[i][best].qc;
    const int rc = scan[i];
340
    if (x) {
John Koleszar's avatar
John Koleszar committed
341
      final_eob = i;
342
    }
Dmitry Kovalev's avatar
Dmitry Kovalev committed
343

344
345
    qcoeff[rc] = x;
    dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
346

John Koleszar's avatar
John Koleszar committed
347
    next = tokens[i][best].next;
348
    best = best_index[i][best];
John Koleszar's avatar
John Koleszar committed
349
350
351
  }
  final_eob++;

352
  mb->plane[plane].eobs[block] = final_eob;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
353
  return final_eob;
354
355
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
356
// 32x32 forward DCT dispatcher: calls vp9_fdct32x32_rd when rd_transform is
// nonzero, otherwise vp9_fdct32x32.
static INLINE void fdct32x32(int rd_transform,
                             const int16_t *src, tran_low_t *dst,
                             int src_stride) {
  if (rd_transform) {
    vp9_fdct32x32_rd(src, dst, src_stride);
  } else {
    vp9_fdct32x32(src, dst, src_stride);
  }
}

365
366
367
368
369
370
371
372
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth 32x32 forward DCT dispatcher: vp9_high_fdct32x32_rd when
// rd_transform is nonzero, vp9_high_fdct32x32 otherwise.
static INLINE void high_fdct32x32(int rd_transform, const int16_t *src,
                                  tran_low_t *dst, int src_stride) {
  if (!rd_transform) {
    vp9_high_fdct32x32(src, dst, src_stride);
    return;
  }
  vp9_high_fdct32x32_rd(src, dst, src_stride);
}
373
#endif  // CONFIG_VP9_HIGHBITDEPTH
374

375
376
377
378
379
// Forward-transforms one transform block of the residual and quantizes it
// with the "_fp" quantizers (p->round_fp / p->quant_fp).
//
// The residual is read from p->src_diff at the block's raster position.
// Outputs go to the block's coeff, qcoeff and dqcoeff buffers and to
// p->eobs[block]. The default scan order for tx_size is always used.
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  int i, j;
  const int16_t *src_diff;
  // (i, j) is the block position in 4-pixel units; scale to pixels below.
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_high_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                                   p->round_fp, p->quant_fp, p->quant_shift,
                                   qcoeff, dqcoeff, pd->dequant, p->zbin_extra,
                                   eob, scan_order->scan, scan_order->iscan);
        break;
      case TX_16X16:
        vp9_high_fdct16x16(src_diff, coeff, diff_stride);
        vp9_high_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                             p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, p->zbin_extra, eob,
                             scan_order->scan, scan_order->iscan);
        break;
      case TX_8X8:
        vp9_high_fdct8x8(src_diff, coeff, diff_stride);
        vp9_high_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                             p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, p->zbin_extra, eob,
                             scan_order->scan, scan_order->iscan);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_high_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                             p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, p->zbin_extra, eob,
                             scan_order->scan, scan_order->iscan);
        break;
      default:
        assert(0);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, p->zbin_extra, eob, scan_order->scan,
                            scan_order->iscan);
      break;
    case TX_16X16:
      vp9_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob,
                      scan_order->scan, scan_order->iscan);
      break;
    case TX_8X8:
      vp9_fdct8x8(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob,
                      scan_order->scan, scan_order->iscan);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                      p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob,
                      scan_order->scan, scan_order->iscan);
      break;
    default:
      assert(0);
      break;
  }
}

// Fast-path transform and quantization for one transform block.
//
// For 8x8 and larger blocks the "_1" forward transforms are used; 4x4 blocks
// go through x->fwd_txm4x4. The result is quantized with the DC-only
// quantizers, using p->round, p->quant_fp[0] and pd->dequant[0].
// Outputs go to the block's coeff, qcoeff and dqcoeff buffers and to
// p->eobs[block].
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  int i, j;
  const int16_t *src_diff;

  // (i, j) is the block position in 4-pixel units; scale to pixels below.
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        vp9_high_fdct32x32_1(src_diff, coeff, diff_stride);
        vp9_high_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                   p->quant_fp[0], qcoeff, dqcoeff,
                                   pd->dequant[0], eob);
        break;
      case TX_16X16:
        vp9_high_fdct16x16_1(src_diff, coeff, diff_stride);
        vp9_high_quantize_dc(coeff, x->skip_block, p->round,
                             p->quant_fp[0], qcoeff, dqcoeff,
                             pd->dequant[0], eob);
        break;
      case TX_8X8:
        vp9_high_fdct8x8_1(src_diff, coeff, diff_stride);
        vp9_high_quantize_dc(coeff, x->skip_block, p->round,
                             p->quant_fp[0], qcoeff, dqcoeff,
                             pd->dequant[0], eob);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_high_quantize_dc(coeff, x->skip_block, p->round,
                             p->quant_fp[0], qcoeff, dqcoeff,
                             pd->dequant[0], eob);
        break;
      default:
        assert(0);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vp9_fdct32x32_1(src_diff, coeff, diff_stride);
      vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
                            p->quant_fp[0], qcoeff, dqcoeff,
                            pd->dequant[0], eob);
      break;
    case TX_16X16:
      vp9_fdct16x16_1(src_diff, coeff, diff_stride);
      vp9_quantize_dc(coeff, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_8X8:
      vp9_fdct8x8_1(src_diff, coeff, diff_stride);
      vp9_quantize_dc(coeff, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_dc(coeff, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    default:
      assert(0);
      break;
  }
}

545
546
547
// Forward-transforms one transform block of the residual and quantizes it
// with the "_b" quantizers (p->round / p->quant).
//
// The residual is read from p->src_diff at the block's raster position.
// Outputs go to the block's coeff, qcoeff and dqcoeff buffers and to
// p->eobs[block]. The default scan order for tx_size is always used.
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  int i, j;
  const int16_t *src_diff;
  // (i, j) is the block position in 4-pixel units; scale to pixels below.
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_high_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                  p->round, p->quant, p->quant_shift, qcoeff,
                                  dqcoeff, pd->dequant, p->zbin_extra, eob,
                                  scan_order->scan, scan_order->iscan);
        break;
      case TX_16X16:
        vp9_high_fdct16x16(src_diff, coeff, diff_stride);
        vp9_high_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                            p->quant, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, p->zbin_extra, eob,
                            scan_order->scan, scan_order->iscan);
        break;
      case TX_8X8:
        vp9_high_fdct8x8(src_diff, coeff, diff_stride);
        vp9_high_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                            p->quant, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, p->zbin_extra, eob,
                            scan_order->scan, scan_order->iscan);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_high_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                            p->quant, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, p->zbin_extra, eob,
                            scan_order->scan, scan_order->iscan);
        break;
      default:
        assert(0);
        break;
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, p->zbin_extra, eob, scan_order->scan,
                           scan_order->iscan);
      break;
    case TX_16X16:
      vp9_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    case TX_8X8:
      vp9_fdct8x8(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob,
                     scan_order->scan, scan_order->iscan);
      break;
    default:
      assert(0);
      break;
  }
}

634
static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
635
                         TX_SIZE tx_size, void *arg) {
636
637
638
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
639
  struct optimize_ctx *const ctx = args->ctx;
640
  struct macroblock_plane *const p = &x->plane[plane];
641
  struct macroblockd_plane *const pd = &xd->plane[plane];
642
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
643
644
  int i, j;
  uint8_t *dst;
645
  ENTROPY_CONTEXT *a, *l;
646
647
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
648
649
  a = &ctx->ta[plane][i];
  l = &ctx->tl[plane][j];
650
651
652
653

  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
654
    p->eobs[block] = 0;
655
    *a = *l = 0;
656
657
658
    return;
  }

659
  if (!x->skip_recode) {
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
    if (max_txsize_lookup[plane_bsize] == tx_size) {
      if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
        // full forward transform and quantization
        if (x->quant_fp)
          vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
        else
          vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
      } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
        // fast path forward transform and quantization
        vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
      } else {
        // skip forward transform
        p->eobs[block] = 0;
        *a = *l = 0;
        return;
      }
676
    } else {
677
      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
Jingning Han's avatar
Jingning Han committed
678
    }
679
  }
680

681
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
682
    const int ctx = combine_entropy_contexts(*a, *l);
Yaowu Xu's avatar
Yaowu Xu committed
683
    *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
684
  } else {
685
    *a = *l = p->eobs[block] > 0;
686
  }
John Koleszar's avatar
John Koleszar committed
687

Jim Bankoski's avatar
Jim Bankoski committed
688
  if (p->eobs[block])
689
    *(args->skip) = 0;
Jim Bankoski's avatar
Jim Bankoski committed
690

691
  if (x->skip_encode || p->eobs[block] == 0)
692
    return;
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        vp9_high_idct32x32_add(dqcoeff, dst, pd->dst.stride,
                               p->eobs[block], xd->bd);
        break;
      case TX_16X16:
        vp9_high_idct16x16_add(dqcoeff, dst, pd->dst.stride,
                               p->eobs[block], xd->bd);
        break;
      case TX_8X8:
        vp9_high_idct8x8_add(dqcoeff, dst, pd->dst.stride,
                             p->eobs[block], xd->bd);
        break;
      case TX_4X4:
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        x->high_itxm_add(dqcoeff, dst, pd->dst.stride,
                         p->eobs[block], xd->bd);
        break;
      default:
        assert(0 && "Invalid transform size");
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
721

722
  switch (tx_size) {
723
    case TX_32X32:
724
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
725
726
      break;
    case TX_16X16:
727
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
728
729
      break;
    case TX_8X8:
730
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
731
732
      break;
    case TX_4X4:
733
734
735
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
736
      x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
737
      break;
738
    default:
James Zern's avatar
James Zern committed
739
      assert(0 && "Invalid transform size");
740
      break;
741
  }
John Koleszar's avatar
John Koleszar committed
742
}
743

744
745
// Per-transform-block callback used by vp9_encode_sby_pass1().
//
// Runs the forward transform and quantization for one block of `plane` and,
// when at least one coefficient survives (eob > 0), adds the inverse
// transform of the dequantized coefficients into the plane's destination
// buffer.
//
//   plane        plane index into x->plane[] / xd->plane[]
//   block        transform block index within the plane
//   plane_bsize  block size of the plane being coded
//   tx_size      transform size, forwarded to the position lookup and to
//                vp9_xform_quant()
//   arg          opaque callback argument; must point at the MACROBLOCK
static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
                               TX_SIZE tx_size, void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  int col, row;
  uint8_t *recon;

  // Position of this transform block inside the plane, in 4-sample units.
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &col, &row);
  recon = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

  // Nothing to reconstruct when every coefficient quantized to zero.
  if (p->eobs[block] <= 0) {
    return;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    // High bit-depth frame buffer: use the bit-depth aware inverse transform.
    x->high_itxm_add(dqcoeff, recon, pd->dst.stride, p->eobs[block], xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  x->itxm_add(dqcoeff, recon, pd->dst.stride, p->eobs[block]);
}

769
// First-pass encode of the luma plane (plane 0) of a block of size `bsize`.
//
// Computes the source-minus-prediction residual for plane 0, then visits
// every transform block of that plane with encode_block_pass1(), passing the
// macroblock itself as the callback argument.
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int luma_plane = 0;

  vp9_subtract_plane(x, bsize, luma_plane);
  vp9_foreach_transformed_block_in_plane(xd, bsize, luma_plane,
                                         encode_block_pass1, x);
}

775
// Encodes every plane of the current block of size `bsize`.
//
// The block's mode-info skip flag is first set to 1; encode_block() clears it
// through args->skip as soon as a transform block ends up with a non-zero
// eob. When x->skip is already set, no plane is processed and the flag stays
// at 1.
//
// For each plane:
//   - the residual is recomputed unless x->skip_recode is set;
//   - when coefficient optimization will run (x->optimize set and not both
//     skip_recode and skip_optimize), the above/left entropy contexts for
//     the plane are loaded into the local optimize_ctx;
//   - encode_block() is invoked for every transform block in the plane.
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
  struct optimize_ctx ctx;
  struct encode_b_args arg;
  int plane;

  arg.x = x;
  arg.ctx = &ctx;
  arg.skip = &mbmi->skip;

  // Assume "all coefficients zero" until a transform block proves otherwise.
  mbmi->skip = 1;

  if (!x->skip) {
    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
      if (!x->skip_recode) {
        vp9_subtract_plane(x, bsize, plane);
      }

      if (x->optimize && !(x->skip_recode && x->skip_optimize)) {
        const struct macroblockd_plane *const pd = &xd->plane[plane];
        TX_SIZE tx_size;

        // Plane 0 uses the block's own transform size; the other planes use
        // the size derived by get_uv_tx_size().
        if (plane) {
          tx_size = get_uv_tx_size(mbmi, pd);
        } else {
          tx_size = mbmi->tx_size;
        }

        vp9_get_entropy_contexts(bsize, tx_size, pd,
                                 ctx.ta[plane], ctx.tl[plane]);
      }

      vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
                                             &arg);
    }
  }
}
802

803
804
static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                               TX_SIZE tx_size, void *arg) {
805
  struct encode_b_args* const args = arg;
806
807
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
hkuang's avatar
hkuang committed
808
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
809
810
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
811
812
813
  tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
814
  const scan_order *scan_order;
815
  TX_TYPE tx_type;
816
  PREDICTION_MODE mode;
817
818
  const int bwl = b_width_log2(plane_bsize);
  const int diff_stride = 4 * (1 << bwl);
819
820
  uint8_t *src, *dst;
  int16_t *src_diff;
821
  uint16_t *eob = &p->eobs[block];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
822
823
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
824
825
  int i, j;
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
826
827
  dst = &pd->dst.buf[4 * (j * dst_stride + i)];
  src = &p->src.buf[4 * (j * src_stride + i)];
828
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
829

830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        scan_order = &vp9_default_scan_orders[TX_32X32];
        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
        vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, i, j, plane);
        if (!x->skip_recode) {
          vp9_high_subtract_block(32, 32, src_diff, diff_stride,
                                  src, src_stride, dst, dst_stride, xd->bd);
          high_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
          vp9_high_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                    p->round, p->quant, p->quant_shift, qcoeff,
                                    dqcoeff, pd->dequant, p->zbin_extra, eob,
                                    scan_order->scan, scan_order->iscan);
        }
        if (!x->skip_encode && *eob) {
          vp9_high_idct32x32_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
        }
        break;
      case TX_16X16:
        tx_type = get_tx_type(pd->plane_type, xd);
        scan_order = &vp9_scan_orders[TX_16X16][tx_type];
        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
        vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, i, j, plane);
        if (!x->skip_recode) {
          vp9_high_subtract_block(16, 16, src_diff, diff_stride,
                                  src, src_stride, dst, dst_stride, xd->bd);
          vp9_high_fht16x16(src_diff, coeff, diff_stride, tx_type);
          vp9_high_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, p->zbin_extra, eob,
                              scan_order->scan, scan_order->iscan);
        }
        if (!x->skip_encode && *eob) {
          vp9_high_iht16x16_add(tx_type, dqcoeff, dst, dst_stride,
                                *eob, xd->bd);
        }
        break;
      case TX_8X8:
        tx_type = get_tx_type(pd->plane_type, xd);
        scan_order = &vp9_scan_orders[TX_8X8][tx_type];
        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
        vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, i, j, plane);
        if (!x->skip_recode) {
          vp9_high_subtract_block(8, 8, src_diff, diff_stride,
                                  src, src_stride, dst, dst_stride, xd->bd);
          vp9_high_fht8x8(src_diff, coeff, diff_stride, tx_type);
          vp9_high_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, p->zbin_extra, eob,
                              scan_order->scan, scan_order->iscan);
        }
        if (!x->skip_encode && *eob) {
          vp9_high_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob,
                              xd->bd);
        }
        break;
      case TX_4X4:
        tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
        scan_order = &vp9_scan_orders[TX_4X4][tx_type];
        mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode;
        vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, i, j, plane);

        if (!x->skip_recode) {
          vp9_high_subtract_block(4, 4, src_diff, diff_stride,
                                  src, src_stride, dst, dst_stride, xd->bd);
          if (tx_type != DCT_DCT)
            vp9_high_fht4x4(src_diff, coeff, diff_stride, tx_type);
          else
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
          vp9_high_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                              p->quant, p->quant_shift, qcoeff, dqcoeff,
                              pd->dequant, p->zbin_extra, eob,
                              scan_order->scan, scan_order->iscan);
        }

        if (!x->skip_encode && *eob) {
          if (tx_type == DCT_DCT)
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1 which is significant (not just an optimization) for the
            // lossless case.
            x->high_itxm_add(dqcoeff, dst, dst_stride, *eob, xd->bd);
          else
            vp9_high_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type, xd->bd);
        }
        break;
      default:
        assert(0);
        return;
    }
    if (*eob)
      *(args->skip) = 0;
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

939
  switch (tx_size) {
940
    case TX_32X32:
941
      scan_order = &vp9_default_scan_orders[TX_32X32];
942
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
943
      vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
944
                              x->skip_encode ? src : dst,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
945
946
                              x->skip_encode ? src_stride : dst_stride,
                              dst, dst_stride, i, j, plane);
947
      if (!x->skip_recode) {
948
        vp9_subtract_block(32, 32, src_diff, diff_stride,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
949
950
                           src, src_stride, dst, dst_stride);
        fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
951
952
        vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff, dqcoeff,
953
954
                             pd->dequant, p->zbin_extra, eob, scan_order->scan,
                             scan_order->iscan);
955
      }
956
      if (!x->skip_encode && *eob)
Dmitry Kovalev's avatar
Dmitry Kovalev committed
957
        vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob);
958
959
      break;
    case TX_16X16:
960
      tx_type = get_tx_type(pd->plane_type, xd);
961
      scan_order = &vp9_scan_orders[TX_16X16][tx_type];
962
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
963
      vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
964
                              x->skip_encode ? src : dst,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
965
966
                              x->skip_encode ? src_stride : dst_stride,
                              dst, dst_stride, i, j, plane);
967
      if (!x->skip_recode) {
968
        vp9_subtract_block(16, 16, src_diff, diff_stride,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
969
                           src, src_stride, dst, dst_stride);
970
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
971
972
        vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                       p->quant, p->quant_shift, qcoeff, dqcoeff,
973
974
                       pd->dequant, p->zbin_extra, eob, scan_order->scan,
                       scan_order->iscan);
975
      }
976
      if (!x->skip_encode && *eob)
Dmitry Kovalev's avatar
Dmitry Kovalev committed
977
        vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob);
978
979
      break;
    case TX_8X8:
980
      tx_type = get_tx_type(pd->plane_type, xd);
981
      scan_order = &vp9_scan_orders[TX_8X8][tx_type];
982
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
983
      vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
984
                              x->skip_encode ? src : dst,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
985
986
                              x->skip_encode ? src_stride : dst_stride,
                              dst, dst_stride, i, j, plane);
987
      if (!x->skip_recode) {
988
        vp9_subtract_block(8, 8, src_diff, diff_stride,
Dmitry Kovalev's avatar
Dmitry Kovalev committed
989
                           src, src_stride, dst, dst_stride);
990
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
991
992
        vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff,
993
994
                       pd->dequant, p->zbin_extra, eob, scan_order->scan,
                       scan_order->iscan);
995
      }
996
      if (!x->skip_encode && *eob)
Dmitry Kovalev's avatar
Dmitry Kovalev committed
997
        vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
998
999
      break;
    case TX_4X4:
1000