/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

Yaowu Xu's avatar
Yaowu Xu committed
12
13
14
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
15

16
#include "aom_dsp/bitwriter.h"
17
#include "aom_dsp/quantize.h"
Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_mem/aom_mem.h"
19
#include "aom_ports/mem.h"
Jingning Han's avatar
Jingning Han committed
20

21
22
23
24
#include "av1/common/idct.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
Jingning Han's avatar
Jingning Han committed
25

26
#include "av1/encoder/av1_quantize.h"
27
#include "av1/encoder/encodemb.h"
28
29
30
#if CONFIG_LV_MAP
#include "av1/encoder/encodetxb.h"
#endif
31
32
33
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/tokenize.h"
Jingning Han's avatar
Jingning Han committed
34

35
36
37
38
39
40
#if CONFIG_PVQ
#include "av1/encoder/encint.h"
#include "av1/common/partition.h"
#include "av1/encoder/pvq_encoder.h"
#endif

41
42
43
44
#if CONFIG_CFL
#include "av1/common/cfl.h"
#endif

45
46
47
// Reports whether the plain C subtraction routine must be used: returns 1
// when either dimension is smaller than 4, and 0 otherwise.
static int check_subtract_block_size(int w, int h) {
  const int smaller_dim = (w < h) ? w : h;
  return smaller_dim < 4;
}

Angie Chiang's avatar
Angie Chiang committed
48
49
50
51
// Dispatches a rows x cols block subtraction to the matching aom kernel.
//
// xd is only consulted when CONFIG_HIGHBITDEPTH is enabled, to test
// YV12_FLAG_HIGHBITDEPTH on the current buffer and to pass xd->bd to the
// high-bitdepth kernels. diff/src8/pred8 and their strides are forwarded
// unchanged; presumably the kernels write src - pred into diff (their
// definitions are outside this file).
//
// When check_subtract_block_size() flags the block as too small, the "_c"
// kernels are called instead of the default (possibly optimized) ones.
static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
                           int16_t *diff, ptrdiff_t diff_stride,
                           const uint8_t *src8, ptrdiff_t src_stride,
                           const uint8_t *pred8, ptrdiff_t pred_stride) {
  const int use_c_version = check_subtract_block_size(rows, cols);

#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (use_c_version) {
      aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8,
                                  src_stride, pred8, pred_stride, xd->bd);
    } else {
      aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8,
                                src_stride, pred8, pred_stride, xd->bd);
    }
    return;
  }
#else
  (void)xd;
#endif  // CONFIG_HIGHBITDEPTH

  if (use_c_version) {
    aom_subtract_block_c(rows, cols, diff, diff_stride, src8, src_stride, pred8,
                         pred_stride);
  } else {
    aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
                       pred_stride);
  }
}

Angie Chiang's avatar
Angie Chiang committed
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// Runs subtract_block() on a single transform block of `plane`.
//
// blk_row/blk_col are scaled by (1 << tx_size_wide_log2[0]) to obtain element
// offsets into the source, destination and src_diff buffers; the residual
// buffer uses the width of plane_bsize as its stride.
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
                      int blk_col, int blk_row, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &xd->plane[plane];

  const int residual_stride = block_size_wide[plane_bsize];
  const int source_stride = enc_plane->src.stride;
  const int recon_stride = dec_plane->dst.stride;

  // Offsets of the transform block inside each buffer.
  const int recon_off =
      (blk_row * recon_stride + blk_col) << tx_size_wide_log2[0];
  const int source_off =
      (blk_row * source_stride + blk_col) << tx_size_wide_log2[0];
  const int residual_off =
      (blk_row * residual_stride + blk_col) << tx_size_wide_log2[0];

  uint8_t *const recon = dec_plane->dst.buf + recon_off;
  uint8_t *const source = enc_plane->src.buf + source_off;
  int16_t *const residual = enc_plane->src_diff + residual_off;

  subtract_block(xd, tx_size_high[tx_size], tx_size_wide[tx_size], residual,
                 residual_stride, source, source_stride, recon, recon_stride);
}

Angie Chiang's avatar
Angie Chiang committed
101
102
103
104
105
106
107
108
109
110
// Runs subtract_block() over the whole of `plane` for a block of size `bsize`.
//
// The plane-level block size comes from get_plane_block_size(); its width is
// also used as the stride of p->src_diff, which receives the result.
void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const MACROBLOCKD *xd = &x->e_mbd;

  subtract_block(xd, bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                 pd->dst.buf, pd->dst.stride);
}

113
114
// These numbers are empirically obtained.
// Indexed as [ref][plane_type]. The coefficient optimizers in this file
// multiply mb->rdmult by the selected entry and shift the product right by
// one to get their rate-distortion multiplier.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
#if CONFIG_EC_ADAPT
  { 10, 7 }, { 8, 5 },
#else
  { 10, 6 }, { 8, 6 },
#endif
};
Jingning Han's avatar
Jingning Han committed
121

122
123
124
125
126
// Recomputes rd_cost0 and rd_cost1 from (rate0, error0) and (rate1, error1)
// using the rdmult/rddiv variables in the invoking scope.
// Wrapped in do { } while (0) so that `UPDATE_RD_COST();` is a single
// statement and stays valid inside an unbraced if/else.
#define UPDATE_RD_COST()                             \
  do {                                               \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
  } while (0)
Jingning Han's avatar
Jingning Han committed
127

128
129
130
// Looks up the cost of `token` in context `ctx` from `token_costs`.
//
// The first table index is 1 when the token is ZERO_TOKEN or EOB_TOKEN and 0
// for every other token. `skip_eob` is accepted for interface compatibility
// but is not used.
static INLINE unsigned int get_token_bit_costs(
    unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob,
    int ctx, int token) {
  const int is_zero_or_eob = (token == ZERO_TOKEN || token == EOB_TOKEN);
  (void)skip_eob;
  return token_costs[is_zero_or_eob][ctx][token];
}

135
#if !CONFIG_LV_MAP
136
137
138
139
#define USE_GREEDY_OPTIMIZE_B 0

#if USE_GREEDY_OPTIMIZE_B

140
typedef struct av1_token_state_greedy {
141
142
143
  int16_t token;
  tran_low_t qc;
  tran_low_t dqc;
144
} av1_token_state_greedy;
145

146
147
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block.
//
// Coefficients are visited in scan order. For every nonzero quantized value x
// two candidates are compared: x itself and x_a, which is x with its magnitude
// reduced by one. The candidate with the lower RD cost (its own token cost
// plus the cost of the following token) is written back to qcoeff/dqcoeff.
// In parallel, the cost of ending the block right after each position is
// tracked; the cheapest end position becomes the new eob and every
// coefficient at or after it is zeroed.
//
// cm       common state, used to select the scan order.
// mb       macroblock whose plane[plane] coefficients are updated in place.
// plane    plane index.
// block    block index used with BLOCK_OFFSET and p->eobs.
// tx_size  transform size of the block.
// ctx      entropy context of the first coefficient.
//
// Returns the new end-of-block position, which is also stored in
// mb->plane[plane].eobs[block].
static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
                             int block, TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
  av1_token_state_greedy tokens[MAX_TX_SQUARE + 1][2];
  uint8_t token_cache[MAX_TX_SQUARE];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = pd->plane_type;
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const SCAN_ORDER *const scan_order =
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
  const int16_t *const scan = scan_order->scan;
  const int16_t *const nb = scan_order->neighbors;
  int dqv;
  const int shift = av1_get_tx_scale(tx_size);
#if CONFIG_AOM_QM
  int seg_id = xd->mi[0]->mbmi.segment_id;
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
#endif
#if CONFIG_NEW_QUANT
  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
#endif  // CONFIG_NEW_QUANT
  int sz = 0;
  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int16_t t0, t1;
  int i, final_eob;
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
  const int default_eob = tx_size_2d[tx_size];

  assert(mb->qindex > 0);

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;

  int64_t rate0, rate1;
  // Record the initial token of every coefficient and seed the context cache.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    int x = qcoeff[rc];
    t0 = av1_get_token(x);

    tokens[i][0].qc = x;
    tokens[i][0].token = t0;
    tokens[i][0].dqc = dqcoeff[rc];

    token_cache[rc] = av1_pt_energy_class[t0];
  }
  // Sentinel entry marking the end of block.
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][0].dqc = 0;
  tokens[eob][1] = tokens[eob][0];

  unsigned int(*token_costs_ptr)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      token_costs;

  final_eob = 0;

  int64_t eob_cost0, eob_cost1;

  const int ctx0 = ctx;
  /* Record the r-d cost */
  int64_t accu_rate = 0;
  int64_t accu_error = 0;

  // Baseline: cost of coding an immediate EOB (all coefficients dropped).
  rate0 = get_token_bit_costs(*(token_costs_ptr + band_translate[0]), 0, ctx0,
                              EOB_TOKEN);
  int64_t best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  int x_prev = 1;

  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    int x = qcoeff[rc];
    sz = -(x < 0);

    int band_cur = band_translate[i];
    int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    int token_tree_sel_cur = (x_prev == 0);

    if (x == 0) {
      // no need to search when x == 0
      rate0 =
          get_token_bit_costs(*(token_costs_ptr + band_cur), token_tree_sel_cur,
                              ctx_cur, tokens[i][0].token);
      accu_rate += rate0;
      x_prev = 0;
      // accu_error does not change when x==0
    } else {
      /*  Computing distortion
       */
      // compute the distortion for the first candidate
      // and the distortion for quantizing to 0.
      int dx0 = (-coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx0 >>= xd->bd - 8;
      }
#endif
      int64_t d0 = (int64_t)dx0 * dx0;

      // Second candidate: x with its magnitude reduced by one
      // (x - 1 for positive x, x + 1 for negative x).
      int x_a = x - 2 * sz - 1;
      int64_t d2, d2_a;

      int dx;

#if CONFIG_AOM_QM
      int iwt = iqmatrix[rc];
      dqv = dequant_ptr[rc != 0];
      dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#else
      dqv = dequant_ptr[rc != 0];
#endif

      dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_HIGHBITDEPTH
      d2 = (int64_t)dx * dx;

      /* compute the distortion for the second candidate
       * x_a = x - 2 * sz - 1;
       */
      if (x_a != 0) {
#if CONFIG_NEW_QUANT
        // The distortion must be measured for the candidate x_a, not for x.
        // Multiply instead of left-shifting so that negative coefficients do
        // not trigger undefined behaviour.
        dx = av1_dequant_coeff_nuq(x_a, dqv, dequant_val[band_translate[i]]) -
             (coeff[rc] * (1 << shift));
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx >>= xd->bd - 8;
        }
#endif  // CONFIG_HIGHBITDEPTH
#else   // CONFIG_NEW_QUANT
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
        } else {
          dx -= (dqv + sz) ^ sz;
        }
#else
        dx -= (dqv + sz) ^ sz;
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_NEW_QUANT
        d2_a = (int64_t)dx * dx;
      } else {
        d2_a = d0;
      }
      /*  Computing rates and r-d cost
       */

      int best_x, best_eob_x;
      int64_t base_bits, next_bits0, next_bits1;
      int64_t next_eob_bits0, next_eob_bits1;

      // rate cost of x
      base_bits = av1_get_token_cost(x, &t0, cat6_bits);
      rate0 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
                                              token_tree_sel_cur, ctx_cur, t0);

      // rate cost of x_a
      base_bits = av1_get_token_cost(x_a, &t1, cat6_bits);
      rate1 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
                                              token_tree_sel_cur, ctx_cur, t1);

      next_bits0 = 0;
      next_bits1 = 0;
      next_eob_bits0 = 0;
      next_eob_bits1 = 0;

      if (i < default_eob - 1) {
        int ctx_next, token_tree_sel_next;
        int band_next = band_translate[i + 1];

        // token_cache[rc] is set temporarily for each candidate so that the
        // context of the next position reflects that candidate.
        token_cache[rc] = av1_pt_energy_class[t0];
        ctx_next = get_coef_context(nb, token_cache, i + 1);
        token_tree_sel_next = (x == 0);

        next_bits0 = get_token_bit_costs(*(token_costs_ptr + band_next),
                                         token_tree_sel_next, ctx_next,
                                         tokens[i + 1][0].token);
        next_eob_bits0 =
            get_token_bit_costs(*(token_costs_ptr + band_next),
                                token_tree_sel_next, ctx_next, EOB_TOKEN);

        token_cache[rc] = av1_pt_energy_class[t1];
        ctx_next = get_coef_context(nb, token_cache, i + 1);
        token_tree_sel_next = (x_a == 0);

        next_bits1 = get_token_bit_costs(*(token_costs_ptr + band_next),
                                         token_tree_sel_next, ctx_next,
                                         tokens[i + 1][0].token);

        if (x_a != 0) {
          next_eob_bits1 =
              get_token_bit_costs(*(token_costs_ptr + band_next),
                                  token_tree_sel_next, ctx_next, EOB_TOKEN);
        }
      }

      rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), d2);
      rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), d2_a);

      best_x = (rd_cost1 < rd_cost0);

      eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                         (accu_error + d2 - d0));
      eob_cost1 = eob_cost0;
      if (x_a != 0) {
        eob_cost1 = RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                           (accu_error + d2_a - d0));
        best_eob_x = (eob_cost1 < eob_cost0);
      } else {
        best_eob_x = 0;
      }

      int dqc, dqc_a = 0;

      dqc = dqcoeff[rc];
      // The dequantized value of x_a is only needed if x_a wins either test.
      if (best_x + best_eob_x) {
        if (x_a != 0) {
#if CONFIG_NEW_QUANT
          dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
                                           dequant_val[band_translate[i]]);
          dqc_a = shift ? ROUND_POWER_OF_TWO(dqc_a, shift) : dqc_a;
          if (sz) dqc_a = -dqc_a;
#else
          if (x_a < 0)
            dqc_a = -((-x_a * dqv) >> shift);
          else
            dqc_a = (x_a * dqv) >> shift;
#endif  // CONFIG_NEW_QUANT
        } else {
          dqc_a = 0;
        }  // if (x_a != 0)
      }

      // record the better quantized value
      if (best_x) {
        qcoeff[rc] = x_a;
        dqcoeff[rc] = dqc_a;

        accu_rate += rate1;
        accu_error += d2_a - d0;
        assert(d2_a <= d0);

        token_cache[rc] = av1_pt_energy_class[t1];
      } else {
        accu_rate += rate0;
        accu_error += d2 - d0;
        assert(d2 <= d0);

        token_cache[rc] = av1_pt_energy_class[t0];
      }

      x_prev = qcoeff[rc];

      // determine whether to move the eob position to i+1
      int64_t best_eob_cost_i = eob_cost0;

      tokens[i][1].token = t0;
      tokens[i][1].qc = x;
      tokens[i][1].dqc = dqc;

      if ((x_a != 0) && (best_eob_x)) {
        best_eob_cost_i = eob_cost1;

        tokens[i][1].token = t1;
        tokens[i][1].qc = x_a;
        tokens[i][1].dqc = dqc_a;
      }

      if (best_eob_cost_i < best_block_rd_cost) {
        best_block_rd_cost = best_eob_cost_i;
        final_eob = i + 1;
      }
    }  // if (x==0)
  }    // for (i)

  assert(final_eob <= eob);
  if (final_eob > 0) {
    // The last kept coefficient takes the value chosen by the EOB test, which
    // may differ from the value chosen by the per-coefficient test.
    assert(tokens[final_eob - 1][1].qc != 0);
    i = final_eob - 1;
    int rc = scan[i];
    qcoeff[rc] = tokens[i][1].qc;
    dqcoeff[rc] = tokens[i][1].dqc;
  }

  // Drop everything at or after the new end of block.
  for (i = final_eob; i < eob; i++) {
    int rc = scan[i];
    qcoeff[rc] = 0;
    dqcoeff[rc] = 0;
  }

  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}

#else  // USE_GREEDY_OPTIMIZE_B

459
typedef struct av1_token_state_org {
460
461
462
463
464
465
466
  int64_t error;
  int rate;
  int16_t next;
  int16_t token;
  tran_low_t qc;
  tran_low_t dqc;
  uint8_t best_index;
467
} av1_token_state_org;
468

469
470
static int optimize_b_org(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
                          int block, TX_SIZE tx_size, int ctx) {
Jingning Han's avatar
Jingning Han committed
471
472
473
474
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
475
  av1_token_state_org tokens[MAX_TX_SQUARE + 1][2];
476
  uint8_t token_cache[MAX_TX_SQUARE];
477
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
Jingning Han's avatar
Jingning Han committed
478
479
480
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
481
  const PLANE_TYPE plane_type = pd->plane_type;
482
  const int default_eob = tx_size_2d[tx_size];
483
484
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
485
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
486
  const SCAN_ORDER *const scan_order =
Angie Chiang's avatar
Angie Chiang committed
487
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
488
489
  const int16_t *const scan = scan_order->scan;
  const int16_t *const nb = scan_order->neighbors;
Thomas Davies's avatar
Thomas Davies committed
490
  int dqv;
491
  const int shift = av1_get_tx_scale(tx_size);
492
493
#if CONFIG_AOM_QM
  int seg_id = xd->mi[0]->mbmi.segment_id;
494
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
495
#endif
496
#if CONFIG_NEW_QUANT
497
  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
498
  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
499
#endif  // CONFIG_NEW_QUANT
Jingning Han's avatar
Jingning Han committed
500
  int next = eob, sz = 0;
501
  const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
hui su's avatar
hui su committed
502
  const int64_t rddiv = mb->rddiv;
Jingning Han's avatar
Jingning Han committed
503
  int64_t rd_cost0, rd_cost1;
504
505
  int rate0, rate1;
  int64_t error0, error1;
Jingning Han's avatar
Jingning Han committed
506
  int16_t t0, t1;
507
508
  int best, band = (eob < default_eob) ? band_translate[eob]
                                       : band_translate[eob - 1];
509
  int pt, i, final_eob;
510
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
511
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
512
      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
513
514
  const uint16_t *band_counts = &band_count_table[tx_size][band];
  uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
515
516
  int shortcut = 0;
  int next_shortcut = 0;
517

Fangwen Fu's avatar
Fangwen Fu committed
518
519
520
521
522
#if CONFIG_EXT_DELTA_Q
  const int qindex = cm->seg.enabled
                         ? av1_get_qindex(&cm->seg, xd->mi[0]->mbmi.segment_id,
                                          cm->base_qindex)
                         : cm->base_qindex;
523
524
  assert(qindex > 0);
  (void)qindex;
Fangwen Fu's avatar
Fangwen Fu committed
525
#else
526
  assert(mb->qindex > 0);
Fangwen Fu's avatar
Fangwen Fu committed
527
#endif
528

529
  token_costs += band;
Jingning Han's avatar
Jingning Han committed
530

531
  assert((!plane_type && !plane) || (plane_type && plane));
Jingning Han's avatar
Jingning Han committed
532
  assert(eob <= default_eob);
533

Jingning Han's avatar
Jingning Han committed
534
535
536
537
538
539
540
541
542
  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][1] = tokens[eob][0];

543
544
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
545
    tokens[i][0].rate = av1_get_token_cost(qcoeff[rc], &t0, cat6_bits);
546
    tokens[i][0].token = t0;
Yaowu Xu's avatar
Yaowu Xu committed
547
    token_cache[rc] = av1_pt_energy_class[t0];
548
  }
Jingning Han's avatar
Jingning Han committed
549
550

  for (i = eob; i-- > 0;) {
551
552
    int base_bits, dx;
    int64_t d2;
Jingning Han's avatar
Jingning Han committed
553
    const int rc = scan[i];
Thomas Davies's avatar
Thomas Davies committed
554
    int x = qcoeff[rc];
555
556
#if CONFIG_AOM_QM
    int iwt = iqmatrix[rc];
Thomas Davies's avatar
Thomas Davies committed
557
558
559
560
    dqv = dequant_ptr[rc != 0];
    dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#else
    dqv = dequant_ptr[rc != 0];
561
#endif
562
    next_shortcut = shortcut;
563

Jingning Han's avatar
Jingning Han committed
564
    /* Only add a trellis state for non-zero coefficients. */
565
    if (UNLIKELY(x)) {
Jingning Han's avatar
Jingning Han committed
566
567
568
569
570
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
571

572
573
574
575
      if (next_shortcut) {
        /* Consider both possible successor states. */
        if (next < default_eob) {
          pt = get_coef_context(nb, token_cache, i + 1);
576
577
578
579
          rate0 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
          rate1 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][1].token);
580
581
582
583
584
585
586
        }
        UPDATE_RD_COST();
        /* And pick the best. */
        best = rd_cost1 < rd_cost0;
      } else {
        if (next < default_eob) {
          pt = get_coef_context(nb, token_cache, i + 1);
587
588
          rate0 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
589
590
        }
        best = 0;
Jingning Han's avatar
Jingning Han committed
591
      }
592
593

      dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
594
#if CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
595
596
597
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
598
#endif  // CONFIG_HIGHBITDEPTH
599
      d2 = (int64_t)dx * dx;
600
      tokens[i][0].rate += (best ? rate1 : rate0);
Jingning Han's avatar
Jingning Han committed
601
602
603
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].qc = x;
604
      tokens[i][0].dqc = dqcoeff[rc];
605
      tokens[i][0].best_index = best;
Jingning Han's avatar
Jingning Han committed
606
607
608
609
610

      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

611
      // The threshold of 3 is empirically obtained.
612
      if (UNLIKELY(abs(x) > 3)) {
613
614
        shortcut = 0;
      } else {
615
#if CONFIG_NEW_QUANT
Thomas Davies's avatar
Thomas Davies committed
616
        shortcut = ((av1_dequant_abscoeff_nuq(abs(x), dqv,
Yaowu Xu's avatar
Yaowu Xu committed
617
                                              dequant_val[band_translate[i]]) >
618
                     (abs(coeff[rc]) << shift)) &&
Thomas Davies's avatar
Thomas Davies committed
619
                    (av1_dequant_abscoeff_nuq(abs(x) - 1, dqv,
Yaowu Xu's avatar
Yaowu Xu committed
620
                                              dequant_val[band_translate[i]]) <
621
                     (abs(coeff[rc]) << shift)));
622
623
624
625
626
627
628
629
#else  // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
        if ((abs(x) * dequant_ptr[rc != 0] * iwt >
             ((abs(coeff[rc]) << shift) << AOM_QM_BITS)) &&
            (abs(x) * dequant_ptr[rc != 0] * iwt <
             (((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])
              << AOM_QM_BITS)))
#else
630
        if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
631
632
            (abs(x) * dequant_ptr[rc != 0] <
             (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
633
#endif  // CONFIG_AOM_QM
634
635
636
          shortcut = 1;
        else
          shortcut = 0;
637
#endif  // CONFIG_NEW_QUANT
638
      }
Jingning Han's avatar
Jingning Han committed
639
640
641
642

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
643
644
645
      } else {
        tokens[i][1] = tokens[i][0];
        next = i;
646

647
        if (UNLIKELY(!(--band_left))) {
648
649
650
651
          --band_counts;
          band_left = *band_counts;
          --token_costs;
        }
652
        continue;
Jingning Han's avatar
Jingning Han committed
653
654
655
656
657
658
659
660
661
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
662
        base_bits = 0;
Jingning Han's avatar
Jingning Han committed
663
      } else {
664
        base_bits = av1_get_token_cost(x, &t0, cat6_bits);
Jingning Han's avatar
Jingning Han committed
665
666
        t1 = t0;
      }
667
668

      if (next_shortcut) {
669
        if (LIKELY(next < default_eob)) {
670
          if (t0 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
671
            token_cache[rc] = av1_pt_energy_class[t0];
672
            pt = get_coef_context(nb, token_cache, i + 1);
673
674
            rate0 += get_token_bit_costs(*token_costs, !x, pt,
                                         tokens[next][0].token);
675
676
          }
          if (t1 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
677
            token_cache[rc] = av1_pt_energy_class[t1];
678
            pt = get_coef_context(nb, token_cache, i + 1);
679
680
            rate1 += get_token_bit_costs(*token_costs, !x, pt,
                                         tokens[next][1].token);
681
682
683
684
685
686
687
688
689
          }
        }

        UPDATE_RD_COST();
        /* And pick the best. */
        best = rd_cost1 < rd_cost0;
      } else {
        // The two states in next stage are identical.
        if (next < default_eob && t0 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
690
          token_cache[rc] = av1_pt_energy_class[t0];
Jingning Han's avatar
Jingning Han committed
691
          pt = get_coef_context(nb, token_cache, i + 1);
692
693
          rate0 +=
              get_token_bit_costs(*token_costs, !x, pt, tokens[next][0].token);
Jingning Han's avatar
Jingning Han committed
694
        }
695
        best = 0;
Jingning Han's avatar
Jingning Han committed
696
697
      }

698
#if CONFIG_NEW_QUANT
Thomas Davies's avatar
Thomas Davies committed
699
      dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
700
           (coeff[rc] << shift);
701
#if CONFIG_HIGHBITDEPTH
hui su's avatar
hui su committed
702
703
704
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
705
#endif  // CONFIG_HIGHBITDEPTH
706
#else   // CONFIG_NEW_QUANT
707
#if CONFIG_HIGHBITDEPTH
hui su's avatar
hui su committed
708
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Thomas Davies's avatar
Thomas Davies committed
709
        dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
hui su's avatar
hui su committed
710
      } else {
Thomas Davies's avatar
Thomas Davies committed
711
        dx -= (dqv + sz) ^ sz;
hui su's avatar
hui su committed
712
713
      }
#else
Thomas Davies's avatar
Thomas Davies committed
714
      dx -= (dqv + sz) ^ sz;
715
#endif  // CONFIG_HIGHBITDEPTH
716
#endif  // CONFIG_NEW_QUANT
717
      d2 = (int64_t)dx * dx;
hui su's avatar
hui su committed
718

Jingning Han's avatar
Jingning Han committed
719
720
721
722
723
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
724
725

      if (x) {
726
#if CONFIG_NEW_QUANT
Yaowu Xu's avatar
Yaowu Xu committed
727
        tokens[i][1].dqc = av1_dequant_abscoeff_nuq(
Thomas Davies's avatar
Thomas Davies committed
728
            abs(x), dqv, dequant_val[band_translate[i]]);
729
730
731
        tokens[i][1].dqc = shift ? ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift)
                                 : tokens[i][1].dqc;
        if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
732
#else
733
734
        if (x < 0)
          tokens[i][1].dqc = -((-x * dqv) >> shift);
735
        else
736
          tokens[i][1].dqc = (x * dqv) >> shift;
737
#endif  // CONFIG_NEW_QUANT
738
739
740
741
      } else {
        tokens[i][1].dqc = 0;
      }

742
      tokens[i][1].best_index = best;
Jingning Han's avatar
Jingning Han committed
743
744
745
746
747
748
749
750
      /* Finally, make this the new head of the trellis. */
      next = i;
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       *  add a new trellis node, but we do need to update the costs.
       */
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
751
      pt = get_coef_context(nb, token_cache, i + 1);
Jingning Han's avatar
Jingning Han committed
752
753
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != EOB_TOKEN) {
754
        tokens[next][0].rate += get_token_bit_costs(*token_costs, 1, pt, t0);
Jingning Han's avatar
Jingning Han committed
755
756
757
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != EOB_TOKEN) {
758
        tokens[next][1].rate += get_token_bit_costs(*token_costs, 1, pt, t1);
Jingning Han's avatar
Jingning Han committed
759
760
        tokens[next][1].token = ZERO_TOKEN;
      }
761
      tokens[i][0].best_index = tokens[i][1].best_index = 0;
762
      shortcut = (tokens[next][0].rate != tokens[next][1].rate);
Jingning Han's avatar
Jingning Han committed
763
764
      /* Don't update next, because we didn't add a new node. */
    }
765

766
    if (UNLIKELY(!(--band_left))) {
767
768
769
770
      --band_counts;
      band_left = *band_counts;
      --token_costs;
    }
Jingning Han's avatar
Jingning Han committed
771
772
773
774
775
776
777
778
779
  }

  /* Now pick the best path through the whole trellis. */
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
780
781
  rate0 += get_token_bit_costs(*token_costs, 0, ctx, t0);
  rate1 += get_token_bit_costs(*token_costs, 0, ctx, t1);
Jingning Han's avatar
Jingning Han committed
782
783
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
784

Jingning Han's avatar
Jingning Han committed
785
  final_eob = -1;
786

Jingning Han's avatar
Jingning Han committed
787
788
789
  for (i = next; i < eob; i = next) {
    const int x = tokens[i][best].qc;
    const int rc = scan[i];
790
    if (x) final_eob = i;
Jingning Han's avatar
Jingning Han committed
791
    qcoeff[rc] = x;
792
793
    dqcoeff[rc] = tokens[i][best].dqc;

Jingning Han's avatar
Jingning Han committed
794
    next = tokens[i][best].next;
795
    best = tokens[i][best].best_index;
Jingning Han's avatar
Jingning Han committed
796
797
798
799
  }
  final_eob++;

  mb->plane[plane].eobs[block] = final_eob;
800
  assert(final_eob <= default_eob);
Jingning Han's avatar
Jingning Han committed
801
  return final_eob;
802
803
804
}

#endif  // USE_GREEDY_OPTIMIZE_B
805
#endif  // !CONFIG_LV_MAP
806
807

int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
808
809
                   BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                   const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) {
810
811
812
813
814
815
816
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  const int eob = p->eobs[block];
  assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
  if (eob == 0) return eob;
  if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return eob;
#if CONFIG_PVQ
817
818
  (void)cm;
  (void)tx_size;
819
820
  (void)a;
  (void)l;
821
822
  return eob;
#endif
Jingning Han's avatar
Jingning Han committed
823

824
#if !CONFIG_LV_MAP
825
  (void)plane_bsize;
826
827
828
829
830
831
#if CONFIG_VAR_TX
  int ctx = get_entropy_context(tx_size, a, l);
#else
  int ctx = combine_entropy_contexts(*a, *l);
#endif

832
833
834
835
#if USE_GREEDY_OPTIMIZE_B
  return optimize_b_greedy(cm, mb, plane, block, tx_size, ctx);
#else   // USE_GREEDY_OPTIMIZE_B
  return optimize_b_org(cm, mb, plane, block, tx_size, ctx);
836
#endif  // USE_GREEDY_OPTIMIZE_B
837
838
839
840
841
#else   // !CONFIG_LV_MAP
  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
  return av1_optimize_txb(cm, mb, plane, block, tx_size, &txb_ctx);
#endif  // !CONFIG_LV_MAP
842
}
843

Thomas Daede's avatar
Thomas Daede committed
844
#if !CONFIG_PVQ
845
#if CONFIG_HIGHBITDEPTH
Angie Chiang's avatar
Angie Chiang committed
846
847
typedef enum QUANT_FUNC {
  QUANT_FUNC_LOWBD = 0,
848
  QUANT_FUNC_HIGHBD = 1,
849
  QUANT_FUNC_TYPES = 2
Angie Chiang's avatar
Angie Chiang committed
850
851
} QUANT_FUNC;

852
853
static AV1_QUANT_FACADE
    quant_func_list[AV1_XFORM_QUANT_TYPES][QUANT_FUNC_TYPES] = {
854
#if !CONFIG_NEW_QUANT
855
      { av1_quantize_fp_facade, av1_highbd_quantize_fp_facade },
Yaowu Xu's avatar
Yaowu Xu committed
856
857
      { av1_quantize_b_facade, av1_highbd_quantize_b_facade },
      { av1_quantize_dc_facade, av1_highbd_quantize_dc_facade },
858
#else   // !CONFIG_NEW_QUANT
859
860
861
      { av1_quantize_fp_nuq_facade, av1_highbd_quantize_fp_nuq_facade },
      { av1_quantize_b_nuq_facade, av1_highbd_quantize_b_nuq_facade },
      { av1_quantize_dc_nuq_facade, av1_highbd_quantize_dc_nuq_facade },
862
#endif  // !CONFIG_NEW_QUANT
863
864
      { NULL, NULL }
    };
865

Thomas Daede's avatar
Thomas Daede committed
866
#else
867

Angie Chiang's avatar
Angie Chiang committed
868
869
typedef enum QUANT_FUNC {
  QUANT_FUNC_LOWBD = 0,
870
  QUANT_FUNC_TYPES = 1
Angie Chiang's avatar
Angie Chiang committed
871
} QUANT_FUNC;
Angie Chiang's avatar
Angie Chiang committed
872

873
874
static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES]
                                       [QUANT_FUNC_TYPES] = {
875
#if !CONFIG_NEW_QUANT
clang-format's avatar
clang-format committed
876
877
878
                                         { av1_quantize_fp_facade },
                                         { av1_quantize_b_facade },
                                         { av1_quantize_dc_facade },
879
#else   // !CONFIG_NEW_QUANT
880
881
882
                                         { av1_quantize_fp_nuq_facade },
                                         { av1_quantize_b_nuq_facade },
                                         { av1_quantize_dc_nuq_facade },
883
#endif  // !CONFIG_NEW_QUANT
clang-format's avatar
clang-format committed
884
885
                                         { NULL }
                                       };
886
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
887
#endif  // CONFIG_PVQ
888

Angie Chiang's avatar
Angie Chiang committed
889
890
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
                     int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
891
892
                     TX_SIZE tx_size, int ctx,
                     AV1_XFORM_QUANT xform_quant_idx) {
Jingning Han's avatar
Jingning Han committed
893
  MACROBLOCKD *const xd = &x->e_mbd;
894
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Yushin Cho's avatar
Yushin Cho committed
895
#if !(CONFIG_PVQ || CONFIG_DAALA_DIST)
Jingning Han's avatar
Jingning Han committed
896
897
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
898
899
900
901
#else
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
#endif
902
  PLANE_TYPE plane_type = get_plane_type(plane);
903
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
904
  const int is_inter = is_inter_block(mbmi);
Angie Chiang's avatar
Angie Chiang committed
905
  const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter);
Jingning Han's avatar
Jingning Han committed
906
907
908
909
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
910
  const int diff_stride = block_size_wide[plane_bsize];
911
#if CONFIG_AOM_QM
912
  int seg_id = mbmi->segment_id;
913
914
  const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size];
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size];
915
#endif
Angie Chiang's avatar
Angie Chiang committed
916
917

  FWD_TXFM_PARAM fwd_txfm_param;
918

Yushin Cho's avatar
Yushin Cho committed
919
920
921
922
923
924
925
926
#if CONFIG_PVQ || CONFIG_DAALA_DIST
  uint8_t *dst;
  int16_t *pred;
  const int dst_stride = pd->dst.stride;
  int tx_blk_size;
  int i, j;
#endif

927
928
#if !CONFIG_PVQ
  const int tx2d_size = tx_size_2d[tx_size];
929
  QUANT_PARAM qparam;
930
931
  const int16_t *src_diff;

932
933
  src_diff =
      &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
934
  qparam.log_scale = av1_get_tx_scale(tx_size);
935
936
937
938
939
940
941
942
#if CONFIG_NEW_QUANT
  qparam.tx_size = tx_size;
  qparam.dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
#endif  // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
  qparam.qmatrix = qmatrix;
  qparam.iqmatrix = iqmatrix;
#endif  // CONFIG_AOM_QM
943
944
945
946
#else
  tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
  int skip = 1;
  PVQ_INFO *pvq_info = NULL;
Yushin Cho's avatar
Yushin Cho committed
947
948
949
  uint8_t *src;
  int16_t *src_int16;
  const int src_stride = p->src.stride;
950

951
  (void)ctx;
952
953
954
955
956
957
958
  (void)scan_order;
  (void)qcoeff;

  if (x->pvq_coded) {
    assert(block < MAX_PVQ_BLOCKS_IN_SB);
    pvq_info = &x->pvq[block][plane];
  }
959
960
961
  src = &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
  src_int16 =
      &p->src_int16[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
Yushin Cho's avatar
Yushin Cho committed
962
963
964

  // transform block size in pixels
  tx_blk_size = tx_size_wide[tx_size];
965
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
966
967
968
969
970
971
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        src_int16[diff_stride * j + i] =
            CONVERT_TO_SHORTPTR(src)[src_stride * j + i];
  } else {
972
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
973
974
975
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        src_int16[diff_stride * j + i] = src[src_stride * j + i];
976
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
977
  }
978
#endif  // CONFIG_HIGHBITDEPTH
Yushin Cho's avatar
Yushin Cho committed
979
980
981
982
#endif

#if CONFIG_PVQ || CONFIG_DAALA_DIST
  dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
983
  pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
984
985
986
987

  // transform block size in pixels
  tx_blk_size = tx_size_wide[tx_size];

Thomas Daede's avatar
Thomas Daede committed
988
989
// copy uint8 orig and predicted block to int16 buffer
// in order to use existing VP10 transform functions
990
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
991
992
993
994
995
996
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred[diff_stride * j + i] =
            CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
  } else {
997
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
998
999
1000
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred[diff_stride * j + i] = dst[dst_stride * j + i];