/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

Yaowu Xu's avatar
Yaowu Xu committed
12
13
14
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
15

16
#include "aom_dsp/bitwriter.h"
17
#include "aom_dsp/quantize.h"
Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_mem/aom_mem.h"
19
#include "aom_ports/mem.h"
Jingning Han's avatar
Jingning Han committed
20

21
22
23
24
#include "av1/common/idct.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
Jingning Han's avatar
Jingning Han committed
25

26
#include "av1/encoder/av1_quantize.h"
27
#include "av1/encoder/encodemb.h"
28
29
30
#if CONFIG_LV_MAP
#include "av1/encoder/encodetxb.h"
#endif
31
32
33
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/tokenize.h"
Jingning Han's avatar
Jingning Han committed
34

35
36
37
38
39
40
#if CONFIG_PVQ
#include "av1/encoder/encint.h"
#include "av1/common/partition.h"
#include "av1/encoder/pvq_encoder.h"
#endif

41
42
43
44
#if CONFIG_CFL
#include "av1/common/cfl.h"
#endif

45
46
47
// Returns nonzero when either dimension is smaller than 4, which callers use
// as the signal to take the plain C subtraction path.
static int check_subtract_block_size(int w, int h) {
  if (w < 4) return 1;
  return h < 4;
}

Angie Chiang's avatar
Angie Chiang committed
48
49
50
51
// Computes the residual of a rows x cols block by dispatching to one of the
// aom subtract kernels.
//
//   xd          - only consulted when CONFIG_HIGHBITDEPTH is set, to read the
//                 frame's high-bitdepth flag and the bit depth (xd->bd).
//   rows, cols  - block height and width.
//   diff        - output residual buffer, with stride diff_stride.
//   src8/pred8  - source and prediction buffers with their strides.
//
// Blocks with a dimension below 4 go to the explicit "_c" kernels; all other
// blocks use the regular (dispatching) entry points.
static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
                           int16_t *diff, ptrdiff_t diff_stride,
                           const uint8_t *src8, ptrdiff_t src_stride,
                           const uint8_t *pred8, ptrdiff_t pred_stride) {
#if !CONFIG_HIGHBITDEPTH
  (void)xd;
#endif

  // The size check is symmetric in its arguments; pass (width, height) to
  // match the helper's signature.
  if (check_subtract_block_size(cols, rows)) {
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8,
                                  src_stride, pred8, pred_stride, xd->bd);
      return;
    }
#endif  // CONFIG_HIGHBITDEPTH
    aom_subtract_block_c(rows, cols, diff, diff_stride, src8, src_stride, pred8,
                         pred_stride);
    return;
  }

#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
                              pred8, pred_stride, xd->bd);
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
                     pred_stride);
}

Angie Chiang's avatar
Angie Chiang committed
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// Computes the residual for one transform block of `plane`.
//
// blk_row / blk_col locate the transform block inside the plane block; they
// are scaled by tx_size_wide_log2[0] to obtain sample offsets into the source,
// prediction (dst) and residual (src_diff) buffers. The residual buffer is
// addressed with the plane block width as its stride.
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
                      int blk_col, int blk_row, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const enc_plane = &x->plane[plane];
  const struct macroblockd_plane *const dec_plane = &x->e_mbd.plane[plane];

  const int residual_stride = block_size_wide[plane_bsize];
  const int source_stride = enc_plane->src.stride;
  const int pred_stride = dec_plane->dst.stride;

  const int txb_cols = tx_size_wide[tx_size];
  const int txb_rows = tx_size_high[tx_size];

  // Sample offsets of the transform block's top-left corner in each buffer.
  const int pred_offset = (blk_row * pred_stride + blk_col)
                          << tx_size_wide_log2[0];
  const int source_offset = (blk_row * source_stride + blk_col)
                            << tx_size_wide_log2[0];
  const int residual_offset = (blk_row * residual_stride + blk_col)
                              << tx_size_wide_log2[0];

  uint8_t *const pred = &dec_plane->dst.buf[pred_offset];
  uint8_t *const source = &enc_plane->src.buf[source_offset];
  int16_t *const residual = &enc_plane->src_diff[residual_offset];

  subtract_block(xd, txb_rows, txb_cols, residual, residual_stride, source,
                 source_stride, pred, pred_stride);
}

Angie Chiang's avatar
Angie Chiang committed
101
102
103
104
105
106
107
108
109
110
// Computes the residual for the whole of `plane` within a block of size
// `bsize`: the plane block size is derived with get_plane_block_size(), and
// the result is written to the plane's src_diff buffer using the plane block
// width as the residual stride.
void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int bw = block_size_wide[plane_bsize];
  const int bh = block_size_high[plane_bsize];
  const MACROBLOCKD *xd = &x->e_mbd;

  subtract_block(xd, bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                 pd->dst.buf, pd->dst.stride);
}

113
114
// Per-[ref][plane_type] weights applied to the encoder's rdmult by the
// coefficient optimizers in this file (the product is then halved).
// These numbers are empirically obtained.
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
#if CONFIG_EC_ADAPT
  { 10, 7 }, { 8, 5 },
#else
  { 10, 6 }, { 8, 6 },
#endif
};
Jingning Han's avatar
Jingning Han committed
121

122
123
124
125
126
// Recomputes rd_cost0 / rd_cost1 from the rdmult, rddiv, rate0/rate1 and
// error0/error1 variables that must be in scope at the point of use.
// Wrapped in do { } while (0) so that `UPDATE_RD_COST();` behaves as a single
// statement, including inside an unbraced if/else.
#define UPDATE_RD_COST()                             \
  do {                                               \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
  } while (0)
Jingning Han's avatar
Jingning Han committed
127

128
129
130
// Looks up the cost of `token` in context `ctx`.
//
// The first index of token_costs is chosen from the token itself: table 1 for
// ZERO_TOKEN and EOB_TOKEN, table 0 for every other token. `skip_eob` is
// accepted for interface compatibility but is not used.
static INLINE unsigned int get_token_bit_costs(
    unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob,
    int ctx, int token) {
  (void)skip_eob;
  return token_costs[token == ZERO_TOKEN || token == EOB_TOKEN][ctx][token];
}

135
136
137
138
// Compile-time selector for the coefficient optimizer: a nonzero value builds
// optimize_b_greedy(), zero builds optimize_b_org() (see the #if/#else that
// follows).
#define USE_GREEDY_OPTIMIZE_B 0

#if USE_GREEDY_OPTIMIZE_B

139
typedef struct av1_token_state_greedy {
140
141
142
  int16_t token;
  tran_low_t qc;
  tran_low_t dqc;
143
} av1_token_state_greedy;
144

145
146
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block.
//
// Coefficients are visited once in scan order. For each nonzero quantized
// value x two candidates are compared: x itself and x_a, the value one step
// closer to zero. The cheaper one (rate of this token plus the following
// token, against its distortion) is written back to qcoeff/dqcoeff. In
// parallel the cost of ending the block right after each position is tracked,
// and the cheapest end-of-block position wins.
//
//   cm, mb   - encoder state; qcoeff, dqcoeff and eobs of `plane` are updated.
//   plane    - plane index.
//   block    - block offset used with BLOCK_OFFSET() and eobs[].
//   tx_size  - transform size.
//   ctx      - entropy context used for the first coefficient.
//
// Returns the new end-of-block position, which is also stored in
// mb->plane[plane].eobs[block]. All coefficients from that position up to the
// original eob are zeroed.
static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
                             int block, TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
  av1_token_state_greedy tokens[MAX_TX_SQUARE + 1][2];
  uint8_t token_cache[MAX_TX_SQUARE];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = pd->plane_type;
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const SCAN_ORDER *const scan_order =
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
  const int16_t *const scan = scan_order->scan;
  const int16_t *const nb = scan_order->neighbors;
  int dqv;
  const int shift = av1_get_tx_scale(tx_size);
#if CONFIG_AOM_QM
  int seg_id = xd->mi[0]->mbmi.segment_id;
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
#endif
#if CONFIG_NEW_QUANT
  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
#endif  // CONFIG_NEW_QUANT
  int sz = 0;
  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int16_t t0, t1;
  int i, final_eob;
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
  const int default_eob = tx_size_2d[tx_size];

  assert(mb->qindex > 0);

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;

  int64_t rate0, rate1;

  // Seed state 0 of every position with the current quantization result and
  // prime the token cache used for context derivation.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    int x = qcoeff[rc];
    t0 = av1_get_token(x);

    tokens[i][0].qc = x;
    tokens[i][0].token = t0;
    tokens[i][0].dqc = dqcoeff[rc];

    token_cache[rc] = av1_pt_energy_class[t0];
  }
  // Sentinel entry at the current end of block.
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][0].dqc = 0;
  tokens[eob][1] = tokens[eob][0];

  unsigned int(*token_costs_ptr)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      token_costs;

  final_eob = 0;

  int64_t eob_cost0, eob_cost1;

  const int ctx0 = ctx;
  /* Record the r-d cost */
  int64_t accu_rate = 0;
  int64_t accu_error = 0;

  // Baseline: signal EOB immediately (every coefficient zero). Distortion is
  // tracked relative to this all-zero block, so it starts at 0.
  rate0 = get_token_bit_costs(*(token_costs_ptr + band_translate[0]), 0, ctx0,
                              EOB_TOKEN);
  int64_t best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  int x_prev = 1;

  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    int x = qcoeff[rc];
    sz = -(x < 0);  // -1 for negative x, 0 otherwise

    int band_cur = band_translate[i];
    int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    int token_tree_sel_cur = (x_prev == 0);

    if (x == 0) {
      // no need to search when x == 0
      rate0 =
          get_token_bit_costs(*(token_costs_ptr + band_cur), token_tree_sel_cur,
                              ctx_cur, tokens[i][0].token);
      accu_rate += rate0;
      x_prev = 0;
      // accu_error does not change when x==0
    } else {
      /*  Computing distortion
       */
      // compute the distortion for the first candidate
      // and the distortion for quantizing to 0.
      int dx0 = (-coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx0 >>= xd->bd - 8;
      }
#endif
      int64_t d0 = (int64_t)dx0 * dx0;

      // Second candidate: x moved one step toward zero.
      int x_a = x - 2 * sz - 1;
      int64_t d2, d2_a;

      int dx;

#if CONFIG_AOM_QM
      int iwt = iqmatrix[rc];
      dqv = dequant_ptr[rc != 0];
      dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#else
      dqv = dequant_ptr[rc != 0];
#endif

      dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_HIGHBITDEPTH
      d2 = (int64_t)dx * dx;

      /* compute the distortion for the second candidate
       * x_a = x - 2 * sz - 1;
       */
      if (x_a != 0) {
#if CONFIG_NEW_QUANT
        // Dequantize the second candidate (x_a), not x, so that d2_a really
        // is the distortion of x_a. The multiply form avoids left-shifting a
        // negative coefficient.
        dx = av1_dequant_coeff_nuq(x_a, dqv, dequant_val[band_translate[i]]) -
             (coeff[rc] * (1 << shift));
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx >>= xd->bd - 8;
        }
#endif  // CONFIG_HIGHBITDEPTH
#else   // CONFIG_NEW_QUANT
        // dx still holds the first candidate's error; step it by one
        // quantizer interval with the sign of x.
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
        } else {
          dx -= (dqv + sz) ^ sz;
        }
#else
        dx -= (dqv + sz) ^ sz;
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_NEW_QUANT
        d2_a = (int64_t)dx * dx;
      } else {
        d2_a = d0;
      }
      /*  Computing rates and r-d cost
       */

      int best_x, best_eob_x;
      int64_t base_bits, next_bits0, next_bits1;
      int64_t next_eob_bits0, next_eob_bits1;

      // rate cost of x
      base_bits = av1_get_token_cost(x, &t0, cat6_bits);
      rate0 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
                                              token_tree_sel_cur, ctx_cur, t0);

      // rate cost of x_a
      base_bits = av1_get_token_cost(x_a, &t1, cat6_bits);
      rate1 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
                                              token_tree_sel_cur, ctx_cur, t1);

      next_bits0 = 0;
      next_bits1 = 0;
      next_eob_bits0 = 0;
      next_eob_bits1 = 0;

      if (i < default_eob - 1) {
        int ctx_next, token_tree_sel_next;
        int band_next = band_translate[i + 1];

        // Cost of the following token (and of an EOB there) if x is kept.
        token_cache[rc] = av1_pt_energy_class[t0];
        ctx_next = get_coef_context(nb, token_cache, i + 1);
        token_tree_sel_next = (x == 0);

        next_bits0 = get_token_bit_costs(*(token_costs_ptr + band_next),
                                         token_tree_sel_next, ctx_next,
                                         tokens[i + 1][0].token);
        next_eob_bits0 =
            get_token_bit_costs(*(token_costs_ptr + band_next),
                                token_tree_sel_next, ctx_next, EOB_TOKEN);

        // Same costs if x_a is chosen instead.
        token_cache[rc] = av1_pt_energy_class[t1];
        ctx_next = get_coef_context(nb, token_cache, i + 1);
        token_tree_sel_next = (x_a == 0);

        next_bits1 = get_token_bit_costs(*(token_costs_ptr + band_next),
                                         token_tree_sel_next, ctx_next,
                                         tokens[i + 1][0].token);

        if (x_a != 0) {
          next_eob_bits1 =
              get_token_bit_costs(*(token_costs_ptr + band_next),
                                  token_tree_sel_next, ctx_next, EOB_TOKEN);
        }
      }

      rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), d2);
      rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), d2_a);

      best_x = (rd_cost1 < rd_cost0);

      // Cost of the whole block if it ended right after this coefficient.
      eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                         (accu_error + d2 - d0));
      eob_cost1 = eob_cost0;
      if (x_a != 0) {
        eob_cost1 = RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                           (accu_error + d2_a - d0));
        best_eob_x = (eob_cost1 < eob_cost0);
      } else {
        best_eob_x = 0;
      }

      int dqc, dqc_a = 0;

      dqc = dqcoeff[rc];
      // The dequantized value of x_a is only needed if x_a is selected for
      // either the running path or the end-of-block candidate.
      if (best_x + best_eob_x) {
        if (x_a != 0) {
#if CONFIG_NEW_QUANT
          dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
                                           dequant_val[band_translate[i]]);
          dqc_a = shift ? ROUND_POWER_OF_TWO(dqc_a, shift) : dqc_a;
          if (sz) dqc_a = -dqc_a;
#else
          if (x_a < 0)
            dqc_a = -((-x_a * dqv) >> shift);
          else
            dqc_a = (x_a * dqv) >> shift;
#endif  // CONFIG_NEW_QUANT
        } else {
          dqc_a = 0;
        }  // if (x_a != 0)
      }

      // record the better quantized value
      if (best_x) {
        qcoeff[rc] = x_a;
        dqcoeff[rc] = dqc_a;

        accu_rate += rate1;
        accu_error += d2_a - d0;
        assert(d2_a <= d0);

        token_cache[rc] = av1_pt_energy_class[t1];
      } else {
        accu_rate += rate0;
        accu_error += d2 - d0;
        assert(d2 <= d0);

        token_cache[rc] = av1_pt_energy_class[t0];
      }

      x_prev = qcoeff[rc];

      // determine whether to move the eob position to i+1
      int64_t best_eob_cost_i = eob_cost0;

      // State 1 holds the value to use at this position if the block ends
      // right after it.
      tokens[i][1].token = t0;
      tokens[i][1].qc = x;
      tokens[i][1].dqc = dqc;

      if ((x_a != 0) && (best_eob_x)) {
        best_eob_cost_i = eob_cost1;

        tokens[i][1].token = t1;
        tokens[i][1].qc = x_a;
        tokens[i][1].dqc = dqc_a;
      }

      if (best_eob_cost_i < best_block_rd_cost) {
        best_block_rd_cost = best_eob_cost_i;
        final_eob = i + 1;
      }
    }  // if (x==0)
  }    // for (i)

  assert(final_eob <= eob);
  if (final_eob > 0) {
    // The last kept coefficient takes the value chosen for the EOB candidate.
    assert(tokens[final_eob - 1][1].qc != 0);
    i = final_eob - 1;
    int rc = scan[i];
    qcoeff[rc] = tokens[i][1].qc;
    dqcoeff[rc] = tokens[i][1].dqc;
  }

  // Everything past the chosen end of block is dropped.
  for (i = final_eob; i < eob; i++) {
    int rc = scan[i];
    qcoeff[rc] = 0;
    dqcoeff[rc] = 0;
  }

  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}

#else  // USE_GREEDY_OPTIMIZE_B

458
typedef struct av1_token_state_org {
459
460
461
462
463
464
465
  int64_t error;
  int rate;
  int16_t next;
  int16_t token;
  tran_low_t qc;
  tran_low_t dqc;
  uint8_t best_index;
466
} av1_token_state_org;
467

468
469
static int optimize_b_org(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
                          int block, TX_SIZE tx_size, int ctx) {
Jingning Han's avatar
Jingning Han committed
470
471
472
473
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
474
  av1_token_state_org tokens[MAX_TX_SQUARE + 1][2];
475
  uint8_t token_cache[MAX_TX_SQUARE];
476
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
Jingning Han's avatar
Jingning Han committed
477
478
479
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
480
  const PLANE_TYPE plane_type = pd->plane_type;
481
  const int default_eob = tx_size_2d[tx_size];
482
483
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
484
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
485
  const SCAN_ORDER *const scan_order =
Angie Chiang's avatar
Angie Chiang committed
486
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
487
488
  const int16_t *const scan = scan_order->scan;
  const int16_t *const nb = scan_order->neighbors;
Thomas Davies's avatar
Thomas Davies committed
489
  int dqv;
490
  const int shift = av1_get_tx_scale(tx_size);
491
492
#if CONFIG_AOM_QM
  int seg_id = xd->mi[0]->mbmi.segment_id;
493
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
494
#endif
495
#if CONFIG_NEW_QUANT
496
  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
497
  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
498
#endif  // CONFIG_NEW_QUANT
Jingning Han's avatar
Jingning Han committed
499
  int next = eob, sz = 0;
500
  const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
hui su's avatar
hui su committed
501
  const int64_t rddiv = mb->rddiv;
Jingning Han's avatar
Jingning Han committed
502
  int64_t rd_cost0, rd_cost1;
503
504
  int rate0, rate1;
  int64_t error0, error1;
Jingning Han's avatar
Jingning Han committed
505
  int16_t t0, t1;
506
507
  int best, band = (eob < default_eob) ? band_translate[eob]
                                       : band_translate[eob - 1];
508
  int pt, i, final_eob;
509
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
510
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
511
      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
512
513
  const uint16_t *band_counts = &band_count_table[tx_size][band];
  uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
514
515
  int shortcut = 0;
  int next_shortcut = 0;
516

Fangwen Fu's avatar
Fangwen Fu committed
517
518
519
520
521
#if CONFIG_EXT_DELTA_Q
  const int qindex = cm->seg.enabled
                         ? av1_get_qindex(&cm->seg, xd->mi[0]->mbmi.segment_id,
                                          cm->base_qindex)
                         : cm->base_qindex;
522
523
  assert(qindex > 0);
  (void)qindex;
Fangwen Fu's avatar
Fangwen Fu committed
524
#else
525
  assert(mb->qindex > 0);
Fangwen Fu's avatar
Fangwen Fu committed
526
#endif
527

528
  token_costs += band;
Jingning Han's avatar
Jingning Han committed
529

530
  assert((!plane_type && !plane) || (plane_type && plane));
Jingning Han's avatar
Jingning Han committed
531
  assert(eob <= default_eob);
532

Jingning Han's avatar
Jingning Han committed
533
534
535
536
537
538
539
540
541
  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][1] = tokens[eob][0];

542
543
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
544
    tokens[i][0].rate = av1_get_token_cost(qcoeff[rc], &t0, cat6_bits);
545
    tokens[i][0].token = t0;
Yaowu Xu's avatar
Yaowu Xu committed
546
    token_cache[rc] = av1_pt_energy_class[t0];
547
  }
Jingning Han's avatar
Jingning Han committed
548
549

  for (i = eob; i-- > 0;) {
550
551
    int base_bits, dx;
    int64_t d2;
Jingning Han's avatar
Jingning Han committed
552
    const int rc = scan[i];
Thomas Davies's avatar
Thomas Davies committed
553
    int x = qcoeff[rc];
554
555
#if CONFIG_AOM_QM
    int iwt = iqmatrix[rc];
Thomas Davies's avatar
Thomas Davies committed
556
557
558
559
    dqv = dequant_ptr[rc != 0];
    dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#else
    dqv = dequant_ptr[rc != 0];
560
#endif
561
    next_shortcut = shortcut;
562

Jingning Han's avatar
Jingning Han committed
563
    /* Only add a trellis state for non-zero coefficients. */
564
    if (UNLIKELY(x)) {
Jingning Han's avatar
Jingning Han committed
565
566
567
568
569
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
570

571
572
573
574
      if (next_shortcut) {
        /* Consider both possible successor states. */
        if (next < default_eob) {
          pt = get_coef_context(nb, token_cache, i + 1);
575
576
577
578
          rate0 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
          rate1 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][1].token);
579
580
581
582
583
584
585
        }
        UPDATE_RD_COST();
        /* And pick the best. */
        best = rd_cost1 < rd_cost0;
      } else {
        if (next < default_eob) {
          pt = get_coef_context(nb, token_cache, i + 1);
586
587
          rate0 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
588
589
        }
        best = 0;
Jingning Han's avatar
Jingning Han committed
590
      }
591
592

      dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
593
#if CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
594
595
596
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
597
#endif  // CONFIG_HIGHBITDEPTH
598
      d2 = (int64_t)dx * dx;
599
      tokens[i][0].rate += (best ? rate1 : rate0);
Jingning Han's avatar
Jingning Han committed
600
601
602
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].qc = x;
603
      tokens[i][0].dqc = dqcoeff[rc];
604
      tokens[i][0].best_index = best;
Jingning Han's avatar
Jingning Han committed
605
606
607
608
609

      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

610
      // The threshold of 3 is empirically obtained.
611
      if (UNLIKELY(abs(x) > 3)) {
612
613
        shortcut = 0;
      } else {
614
#if CONFIG_NEW_QUANT
Thomas Davies's avatar
Thomas Davies committed
615
        shortcut = ((av1_dequant_abscoeff_nuq(abs(x), dqv,
Yaowu Xu's avatar
Yaowu Xu committed
616
                                              dequant_val[band_translate[i]]) >
617
                     (abs(coeff[rc]) << shift)) &&
Thomas Davies's avatar
Thomas Davies committed
618
                    (av1_dequant_abscoeff_nuq(abs(x) - 1, dqv,
Yaowu Xu's avatar
Yaowu Xu committed
619
                                              dequant_val[band_translate[i]]) <
620
                     (abs(coeff[rc]) << shift)));
621
622
623
624
625
626
627
628
#else  // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
        if ((abs(x) * dequant_ptr[rc != 0] * iwt >
             ((abs(coeff[rc]) << shift) << AOM_QM_BITS)) &&
            (abs(x) * dequant_ptr[rc != 0] * iwt <
             (((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])
              << AOM_QM_BITS)))
#else
629
        if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
630
631
            (abs(x) * dequant_ptr[rc != 0] <
             (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
632
#endif  // CONFIG_AOM_QM
633
634
635
          shortcut = 1;
        else
          shortcut = 0;
636
#endif  // CONFIG_NEW_QUANT
637
      }
Jingning Han's avatar
Jingning Han committed
638
639
640
641

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
642
643
644
      } else {
        tokens[i][1] = tokens[i][0];
        next = i;
645

646
        if (UNLIKELY(!(--band_left))) {
647
648
649
650
          --band_counts;
          band_left = *band_counts;
          --token_costs;
        }
651
        continue;
Jingning Han's avatar
Jingning Han committed
652
653
654
655
656
657
658
659
660
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
661
        base_bits = 0;
Jingning Han's avatar
Jingning Han committed
662
      } else {
663
        base_bits = av1_get_token_cost(x, &t0, cat6_bits);
Jingning Han's avatar
Jingning Han committed
664
665
        t1 = t0;
      }
666
667

      if (next_shortcut) {
668
        if (LIKELY(next < default_eob)) {
669
          if (t0 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
670
            token_cache[rc] = av1_pt_energy_class[t0];
671
            pt = get_coef_context(nb, token_cache, i + 1);
672
673
            rate0 += get_token_bit_costs(*token_costs, !x, pt,
                                         tokens[next][0].token);
674
675
          }
          if (t1 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
676
            token_cache[rc] = av1_pt_energy_class[t1];
677
            pt = get_coef_context(nb, token_cache, i + 1);
678
679
            rate1 += get_token_bit_costs(*token_costs, !x, pt,
                                         tokens[next][1].token);
680
681
682
683
684
685
686
687
688
          }
        }

        UPDATE_RD_COST();
        /* And pick the best. */
        best = rd_cost1 < rd_cost0;
      } else {
        // The two states in next stage are identical.
        if (next < default_eob && t0 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
689
          token_cache[rc] = av1_pt_energy_class[t0];
Jingning Han's avatar
Jingning Han committed
690
          pt = get_coef_context(nb, token_cache, i + 1);
691
692
          rate0 +=
              get_token_bit_costs(*token_costs, !x, pt, tokens[next][0].token);
Jingning Han's avatar
Jingning Han committed
693
        }
694
        best = 0;
Jingning Han's avatar
Jingning Han committed
695
696
      }

697
#if CONFIG_NEW_QUANT
Thomas Davies's avatar
Thomas Davies committed
698
      dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
699
           (coeff[rc] << shift);
700
#if CONFIG_HIGHBITDEPTH
hui su's avatar
hui su committed
701
702
703
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
704
#endif  // CONFIG_HIGHBITDEPTH
705
#else   // CONFIG_NEW_QUANT
706
#if CONFIG_HIGHBITDEPTH
hui su's avatar
hui su committed
707
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Thomas Davies's avatar
Thomas Davies committed
708
        dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
hui su's avatar
hui su committed
709
      } else {
Thomas Davies's avatar
Thomas Davies committed
710
        dx -= (dqv + sz) ^ sz;
hui su's avatar
hui su committed
711
712
      }
#else
Thomas Davies's avatar
Thomas Davies committed
713
      dx -= (dqv + sz) ^ sz;
714
#endif  // CONFIG_HIGHBITDEPTH
715
#endif  // CONFIG_NEW_QUANT
716
      d2 = (int64_t)dx * dx;
hui su's avatar
hui su committed
717

Jingning Han's avatar
Jingning Han committed
718
719
720
721
722
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
723
724

      if (x) {
725
#if CONFIG_NEW_QUANT
Yaowu Xu's avatar
Yaowu Xu committed
726
        tokens[i][1].dqc = av1_dequant_abscoeff_nuq(
Thomas Davies's avatar
Thomas Davies committed
727
            abs(x), dqv, dequant_val[band_translate[i]]);
728
729
730
        tokens[i][1].dqc = shift ? ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift)
                                 : tokens[i][1].dqc;
        if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
731
#else
732
733
        if (x < 0)
          tokens[i][1].dqc = -((-x * dqv) >> shift);
734
        else
735
          tokens[i][1].dqc = (x * dqv) >> shift;
736
#endif  // CONFIG_NEW_QUANT
737
738
739
740
      } else {
        tokens[i][1].dqc = 0;
      }

741
      tokens[i][1].best_index = best;
Jingning Han's avatar
Jingning Han committed
742
743
744
745
746
747
748
749
      /* Finally, make this the new head of the trellis. */
      next = i;
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       *  add a new trellis node, but we do need to update the costs.
       */
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
750
      pt = get_coef_context(nb, token_cache, i + 1);
Jingning Han's avatar
Jingning Han committed
751
752
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != EOB_TOKEN) {
753
        tokens[next][0].rate += get_token_bit_costs(*token_costs, 1, pt, t0);
Jingning Han's avatar
Jingning Han committed
754
755
756
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != EOB_TOKEN) {
757
        tokens[next][1].rate += get_token_bit_costs(*token_costs, 1, pt, t1);
Jingning Han's avatar
Jingning Han committed
758
759
        tokens[next][1].token = ZERO_TOKEN;
      }
760
      tokens[i][0].best_index = tokens[i][1].best_index = 0;
761
      shortcut = (tokens[next][0].rate != tokens[next][1].rate);
Jingning Han's avatar
Jingning Han committed
762
763
      /* Don't update next, because we didn't add a new node. */
    }
764

765
    if (UNLIKELY(!(--band_left))) {
766
767
768
769
      --band_counts;
      band_left = *band_counts;
      --token_costs;
    }
Jingning Han's avatar
Jingning Han committed
770
771
772
773
774
775
776
777
778
  }

  /* Now pick the best path through the whole trellis. */
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
779
780
  rate0 += get_token_bit_costs(*token_costs, 0, ctx, t0);
  rate1 += get_token_bit_costs(*token_costs, 0, ctx, t1);
Jingning Han's avatar
Jingning Han committed
781
782
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
783

Jingning Han's avatar
Jingning Han committed
784
  final_eob = -1;
785

Jingning Han's avatar
Jingning Han committed
786
787
788
  for (i = next; i < eob; i = next) {
    const int x = tokens[i][best].qc;
    const int rc = scan[i];
789
    if (x) final_eob = i;
Jingning Han's avatar
Jingning Han committed
790
    qcoeff[rc] = x;
791
792
    dqcoeff[rc] = tokens[i][best].dqc;

Jingning Han's avatar
Jingning Han committed
793
    next = tokens[i][best].next;
794
    best = tokens[i][best].best_index;
Jingning Han's avatar
Jingning Han committed
795
796
797
798
  }
  final_eob++;

  mb->plane[plane].eobs[block] = final_eob;
799
  assert(final_eob <= default_eob);
Jingning Han's avatar
Jingning Han committed
800
  return final_eob;
801
802
803
804
805
806
807
808
809
810
811
812
813
}

#endif  // USE_GREEDY_OPTIMIZE_B

/*
 * Entry point for optimizing the quantized coefficients of one transform
 * block.
 *
 * Reads the block's current eob from mb->plane[plane].eobs[block] and returns
 * it unchanged when it is zero, when the current segment is flagged lossless,
 * or when the build uses CONFIG_PVQ. In every other case the work is handed to
 * optimize_b_greedy() or optimize_b_org() (chosen at compile time through
 * USE_GREEDY_OPTIMIZE_B) and that function's return value is passed back.
 */
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
                   TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  const int eob = mb->plane[plane].eobs[block];

  /* Debug-only invariant: exactly one of "qindex is zero" and "segment is not
   * lossless" may hold. */
  assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));

  /* Nothing to optimize for an empty block or a lossless segment. The eob
   * test comes first so the lossless lookup is skipped for empty blocks. */
  if (eob == 0 || xd->lossless[xd->mi[0]->mbmi.segment_id]) return eob;

#if CONFIG_PVQ
  /* PVQ builds never run the optimizers below; the eob is returned as is. */
  (void)cm;
  (void)tx_size;
  (void)ctx;
  return eob;
#elif USE_GREEDY_OPTIMIZE_B
  return optimize_b_greedy(cm, mb, plane, block, tx_size, ctx);
#else   // USE_GREEDY_OPTIMIZE_B
  return optimize_b_org(cm, mb, plane, block, tx_size, ctx);
#endif  // CONFIG_PVQ
}
826

Thomas Daede's avatar
Thomas Daede committed
827
#if !CONFIG_PVQ
828
#if CONFIG_HIGHBITDEPTH
Angie Chiang's avatar
Angie Chiang committed
829
830
typedef enum QUANT_FUNC {
  QUANT_FUNC_LOWBD = 0,
831
  QUANT_FUNC_HIGHBD = 1,
832
  QUANT_FUNC_TYPES = 2
Angie Chiang's avatar
Angie Chiang committed
833
834
} QUANT_FUNC;

835
836
static AV1_QUANT_FACADE
    quant_func_list[AV1_XFORM_QUANT_TYPES][QUANT_FUNC_TYPES] = {
837
#if !CONFIG_NEW_QUANT
838
      { av1_quantize_fp_facade, av1_highbd_quantize_fp_facade },
Yaowu Xu's avatar
Yaowu Xu committed
839
840
      { av1_quantize_b_facade, av1_highbd_quantize_b_facade },
      { av1_quantize_dc_facade, av1_highbd_quantize_dc_facade },
841
#else   // !CONFIG_NEW_QUANT
842
843
844
      { av1_quantize_fp_nuq_facade, av1_highbd_quantize_fp_nuq_facade },
      { av1_quantize_b_nuq_facade, av1_highbd_quantize_b_nuq_facade },
      { av1_quantize_dc_nuq_facade, av1_highbd_quantize_dc_nuq_facade },
845
#endif  // !CONFIG_NEW_QUANT
846
847
      { NULL, NULL }
    };
848

Thomas Daede's avatar
Thomas Daede committed
849
#else
850

Angie Chiang's avatar
Angie Chiang committed
851
852
typedef enum QUANT_FUNC {
  QUANT_FUNC_LOWBD = 0,
853
  QUANT_FUNC_TYPES = 1
Angie Chiang's avatar
Angie Chiang committed
854
} QUANT_FUNC;
Angie Chiang's avatar
Angie Chiang committed
855

856
857
static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES]
                                       [QUANT_FUNC_TYPES] = {
858
#if !CONFIG_NEW_QUANT
clang-format's avatar
clang-format committed
859
860
861
                                         { av1_quantize_fp_facade },
                                         { av1_quantize_b_facade },
                                         { av1_quantize_dc_facade },
862
#else   // !CONFIG_NEW_QUANT
863
864
865
                                         { av1_quantize_fp_nuq_facade },
                                         { av1_quantize_b_nuq_facade },
                                         { av1_quantize_dc_nuq_facade },
866
#endif  // !CONFIG_NEW_QUANT
clang-format's avatar
clang-format committed
867
868
                                         { NULL }
                                       };
869
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
870
#endif  // CONFIG_PVQ
871

Angie Chiang's avatar
Angie Chiang committed
872
873
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
                     int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
874
875
                     TX_SIZE tx_size, int ctx,
                     AV1_XFORM_QUANT xform_quant_idx) {
Jingning Han's avatar
Jingning Han committed
876
  MACROBLOCKD *const xd = &x->e_mbd;
877
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Yushin Cho's avatar
Yushin Cho committed
878
#if !(CONFIG_PVQ || CONFIG_DAALA_DIST)
Jingning Han's avatar
Jingning Han committed
879
880
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
881
882
883
884
#else
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
#endif
885
  PLANE_TYPE plane_type = get_plane_type(plane);
886
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
887
  const int is_inter = is_inter_block(mbmi);
Angie Chiang's avatar
Angie Chiang committed
888
  const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter);
Jingning Han's avatar
Jingning Han committed
889
890
891
892
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
893
  const int diff_stride = block_size_wide[plane_bsize];
894
#if CONFIG_AOM_QM
895
  int seg_id = mbmi->segment_id;
896
897
  const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size];
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size];
898
#endif
Angie Chiang's avatar
Angie Chiang committed
899
900

  FWD_TXFM_PARAM fwd_txfm_param;
901

Yushin Cho's avatar
Yushin Cho committed
902
903
904
905
906
907
908
909
#if CONFIG_PVQ || CONFIG_DAALA_DIST
  uint8_t *dst;
  int16_t *pred;
  const int dst_stride = pd->dst.stride;
  int tx_blk_size;
  int i, j;
#endif

910
911
#if !CONFIG_PVQ
  const int tx2d_size = tx_size_2d[tx_size];
912
  QUANT_PARAM qparam;
913
914
  const int16_t *src_diff;

915
916
  src_diff =
      &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
917
  qparam.log_scale = av1_get_tx_scale(tx_size);
918
919
920
921
922
923
924
925
#if CONFIG_NEW_QUANT
  qparam.tx_size = tx_size;
  qparam.dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
#endif  // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
  qparam.qmatrix = qmatrix;
  qparam.iqmatrix = iqmatrix;
#endif  // CONFIG_AOM_QM
926
927
928
929
#else
  tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
  int skip = 1;
  PVQ_INFO *pvq_info = NULL;
Yushin Cho's avatar
Yushin Cho committed
930
931
932
  uint8_t *src;
  int16_t *src_int16;
  const int src_stride = p->src.stride;
933

934
  (void)ctx;
935
936
937
938
939
940
941
  (void)scan_order;
  (void)qcoeff;

  if (x->pvq_coded) {
    assert(block < MAX_PVQ_BLOCKS_IN_SB);
    pvq_info = &x->pvq[block][plane];
  }
942
943
944
  src = &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
  src_int16 =
      &p->src_int16[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
Yushin Cho's avatar
Yushin Cho committed
945
946
947

  // transform block size in pixels
  tx_blk_size = tx_size_wide[tx_size];
948
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
949
950
951
952
953
954
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        src_int16[diff_stride * j + i] =
            CONVERT_TO_SHORTPTR(src)[src_stride * j + i];
  } else {
955
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
956
957
958
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        src_int16[diff_stride * j + i] = src[src_stride * j + i];
959
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
960
  }
961
#endif  // CONFIG_HIGHBITDEPTH
Yushin Cho's avatar
Yushin Cho committed
962
963
964
965
#endif

#if CONFIG_PVQ || CONFIG_DAALA_DIST
  dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
966
  pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
967
968
969
970

  // transform block size in pixels
  tx_blk_size = tx_size_wide[tx_size];

Thomas Daede's avatar
Thomas Daede committed
971
972
// copy uint8 orig and predicted block to int16 buffer
// in order to use existing VP10 transform functions
973
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
974
975
976
977
978
979
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred[diff_stride * j + i] =
            CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
  } else {
980
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
981
982
983
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred[diff_stride * j + i] = dst[dst_stride * j + i];
984
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
985
  }
986
#endif  // CONFIG_HIGHBITDEPTH
987
#endif
Yushin Cho's avatar
Yushin Cho committed
988

989
  (void)ctx;
990
991

  fwd_txfm_param.tx_type = tx_type;
Angie Chiang's avatar
Angie Chiang committed
992
  fwd_txfm_param.tx_size = tx_size;
993
  fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];
Angie Chiang's avatar
Angie Chiang committed
994

Thomas Daede's avatar
Thomas Daede committed
995
#if !CONFIG_PVQ
996
#if CONFIG_HIGHBITDEPTH
997
  fwd_txfm_param.bd = xd->bd;
Jingning Han's avatar
Jingning Han committed
998
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
hui su's avatar
hui su committed
999
    av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
Yaowu Xu's avatar
Yaowu Xu committed
1000
    if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {