encodemb.c 62.3 KB
Newer Older
Jingning Han's avatar
Jingning Han committed
1
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

Yaowu Xu's avatar
Yaowu Xu committed
12
13
14
#include "./av1_rtcd.h"
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
Jingning Han's avatar
Jingning Han committed
15

16
#include "aom_dsp/bitwriter.h"
17
#include "aom_dsp/quantize.h"
Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_mem/aom_mem.h"
19
#include "aom_ports/mem.h"
Jingning Han's avatar
Jingning Han committed
20

21
22
23
24
#include "av1/common/idct.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
Jingning Han's avatar
Jingning Han committed
25

26
#include "av1/encoder/av1_quantize.h"
27
#include "av1/encoder/encodemb.h"
28
29
30
#if CONFIG_LV_MAP
#include "av1/encoder/encodetxb.h"
#endif
31
32
33
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/tokenize.h"
Jingning Han's avatar
Jingning Han committed
34

35
36
37
38
39
40
#if CONFIG_PVQ
#include "av1/encoder/encint.h"
#include "av1/common/partition.h"
#include "av1/encoder/pvq_encoder.h"
#endif

41
42
43
44
#if CONFIG_CFL
#include "av1/common/cfl.h"
#endif

45
46
47
// Returns nonzero when either dimension is smaller than 4, in which case the
// caller falls back to the plain C subtraction routine.
static int check_subtract_block_size(int w, int h) {
  if (w < 4) return 1;
  return h < 4;
}

Angie Chiang's avatar
Angie Chiang committed
48
49
50
51
// Dispatches a rows x cols block subtraction (src8 / pred8 -> diff) to the
// matching aom_*subtract_block routine:
//  - the "_c" variants are used when check_subtract_block_size() reports a
//    small block;
//  - the highbd variants are used when the current frame buffer carries
//    YV12_FLAG_HIGHBITDEPTH (only when built with CONFIG_HIGHBITDEPTH).
static void subtract_block(const MACROBLOCKD *xd, int rows, int cols,
                           int16_t *diff, ptrdiff_t diff_stride,
                           const uint8_t *src8, ptrdiff_t src_stride,
                           const uint8_t *pred8, ptrdiff_t pred_stride) {
  const int force_c = check_subtract_block_size(rows, cols);

#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    if (force_c) {
      aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8,
                                  src_stride, pred8, pred_stride, xd->bd);
    } else {
      aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8,
                                src_stride, pred8, pred_stride, xd->bd);
    }
    return;
  }
#else
  // xd is only consulted in high-bitdepth builds.
  (void)xd;
#endif  // CONFIG_HIGHBITDEPTH

  if (force_c) {
    aom_subtract_block_c(rows, cols, diff, diff_stride, src8, src_stride,
                         pred8, pred_stride);
  } else {
    aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
                       pred_stride);
  }
}

Angie Chiang's avatar
Angie Chiang committed
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// Subtracts the prediction (pd->dst) from the source (p->src) for a single
// transform block of `plane` and writes the residual into p->src_diff.
// blk_row / blk_col locate the block in units of
// (1 << tx_size_wide_log2[0]) samples; the residual buffer uses the width of
// plane_bsize as its stride.
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
                      int blk_col, int blk_row, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];

  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const int diff_stride = block_size_wide[plane_bsize];

  // Transform block dimensions in samples.
  const int txb_w = tx_size_wide[tx_size];
  const int txb_h = tx_size_high[tx_size];

  // Top-left sample of the transform block inside each buffer.
  uint8_t *src =
      p->src.buf + ((blk_row * src_stride + blk_col) << tx_size_wide_log2[0]);
  uint8_t *dst =
      pd->dst.buf + ((blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]);
  int16_t *src_diff =
      p->src_diff + ((blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]);

  subtract_block(xd, txb_h, txb_w, src_diff, diff_stride, src, src_stride, dst,
                 dst_stride);
}

Angie Chiang's avatar
Angie Chiang committed
101
102
103
104
105
106
107
108
109
110
// Subtracts the prediction from the source over the whole of `plane` for a
// block of size `bsize`, storing the residual in p->src_diff with a stride
// equal to the plane block width.
void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  const MACROBLOCKD *xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];

  // Block size of this plane as returned by get_plane_block_size().
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int plane_w = block_size_wide[plane_bsize];
  const int plane_h = block_size_high[plane_bsize];

  subtract_block(xd, plane_h, plane_w, p->src_diff, plane_w, p->src.buf,
                 p->src.stride, pd->dst.buf, pd->dst.stride);
}

113
114
// Empirically obtained scale factors applied to mb->rdmult by the
// coefficient optimizers (the product is then halved). Indexed as
// [ref][plane_type], where ref is the result of is_inter_block().
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
#if CONFIG_EC_ADAPT
  { 10, 7 },
  { 8, 5 },
#else
  { 10, 6 },
  { 8, 6 },
#endif
};
Jingning Han's avatar
Jingning Han committed
121

122
123
124
125
126
/* Recomputes rd_cost0 / rd_cost1 from rate0/error0 and rate1/error1 using
 * rdmult and rddiv. All of those names must be in scope at the expansion
 * site. The body is wrapped in do { } while (0) so that `UPDATE_RD_COST();`
 * forms exactly one statement and stays valid in an unbraced if/else.
 */
#define UPDATE_RD_COST()                             \
  do {                                               \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \
  } while (0)
Jingning Han's avatar
Jingning Han committed
127

128
129
130
// Looks up the bit cost of `token` in context `ctx`.
// The first index of token_costs selects between two cost tables:
//  - with CONFIG_NEW_TOKENSET it is 1 exactly when the token is ZERO_TOKEN or
//    EOB_TOKEN, and skip_eob is ignored;
//  - otherwise it is skip_eob.
static INLINE unsigned int get_token_bit_costs(
    unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob,
    int ctx, int token) {
#if CONFIG_NEW_TOKENSET
  const int table_sel = (token == ZERO_TOKEN) || (token == EOB_TOKEN);
  (void)skip_eob;
#else
  const int table_sel = skip_eob;
#endif
  return token_costs[table_sel][ctx][token];
}

139
140
141
142
#define USE_GREEDY_OPTIMIZE_B 0

#if USE_GREEDY_OPTIMIZE_B

143
typedef struct av1_token_state_greedy {
144
145
146
  int16_t token;
  tran_low_t qc;
  tran_low_t dqc;
147
} av1_token_state_greedy;
148

149
150
// Greedy rate-distortion optimization of the quantized coefficients of one
// transform block (only compiled when USE_GREEDY_OPTIMIZE_B is nonzero).
//
// Coefficients are visited in scan order. For every nonzero level x the
// function compares keeping x against lowering its magnitude by one (x_a),
// keeps whichever has the lower RD cost, and simultaneously tracks the
// position at which ending the block (EOB) gives the lowest accumulated cost.
//
// cm       - common state, used to select the scan order.
// mb       - macroblock; qcoeff, dqcoeff and eobs[block] of `plane` are
//            updated in place.
// plane    - plane index.
// block    - block offset inside the plane's coefficient buffers.
// tx_size  - transform size of the block.
// ctx      - entropy context of the first coefficient.
//
// Returns the new end-of-block position (also stored in eobs[block]).
static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
                             int block, TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
  av1_token_state_greedy tokens[MAX_TX_SQUARE + 1][2];
  uint8_t token_cache[MAX_TX_SQUARE];
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
  const PLANE_TYPE plane_type = pd->plane_type;
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
  const SCAN_ORDER *const scan_order =
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
  const int16_t *const scan = scan_order->scan;
  const int16_t *const nb = scan_order->neighbors;
  int dqv;
  const int shift = av1_get_tx_scale(tx_size);
#if CONFIG_AOM_QM
  int seg_id = xd->mi[0]->mbmi.segment_id;
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
#endif
#if CONFIG_NEW_QUANT
  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
#elif !CONFIG_AOM_QM
  const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
#endif  // CONFIG_NEW_QUANT
  int sz = 0;
  const int64_t rddiv = mb->rddiv;
  int64_t rd_cost0, rd_cost1;
  int16_t t0, t1;
  int i, final_eob;
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
  const int default_eob = tx_size_2d[tx_size];

  assert(mb->qindex > 0);

  assert((!plane_type && !plane) || (plane_type && plane));
  assert(eob <= default_eob);

  int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;

  int64_t rate0, rate1;
  // Seed the token table and the context cache from the current levels.
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    int x = qcoeff[rc];
    t0 = av1_get_token(x);

    tokens[i][0].qc = x;
    tokens[i][0].token = t0;
    tokens[i][0].dqc = dqcoeff[rc];

    token_cache[rc] = av1_pt_energy_class[t0];
  }
  // Sentinel entry marking the end of block.
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][0].dqc = 0;
  tokens[eob][1] = tokens[eob][0];

  unsigned int(*token_costs_ptr)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      token_costs;

  final_eob = 0;

  int64_t eob_cost0, eob_cost1;

  const int ctx0 = ctx;
  /* Record the r-d cost */
  int64_t accu_rate = 0;
  int64_t accu_error = 0;

  // Baseline: cost of signalling EOB immediately (final_eob == 0).
  rate0 = get_token_bit_costs(*(token_costs_ptr + band_translate[0]), 0, ctx0,
                              EOB_TOKEN);
  int64_t best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error);

  // Level chosen for the previous scan position; selects the cost table of
  // the current one. Starts nonzero so the first position uses table 0.
  int x_prev = 1;

  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
    int x = qcoeff[rc];
    sz = -(x < 0);  // 0 for x >= 0, -1 for x < 0

    int band_cur = band_translate[i];
    int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i);
    int token_tree_sel_cur = (x_prev == 0);

    if (x == 0) {
      // no need to search when x == 0
      rate0 =
          get_token_bit_costs(*(token_costs_ptr + band_cur), token_tree_sel_cur,
                              ctx_cur, tokens[i][0].token);
      accu_rate += rate0;
      x_prev = 0;
      // accu_error does not change when x==0
    } else {
      /*  Computing distortion
       */
      // compute the distortion for the first candidate
      // and the distortion for quantizing to 0.
      int dx0 = (-coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx0 >>= xd->bd - 8;
      }
#endif
      int64_t d0 = (int64_t)dx0 * dx0;

      // Second candidate: magnitude of x lowered by one.
      int x_a = x - 2 * sz - 1;
      int64_t d2, d2_a;

      int dx;

#if CONFIG_AOM_QM
      int iwt = iqmatrix[rc];
      dqv = dequant_ptr[rc != 0];
      dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#else
      dqv = dequant_ptr[rc != 0];
#endif

      dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_HIGHBITDEPTH
      d2 = (int64_t)dx * dx;

      /* compute the distortion for the second candidate
       * x_a = x - 2 * sz - 1;
       */
      if (x_a != 0) {
#if CONFIG_NEW_QUANT
        // Dequantize the lowered level x_a (not x) so that d2_a really is the
        // distortion of the second candidate. The multiply avoids
        // left-shifting a negative coefficient.
        dx = av1_dequant_coeff_nuq(x_a, dqv, dequant_val[band_translate[i]]) -
             coeff[rc] * (1 << shift);
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx >>= xd->bd - 8;
        }
#endif  // CONFIG_HIGHBITDEPTH
#else   // CONFIG_NEW_QUANT
#if CONFIG_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
        } else {
          dx -= (dqv + sz) ^ sz;
        }
#else
        dx -= (dqv + sz) ^ sz;
#endif  // CONFIG_HIGHBITDEPTH
#endif  // CONFIG_NEW_QUANT
        d2_a = (int64_t)dx * dx;
      } else {
        // Lowering to zero: distortion equals that of a zero coefficient.
        d2_a = d0;
      }
      /*  Computing rates and r-d cost
       */

      int best_x, best_eob_x;
      int64_t base_bits, next_bits0, next_bits1;
      int64_t next_eob_bits0, next_eob_bits1;

      // rate cost of x
      base_bits = av1_get_token_cost(x, &t0, cat6_bits);
      rate0 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
                                              token_tree_sel_cur, ctx_cur, t0);

      // rate cost of x_a
      base_bits = av1_get_token_cost(x_a, &t1, cat6_bits);
      rate1 = base_bits + get_token_bit_costs(*(token_costs_ptr + band_cur),
                                              token_tree_sel_cur, ctx_cur, t1);

      next_bits0 = 0;
      next_bits1 = 0;
      next_eob_bits0 = 0;
      next_eob_bits1 = 0;

      // The choice made here changes the context of the next position, so
      // include the next token's cost (and the cost of an EOB there) for
      // each candidate.
      if (i < default_eob - 1) {
        int ctx_next, token_tree_sel_next;
        int band_next = band_translate[i + 1];

        token_cache[rc] = av1_pt_energy_class[t0];
        ctx_next = get_coef_context(nb, token_cache, i + 1);
        token_tree_sel_next = (x == 0);

        next_bits0 = get_token_bit_costs(*(token_costs_ptr + band_next),
                                         token_tree_sel_next, ctx_next,
                                         tokens[i + 1][0].token);
        next_eob_bits0 =
            get_token_bit_costs(*(token_costs_ptr + band_next),
                                token_tree_sel_next, ctx_next, EOB_TOKEN);

        token_cache[rc] = av1_pt_energy_class[t1];
        ctx_next = get_coef_context(nb, token_cache, i + 1);
        token_tree_sel_next = (x_a == 0);

        next_bits1 = get_token_bit_costs(*(token_costs_ptr + band_next),
                                         token_tree_sel_next, ctx_next,
                                         tokens[i + 1][0].token);

        if (x_a != 0) {
          next_eob_bits1 =
              get_token_bit_costs(*(token_costs_ptr + band_next),
                                  token_tree_sel_next, ctx_next, EOB_TOKEN);
        }
      }

      rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), d2);
      rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), d2_a);

      best_x = (rd_cost1 < rd_cost0);

      eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0),
                         (accu_error + d2 - d0));
      eob_cost1 = eob_cost0;
      if (x_a != 0) {
        eob_cost1 = RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1),
                           (accu_error + d2_a - d0));
        best_eob_x = (eob_cost1 < eob_cost0);
      } else {
        best_eob_x = 0;
      }

      int dqc, dqc_a = 0;

      dqc = dqcoeff[rc];
      // The dequantized value of x_a is only needed if x_a wins either test.
      if (best_x + best_eob_x) {
        if (x_a != 0) {
#if CONFIG_NEW_QUANT
          dqc_a = av1_dequant_abscoeff_nuq(abs(x_a), dqv,
                                           dequant_val[band_translate[i]]);
          dqc_a = shift ? ROUND_POWER_OF_TWO(dqc_a, shift) : dqc_a;
          if (sz) dqc_a = -dqc_a;
#else
          // The 32x32 transform coefficient uses half quantization step size.
          // Account for the rounding difference in the dequantized coefficient
          // value when the quantization index is dropped from an even number
          // to an odd number.

#if CONFIG_AOM_QM
          tran_low_t offset = dqv >> shift;
#else
          tran_low_t offset = dq_step[rc != 0];
#endif
          if (shift & x_a) offset += (dqv & 0x01);

          if (sz == 0)
            dqc_a = dqcoeff[rc] - offset;
          else
            dqc_a = dqcoeff[rc] + offset;
#endif  // CONFIG_NEW_QUANT
        } else {
          dqc_a = 0;
        }  // if (x_a != 0)
      }

      // record the better quantized value
      if (best_x) {
        qcoeff[rc] = x_a;
        dqcoeff[rc] = dqc_a;

        accu_rate += rate1;
        accu_error += d2_a - d0;
        assert(d2_a <= d0);

        token_cache[rc] = av1_pt_energy_class[t1];
      } else {
        accu_rate += rate0;
        accu_error += d2 - d0;
        assert(d2 <= d0);

        token_cache[rc] = av1_pt_energy_class[t0];
      }

      x_prev = qcoeff[rc];

      // determine whether to move the eob position to i+1
      int64_t best_eob_cost_i = eob_cost0;

      tokens[i][1].token = t0;
      tokens[i][1].qc = x;
      tokens[i][1].dqc = dqc;

      if ((x_a != 0) && (best_eob_x)) {
        best_eob_cost_i = eob_cost1;

        tokens[i][1].token = t1;
        tokens[i][1].qc = x_a;
        tokens[i][1].dqc = dqc_a;
      }

      if (best_eob_cost_i < best_block_rd_cost) {
        best_block_rd_cost = best_eob_cost_i;
        final_eob = i + 1;
      }
    }  // if (x==0)
  }    // for (i)

  assert(final_eob <= eob);
  if (final_eob > 0) {
    // The last coefficient before EOB takes the value that was best under the
    // "EOB follows" assumption, which may differ from the in-loop choice.
    assert(tokens[final_eob - 1][1].qc != 0);
    i = final_eob - 1;
    int rc = scan[i];
    qcoeff[rc] = tokens[i][1].qc;
    dqcoeff[rc] = tokens[i][1].dqc;
  }

  // Everything from the new EOB onwards is zeroed.
  for (i = final_eob; i < eob; i++) {
    int rc = scan[i];
    qcoeff[rc] = 0;
    dqcoeff[rc] = 0;
  }

  mb->plane[plane].eobs[block] = final_eob;
  return final_eob;
}

#else  // USE_GREEDY_OPTIMIZE_B

476
typedef struct av1_token_state_org {
477
478
479
480
481
482
483
  int64_t error;
  int rate;
  int16_t next;
  int16_t token;
  tran_low_t qc;
  tran_low_t dqc;
  uint8_t best_index;
484
} av1_token_state_org;
485

486
487
static int optimize_b_org(const AV1_COMMON *cm, MACROBLOCK *mb, int plane,
                          int block, TX_SIZE tx_size, int ctx) {
Jingning Han's avatar
Jingning Han committed
488
489
490
491
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi[0]->mbmi);
492
  av1_token_state_org tokens[MAX_TX_SQUARE + 1][2];
493
  uint8_t token_cache[MAX_TX_SQUARE];
494
  const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
Jingning Han's avatar
Jingning Han committed
495
496
497
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const int eob = p->eobs[block];
498
  const PLANE_TYPE plane_type = pd->plane_type;
499
  const int default_eob = tx_size_2d[tx_size];
500
501
  const int16_t *const dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
502
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
503
  const SCAN_ORDER *const scan_order =
Angie Chiang's avatar
Angie Chiang committed
504
      get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
505
506
  const int16_t *const scan = scan_order->scan;
  const int16_t *const nb = scan_order->neighbors;
Thomas Davies's avatar
Thomas Davies committed
507
  int dqv;
508
  const int shift = av1_get_tx_scale(tx_size);
509
510
#if CONFIG_AOM_QM
  int seg_id = xd->mi[0]->mbmi.segment_id;
511
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size];
512
#endif
513
#if CONFIG_NEW_QUANT
514
  int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type);
515
  const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq];
516
#elif !CONFIG_AOM_QM
517
  const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
518
#endif  // CONFIG_NEW_QUANT
Jingning Han's avatar
Jingning Han committed
519
  int next = eob, sz = 0;
520
  const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
hui su's avatar
hui su committed
521
  const int64_t rddiv = mb->rddiv;
Jingning Han's avatar
Jingning Han committed
522
  int64_t rd_cost0, rd_cost1;
523
524
  int rate0, rate1;
  int64_t error0, error1;
Jingning Han's avatar
Jingning Han committed
525
  int16_t t0, t1;
526
527
  int best, band = (eob < default_eob) ? band_translate[eob]
                                       : band_translate[eob - 1];
528
  int pt, i, final_eob;
529
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
530
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
531
      mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref];
532
533
  const uint16_t *band_counts = &band_count_table[tx_size][band];
  uint16_t band_left = eob - band_cum_count_table[tx_size][band] + 1;
534
535
  int shortcut = 0;
  int next_shortcut = 0;
536

Fangwen Fu's avatar
Fangwen Fu committed
537
538
539
540
541
#if CONFIG_EXT_DELTA_Q
  const int qindex = cm->seg.enabled
                         ? av1_get_qindex(&cm->seg, xd->mi[0]->mbmi.segment_id,
                                          cm->base_qindex)
                         : cm->base_qindex;
542
543
  assert(qindex > 0);
  (void)qindex;
Fangwen Fu's avatar
Fangwen Fu committed
544
#else
545
  assert(mb->qindex > 0);
Fangwen Fu's avatar
Fangwen Fu committed
546
#endif
547

548
  token_costs += band;
Jingning Han's avatar
Jingning Han committed
549

550
  assert((!plane_type && !plane) || (plane_type && plane));
Jingning Han's avatar
Jingning Han committed
551
  assert(eob <= default_eob);
552

Jingning Han's avatar
Jingning Han committed
553
554
555
556
557
558
559
560
561
  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  tokens[eob][1] = tokens[eob][0];

562
563
  for (i = 0; i < eob; i++) {
    const int rc = scan[i];
564
    tokens[i][0].rate = av1_get_token_cost(qcoeff[rc], &t0, cat6_bits);
565
    tokens[i][0].token = t0;
Yaowu Xu's avatar
Yaowu Xu committed
566
    token_cache[rc] = av1_pt_energy_class[t0];
567
  }
Jingning Han's avatar
Jingning Han committed
568
569

  for (i = eob; i-- > 0;) {
570
571
    int base_bits, dx;
    int64_t d2;
Jingning Han's avatar
Jingning Han committed
572
    const int rc = scan[i];
Thomas Davies's avatar
Thomas Davies committed
573
    int x = qcoeff[rc];
574
575
#if CONFIG_AOM_QM
    int iwt = iqmatrix[rc];
Thomas Davies's avatar
Thomas Davies committed
576
577
578
579
    dqv = dequant_ptr[rc != 0];
    dqv = ((iwt * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS;
#else
    dqv = dequant_ptr[rc != 0];
580
#endif
581
    next_shortcut = shortcut;
582

Jingning Han's avatar
Jingning Han committed
583
    /* Only add a trellis state for non-zero coefficients. */
584
    if (UNLIKELY(x)) {
Jingning Han's avatar
Jingning Han committed
585
586
587
588
589
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
590

591
592
593
594
      if (next_shortcut) {
        /* Consider both possible successor states. */
        if (next < default_eob) {
          pt = get_coef_context(nb, token_cache, i + 1);
595
596
597
598
          rate0 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
          rate1 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][1].token);
599
600
601
602
603
604
605
        }
        UPDATE_RD_COST();
        /* And pick the best. */
        best = rd_cost1 < rd_cost0;
      } else {
        if (next < default_eob) {
          pt = get_coef_context(nb, token_cache, i + 1);
606
607
          rate0 +=
              get_token_bit_costs(*token_costs, 0, pt, tokens[next][0].token);
608
609
        }
        best = 0;
Jingning Han's avatar
Jingning Han committed
610
      }
611
612

      dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
613
#if CONFIG_HIGHBITDEPTH
Jingning Han's avatar
Jingning Han committed
614
615
616
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
617
#endif  // CONFIG_HIGHBITDEPTH
618
      d2 = (int64_t)dx * dx;
619
      tokens[i][0].rate += (best ? rate1 : rate0);
Jingning Han's avatar
Jingning Han committed
620
621
622
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].qc = x;
623
      tokens[i][0].dqc = dqcoeff[rc];
624
      tokens[i][0].best_index = best;
Jingning Han's avatar
Jingning Han committed
625
626
627
628
629

      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

630
      // The threshold of 3 is empirically obtained.
631
      if (UNLIKELY(abs(x) > 3)) {
632
633
        shortcut = 0;
      } else {
634
#if CONFIG_NEW_QUANT
Thomas Davies's avatar
Thomas Davies committed
635
        shortcut = ((av1_dequant_abscoeff_nuq(abs(x), dqv,
Yaowu Xu's avatar
Yaowu Xu committed
636
                                              dequant_val[band_translate[i]]) >
637
                     (abs(coeff[rc]) << shift)) &&
Thomas Davies's avatar
Thomas Davies committed
638
                    (av1_dequant_abscoeff_nuq(abs(x) - 1, dqv,
Yaowu Xu's avatar
Yaowu Xu committed
639
                                              dequant_val[band_translate[i]]) <
640
                     (abs(coeff[rc]) << shift)));
641
642
643
644
645
646
647
648
#else  // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
        if ((abs(x) * dequant_ptr[rc != 0] * iwt >
             ((abs(coeff[rc]) << shift) << AOM_QM_BITS)) &&
            (abs(x) * dequant_ptr[rc != 0] * iwt <
             (((abs(coeff[rc]) << shift) + dequant_ptr[rc != 0])
              << AOM_QM_BITS)))
#else
649
        if ((abs(x) * dequant_ptr[rc != 0] > (abs(coeff[rc]) << shift)) &&
650
651
            (abs(x) * dequant_ptr[rc != 0] <
             (abs(coeff[rc]) << shift) + dequant_ptr[rc != 0]))
652
#endif  // CONFIG_AOM_QM
653
654
655
          shortcut = 1;
        else
          shortcut = 0;
656
#endif  // CONFIG_NEW_QUANT
657
      }
Jingning Han's avatar
Jingning Han committed
658
659
660
661

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
662
663
664
      } else {
        tokens[i][1] = tokens[i][0];
        next = i;
665

666
        if (UNLIKELY(!(--band_left))) {
667
668
669
670
          --band_counts;
          band_left = *band_counts;
          --token_costs;
        }
671
        continue;
Jingning Han's avatar
Jingning Han committed
672
673
674
675
676
677
678
679
680
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
         */
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
681
        base_bits = 0;
Jingning Han's avatar
Jingning Han committed
682
      } else {
683
        base_bits = av1_get_token_cost(x, &t0, cat6_bits);
Jingning Han's avatar
Jingning Han committed
684
685
        t1 = t0;
      }
686
687

      if (next_shortcut) {
688
        if (LIKELY(next < default_eob)) {
689
          if (t0 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
690
            token_cache[rc] = av1_pt_energy_class[t0];
691
            pt = get_coef_context(nb, token_cache, i + 1);
692
693
            rate0 += get_token_bit_costs(*token_costs, !x, pt,
                                         tokens[next][0].token);
694
695
          }
          if (t1 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
696
            token_cache[rc] = av1_pt_energy_class[t1];
697
            pt = get_coef_context(nb, token_cache, i + 1);
698
699
            rate1 += get_token_bit_costs(*token_costs, !x, pt,
                                         tokens[next][1].token);
700
701
702
703
704
705
706
707
708
          }
        }

        UPDATE_RD_COST();
        /* And pick the best. */
        best = rd_cost1 < rd_cost0;
      } else {
        // The two states in next stage are identical.
        if (next < default_eob && t0 != EOB_TOKEN) {
Yaowu Xu's avatar
Yaowu Xu committed
709
          token_cache[rc] = av1_pt_energy_class[t0];
Jingning Han's avatar
Jingning Han committed
710
          pt = get_coef_context(nb, token_cache, i + 1);
711
712
          rate0 +=
              get_token_bit_costs(*token_costs, !x, pt, tokens[next][0].token);
Jingning Han's avatar
Jingning Han committed
713
        }
714
        best = 0;
Jingning Han's avatar
Jingning Han committed
715
716
      }

717
#if CONFIG_NEW_QUANT
Thomas Davies's avatar
Thomas Davies committed
718
      dx = av1_dequant_coeff_nuq(x, dqv, dequant_val[band_translate[i]]) -
719
           (coeff[rc] << shift);
720
#if CONFIG_HIGHBITDEPTH
hui su's avatar
hui su committed
721
722
723
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
724
#endif  // CONFIG_HIGHBITDEPTH
725
#else   // CONFIG_NEW_QUANT
726
#if CONFIG_HIGHBITDEPTH
hui su's avatar
hui su committed
727
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Thomas Davies's avatar
Thomas Davies committed
728
        dx -= ((dqv >> (xd->bd - 8)) + sz) ^ sz;
hui su's avatar
hui su committed
729
      } else {
Thomas Davies's avatar
Thomas Davies committed
730
        dx -= (dqv + sz) ^ sz;
hui su's avatar
hui su committed
731
732
      }
#else
Thomas Davies's avatar
Thomas Davies committed
733
      dx -= (dqv + sz) ^ sz;
734
#endif  // CONFIG_HIGHBITDEPTH
735
#endif  // CONFIG_NEW_QUANT
736
      d2 = (int64_t)dx * dx;
hui su's avatar
hui su committed
737

Jingning Han's avatar
Jingning Han committed
738
739
740
741
742
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
743
744

      if (x) {
745
#if CONFIG_NEW_QUANT
Yaowu Xu's avatar
Yaowu Xu committed
746
        tokens[i][1].dqc = av1_dequant_abscoeff_nuq(
Thomas Davies's avatar
Thomas Davies committed
747
            abs(x), dqv, dequant_val[band_translate[i]]);
748
749
750
        tokens[i][1].dqc = shift ? ROUND_POWER_OF_TWO(tokens[i][1].dqc, shift)
                                 : tokens[i][1].dqc;
        if (sz) tokens[i][1].dqc = -tokens[i][1].dqc;
751
#else
752
753
754
755
// The 32x32 transform coefficient uses half quantization step size.
// Account for the rounding difference in the dequantized coefficient
// value when the quantization index is dropped from an even number
// to an odd number.
Thomas Davies's avatar
Thomas Davies committed
756
757
758
759
760
761
762

#if CONFIG_AOM_QM
        tran_low_t offset = dqv >> shift;
#else
        tran_low_t offset = dq_step[rc != 0];
#endif
        if (shift & x) offset += (dqv & 0x01);
763
764
765
766
767

        if (sz == 0)
          tokens[i][1].dqc = dqcoeff[rc] - offset;
        else
          tokens[i][1].dqc = dqcoeff[rc] + offset;
768
#endif  // CONFIG_NEW_QUANT
769
770
771
772
      } else {
        tokens[i][1].dqc = 0;
      }

773
      tokens[i][1].best_index = best;
Jingning Han's avatar
Jingning Han committed
774
775
776
777
778
779
780
781
      /* Finally, make this the new head of the trellis. */
      next = i;
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       *  add a new trellis node, but we do need to update the costs.
       */
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
782
      pt = get_coef_context(nb, token_cache, i + 1);
Jingning Han's avatar
Jingning Han committed
783
784
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != EOB_TOKEN) {
785
        tokens[next][0].rate += get_token_bit_costs(*token_costs, 1, pt, t0);
Jingning Han's avatar
Jingning Han committed
786
787
788
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != EOB_TOKEN) {
789
        tokens[next][1].rate += get_token_bit_costs(*token_costs, 1, pt, t1);
Jingning Han's avatar
Jingning Han committed
790
791
        tokens[next][1].token = ZERO_TOKEN;
      }
792
      tokens[i][0].best_index = tokens[i][1].best_index = 0;
793
      shortcut = (tokens[next][0].rate != tokens[next][1].rate);
Jingning Han's avatar
Jingning Han committed
794
795
      /* Don't update next, because we didn't add a new node. */
    }
796

797
    if (UNLIKELY(!(--band_left))) {
798
799
800
801
      --band_counts;
      band_left = *band_counts;
      --token_costs;
    }
Jingning Han's avatar
Jingning Han committed
802
803
804
805
806
807
808
809
810
  }

  /* Now pick the best path through the whole trellis. */
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
811
812
  rate0 += get_token_bit_costs(*token_costs, 0, ctx, t0);
  rate1 += get_token_bit_costs(*token_costs, 0, ctx, t1);
Jingning Han's avatar
Jingning Han committed
813
814
  UPDATE_RD_COST();
  best = rd_cost1 < rd_cost0;
815

Jingning Han's avatar
Jingning Han committed
816
  final_eob = -1;
817

Jingning Han's avatar
Jingning Han committed
818
819
820
  for (i = next; i < eob; i = next) {
    const int x = tokens[i][best].qc;
    const int rc = scan[i];
821
    if (x) final_eob = i;
Jingning Han's avatar
Jingning Han committed
822
    qcoeff[rc] = x;
823
824
    dqcoeff[rc] = tokens[i][best].dqc;

Jingning Han's avatar
Jingning Han committed
825
    next = tokens[i][best].next;
826
    best = tokens[i][best].best_index;
Jingning Han's avatar
Jingning Han committed
827
828
829
830
  }
  final_eob++;

  mb->plane[plane].eobs[block] = final_eob;
831
  assert(final_eob <= default_eob);
Jingning Han's avatar
Jingning Han committed
832
  return final_eob;
833
834
835
836
837
838
839
840
841
842
843
844
845
}

#endif  // USE_GREEDY_OPTIMIZE_B

/*
 * Entry point for coefficient optimization of a single transform block.
 *
 * cm, tx_size and ctx are only forwarded to the selected optimizer.
 * mb supplies the end-of-block count (mb->plane[plane].eobs[block]), the
 * quantizer index and the per-segment lossless flags.
 *
 * Returns the block's current eob unchanged when
 *   - the block has no coefficients (eob == 0),
 *   - the block's segment is lossless, or
 *   - the build uses CONFIG_PVQ;
 * otherwise returns whatever optimize_b_greedy() or optimize_b_org()
 * returns, selected at compile time by USE_GREEDY_OPTIMIZE_B.
 */
int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
                   TX_SIZE tx_size, int ctx) {
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *const p = &mb->plane[plane];
  const int eob = p->eobs[block];
  // Exactly one of "qindex is zero" and "lossless flag is zero" must hold,
  // i.e. qindex == 0 if and only if the segment is flagged lossless.
  assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0));
  if (eob == 0) return eob;
  if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return eob;
#if CONFIG_PVQ
  // Nothing is optimized in PVQ builds; silence unused-parameter warnings
  // and hand back the eob as is. The dispatch below is then never reached.
  (void)cm;
  (void)tx_size;
  (void)ctx;
  return eob;
#endif

#if USE_GREEDY_OPTIMIZE_B
  return optimize_b_greedy(cm, mb, plane, block, tx_size, ctx);
#else   // USE_GREEDY_OPTIMIZE_B
  return optimize_b_org(cm, mb, plane, block, tx_size, ctx);
#endif  // USE_GREEDY_OPTIMIZE_B
}
858

Thomas Daede's avatar
Thomas Daede committed
859
#if !CONFIG_PVQ
860
#if CONFIG_HIGHBITDEPTH
Angie Chiang's avatar
Angie Chiang committed
861
862
typedef enum QUANT_FUNC {
  QUANT_FUNC_LOWBD = 0,
863
  QUANT_FUNC_HIGHBD = 1,
864
  QUANT_FUNC_TYPES = 2
Angie Chiang's avatar
Angie Chiang committed
865
866
} QUANT_FUNC;

867
868
static AV1_QUANT_FACADE
    quant_func_list[AV1_XFORM_QUANT_TYPES][QUANT_FUNC_TYPES] = {
869
#if !CONFIG_NEW_QUANT
870
      { av1_quantize_fp_facade, av1_highbd_quantize_fp_facade },
Yaowu Xu's avatar
Yaowu Xu committed
871
872
      { av1_quantize_b_facade, av1_highbd_quantize_b_facade },
      { av1_quantize_dc_facade, av1_highbd_quantize_dc_facade },
873
#else   // !CONFIG_NEW_QUANT
874
875
876
      { av1_quantize_fp_nuq_facade, av1_highbd_quantize_fp_nuq_facade },
      { av1_quantize_b_nuq_facade, av1_highbd_quantize_b_nuq_facade },
      { av1_quantize_dc_nuq_facade, av1_highbd_quantize_dc_nuq_facade },
877
#endif  // !CONFIG_NEW_QUANT
878
879
      { NULL, NULL }
    };
880

Thomas Daede's avatar
Thomas Daede committed
881
#else
882

Angie Chiang's avatar
Angie Chiang committed
883
884
typedef enum QUANT_FUNC {
  QUANT_FUNC_LOWBD = 0,
885
  QUANT_FUNC_TYPES = 1
Angie Chiang's avatar
Angie Chiang committed
886
} QUANT_FUNC;
Angie Chiang's avatar
Angie Chiang committed
887

888
889
static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES]
                                       [QUANT_FUNC_TYPES] = {
890
#if !CONFIG_NEW_QUANT
clang-format's avatar
clang-format committed
891
892
893
                                         { av1_quantize_fp_facade },
                                         { av1_quantize_b_facade },
                                         { av1_quantize_dc_facade },
894
#else   // !CONFIG_NEW_QUANT
895
896
897
                                         { av1_quantize_fp_nuq_facade },
                                         { av1_quantize_b_nuq_facade },
                                         { av1_quantize_dc_nuq_facade },
898
#endif  // !CONFIG_NEW_QUANT
clang-format's avatar
clang-format committed
899
900
                                         { NULL }
                                       };
901
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
902
#endif  // CONFIG_PVQ
903

Angie Chiang's avatar
Angie Chiang committed
904
905
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
                     int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
906
907
                     TX_SIZE tx_size, int ctx,
                     AV1_XFORM_QUANT xform_quant_idx) {
Jingning Han's avatar
Jingning Han committed
908
  MACROBLOCKD *const xd = &x->e_mbd;
909
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Yushin Cho's avatar
Yushin Cho committed
910
#if !(CONFIG_PVQ || CONFIG_DAALA_DIST)
Jingning Han's avatar
Jingning Han committed
911
912
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
913
914
915
916
#else
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
#endif
917
  PLANE_TYPE plane_type = get_plane_type(plane);
918
  TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
919
  const int is_inter = is_inter_block(mbmi);
Angie Chiang's avatar
Angie Chiang committed
920
  const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, is_inter);
Jingning Han's avatar
Jingning Han committed
921
922
923
924
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
925
  const int diff_stride = block_size_wide[plane_bsize];
926
#if CONFIG_AOM_QM
927
  int seg_id = mbmi->segment_id;
928
929
  const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][!is_inter][tx_size];
  const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!is_inter][tx_size];
930
#endif
Angie Chiang's avatar
Angie Chiang committed
931
932

  FWD_TXFM_PARAM fwd_txfm_param;
933

Yushin Cho's avatar
Yushin Cho committed
934
935
936
937
938
939
940
941
#if CONFIG_PVQ || CONFIG_DAALA_DIST
  uint8_t *dst;
  int16_t *pred;
  const int dst_stride = pd->dst.stride;
  int tx_blk_size;
  int i, j;
#endif

942
943
#if !CONFIG_PVQ
  const int tx2d_size = tx_size_2d[tx_size];
944
  QUANT_PARAM qparam;
945
946
  const int16_t *src_diff;

947
948
  src_diff =
      &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
949
  qparam.log_scale = av1_get_tx_scale(tx_size);
950
951
952
953
954
955
956
957
#if CONFIG_NEW_QUANT
  qparam.tx_size = tx_size;
  qparam.dq = get_dq_profile_from_ctx(x->qindex, ctx, is_inter, plane_type);
#endif  // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
  qparam.qmatrix = qmatrix;
  qparam.iqmatrix = iqmatrix;
#endif  // CONFIG_AOM_QM
958
959
960
961
#else
  tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
  int skip = 1;
  PVQ_INFO *pvq_info = NULL;
Yushin Cho's avatar
Yushin Cho committed
962
963
964
  uint8_t *src;
  int16_t *src_int16;
  const int src_stride = p->src.stride;
965

966
  (void)ctx;
967
968
969
970
971
972
973
  (void)scan_order;
  (void)qcoeff;

  if (x->pvq_coded) {
    assert(block < MAX_PVQ_BLOCKS_IN_SB);
    pvq_info = &x->pvq[block][plane];
  }
974
975
976
  src = &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
  src_int16 =
      &p->src_int16[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
Yushin Cho's avatar
Yushin Cho committed
977
978
979

  // transform block size in pixels
  tx_blk_size = tx_size_wide[tx_size];
980
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
981
982
983
984
985
986
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        src_int16[diff_stride * j + i] =
            CONVERT_TO_SHORTPTR(src)[src_stride * j + i];
  } else {
987
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
988
989
990
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        src_int16[diff_stride * j + i] = src[src_stride * j + i];
991
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
992
  }
993
#endif  // CONFIG_HIGHBITDEPTH
Yushin Cho's avatar
Yushin Cho committed
994
995
996
997
#endif

#if CONFIG_PVQ || CONFIG_DAALA_DIST
  dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
998
  pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
999
1000
1001
1002

  // transform block size in pixels
  tx_blk_size = tx_size_wide[tx_size];

Thomas Daede's avatar
Thomas Daede committed
1003
1004
// copy uint8 orig and predicted block to int16 buffer
// in order to use existing VP10 transform functions
1005
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1006
1007
1008
1009
1010
1011
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred[diff_stride * j + i] =
            CONVERT_TO_SHORTPTR(dst)[dst_stride * j + i];
  } else {
1012
#endif  // CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1013
1014
1015
    for (j = 0; j < tx_blk_size; j++)
      for (i = 0; i < tx_blk_size; i++)
        pred[diff_stride * j + i] = dst[dst_stride * j + i];
1016
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1017
  }
1018
#endif  // CONFIG_HIGHBITDEPTH
1019
#endif
Yushin Cho's avatar
Yushin Cho committed
1020

1021
  (void)ctx;
1022
1023

  fwd_txfm_param.tx_type = tx_type;
Angie Chiang's avatar
Angie Chiang committed
1024
  fwd_txfm_param.tx_size = tx_size;
1025
  fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];
Angie Chiang's avatar
Angie Chiang committed
1026

Thomas Daede's avatar
Thomas Daede committed
1027
#if !CONFIG_PVQ
1028
#if CONFIG_HIGHBITDEPTH
1029
  fwd_txfm_param.bd = xd->bd;
Jingning Han's avatar
Jingning Han committed
1030
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
hui su's avatar
hui su committed
1031
    av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
Yaowu Xu's avatar
Yaowu Xu committed
1032
    if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
1033
      if (LIKELY(!x->skip_block)) {
1034
        quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
1035
            coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
1036
      } else {
Yaowu Xu's avatar
Yaowu Xu committed
1037
        av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
1038
1039
      }
    }
1040
1041
1042
1043
#if CONFIG_LV_MAP
    p->txb_entropy_ctx[block] =
        (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif  // CONFIG_LV_MAP
1044
1045
    return;
  }
1046
#endif  // CONFIG_HIGHBITDEPTH
hui su's avatar
hui su committed
1047
  av1_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
Yaowu Xu's avatar
Yaowu Xu committed
1048
  if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
1049
    if (LIKELY(!x->skip_block)) {
1050
      quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
1051
          coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
1052
    } else {
Yaowu Xu's avatar
Yaowu Xu committed
1053
      av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
Jingning Han's avatar
Jingning Han committed
1054
1055
    }
  }
1056
1057
1058
1059
1060
#if CONFIG_LV_MAP
  p->txb_entropy_ctx[block] =
      (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif  // CONFIG_LV_MAP
#else   // #if !CONFIG_PVQ
Angie Chiang's avatar
Angie Chiang committed
1061
  (void)xform_quant_idx;
1062
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1063
1064
  fwd_txfm_param.bd = xd->bd;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
hui su's avatar
hui su committed
1065
1066
    av1_highbd_fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
    av1_highbd_fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
Thomas Daede's avatar
Thomas Daede committed
1067
1068
  } else {
#endif
hui su's avatar
hui su committed
1069
1070
    av1_fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param);
    av1_fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param);
1071
#if CONFIG_HIGHBITDEPTH
Thomas Daede's avatar
Thomas Daede committed
1072
1073
  }
#endif
1074
1075

  // PVQ for inter mode block
1076
  if (!x->skip_block) {
ltrudeau's avatar
ltrudeau committed
1077
    PVQ_SKIP_TYPE ac_dc_coded =
Thomas Daede's avatar
Thomas Daede committed
1078
        av1_pvq_encode_helper(x,
ltrudeau's avatar
ltrudeau committed
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
                              coeff,        // target original vector
                              ref_coeff,    // reference vector
                              dqcoeff,      // de-quantized vector
                              eob,          // End of Block marker
                              pd->dequant,  // aom's quantizers
                              plane,        // image plane
                              tx_size,      // block size in log_2 - 2
                              tx_type,
                              &x->rate,  // rate measured
                              x->pvq_speed,
                              pvq_info);  // PVQ info for a block
    skip = ac_dc_coded == PVQ_SKIP;
1091
  }
1092
1093
1094
1095
  x->pvq_skip[plane] = skip;

  if (!skip) mbmi->skip = 0;
#endif  // #if !CONFIG_PVQ
Jingning Han's avatar
Jingning Han committed
1096
1097
}

1098
static void encode_block(int plane, int block, int blk_row, int blk_col,
1099
                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
Jingning Han's avatar
Jingning Han committed
1100
  struct encode_b_args *const args = arg;
Angie Chiang's avatar
Angie Chiang committed
1101
  AV1_COMMON *cm = args->cm;
Jingning Han's avatar
Jingning Han committed
1102
1103
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
1104
  int ctx;
Jingning Han's avatar
Jingning Han committed
1105
1106
1107
1108
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;