vp9_encodemb.c 37.2 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10
 */

11 12

#include "./vp9_rtcd.h"
13
#include "./vpx_config.h"
14
#include "./vpx_dsp_rtcd.h"
15

16
#include "vpx_dsp/quantize.h"
17
#include "vpx_mem/vpx_mem.h"
18
#include "vpx_ports/mem.h"
19 20

#include "vp9/common/vp9_idct.h"
21 22
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
Scott LaVarnway's avatar
Scott LaVarnway committed
23
#include "vp9/common/vp9_scan.h"
24 25

#include "vp9/encoder/vp9_encodemb.h"
26
#include "vp9/encoder/vp9_rd.h"
27
#include "vp9/encoder/vp9_tokenize.h"
John Koleszar's avatar
John Koleszar committed
28

29 30 31 32 33
/* Per-plane entropy-context scratch storage handed to encode_block() through
 * its argument struct.  encode_block() indexes ta[plane] with the transform
 * block's x offset and tl[plane] with its y offset, both as returned by
 * txfrm_block_to_raster_xy().
 * NOTE(review): ta/tl presumably stand for the "above" and "left" contexts --
 * confirm against the callers that fill these arrays.
 */
struct optimize_ctx {
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};

34
/* Computes the residual for one plane of a block: the plane's destination
 * (prediction) buffer is subtracted from its source buffer and the result is
 * written to the plane's src_diff buffer.  The residual is stored with a
 * stride equal to the plane block width.
 */
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const src_plane = &x->plane[plane];
  const struct macroblockd_plane *const dst_plane = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, dst_plane);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    /* High bit-depth frames take the dedicated subtract routine. */
    vpx_highbd_subtract_block(height, width, src_plane->src_diff, width,
                              src_plane->src.buf, src_plane->src.stride,
                              dst_plane->dst.buf, dst_plane->dst.stride,
                              xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  vpx_subtract_block(height, width, src_plane->src_diff, width,
                     src_plane->src.buf, src_plane->src.stride,
                     dst_plane->dst.buf, dst_plane->dst.stride);
}

53 54 55
#define RDTRUNC(RM, DM, R, D)                        \
  (((1 << (VP9_PROB_COST_SHIFT - 1)) + (R) * (RM)) & \
   ((1 << VP9_PROB_COST_SHIFT) - 1))
56

Dmitry Kovalev's avatar
Dmitry Kovalev committed
57
/* One node of the trellis built by optimize_b().  Each scan position holds
 * two of these, one per candidate quantized value.
 */
typedef struct vp9_token_state {
  int rate;       /* accumulated rate from this node to the end of the path */
  int error;      /* accumulated squared error along the same path */
  int next;       /* scan index of the following trellis node */
  int16_t token;  /* token assigned to this node */
  int16_t qc;     /* quantized coefficient value chosen for this node */
} vp9_token_state;
64

65
/* Rate-distortion multiplier weights, indexed as [ref][plane type].
 * optimize_b() multiplies mb->rdmult by the selected entry and halves the
 * product.
 */
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
  { 10, 6 },
  { 8, 7 },
};
66

67 68 69 70 71 72 73 74 75 76
/* Recomputes rd_cost0 / rd_cost1 from the caller's local variables
 * (rdmult, rddiv, rate0/1, error0/1).  When the two costs are equal they are
 * replaced by their RDTRUNC() values so the caller's comparison can still
 * pick a winner.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement and "UPDATE_RD_COST();" is safe inside an unbraced
 * if/else.
 */
#define UPDATE_RD_COST()                                \
  do {                                                  \
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);    \
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);    \
    if (rd_cost0 == rd_cost1) {                         \
      rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); \
      rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); \
    }                                                   \
  } while (0)

77 78
// This function is a place holder for now but may ultimately need
// to scan previous tokens to work out the correct context.
79 80
static int trellis_get_coeff_context(const int16_t *scan,
                                     const int16_t *nb,
81
                                     int idx, int token,
82
                                     uint8_t *token_cache) {
83
  int bak = token_cache[scan[idx]], pt;
84
  token_cache[scan[idx]] = vp9_pt_energy_class[token];
85
  pt = get_coef_context(nb, token_cache, idx + 1);
86
  token_cache[scan[idx]] = bak;
87
  return pt;
88 89
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
90
static int optimize_b(MACROBLOCK *mb, int plane, int block,
Yaowu Xu's avatar
Yaowu Xu committed
91
                      TX_SIZE tx_size, int ctx) {
92
  MACROBLOCKD *const xd = &mb->e_mbd;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
93 94
  struct macroblock_plane *const p = &mb->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
Scott LaVarnway's avatar
Scott LaVarnway committed
95
  const int ref = is_inter_block(xd->mi[0]);
96 97
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
98
  uint8_t token_cache[1024];
99 100 101
  const tran_low_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
102
  const int eob = p->eobs[block];
103
  const PLANE_TYPE type = get_plane_type(plane);
104
  const int default_eob = 16 << (tx_size << 1);
105
  const int mul = 1 + (tx_size == TX_32X32);
106
  const int16_t *dequant_ptr = pd->dequant;
107
  const uint8_t *const band_translate = get_band_translate(tx_size);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
108 109 110 111
  const scan_order *const so = get_scan(xd, tx_size, type, block);
  const int16_t *const scan = so->scan;
  const int16_t *const nb = so->neighbors;
  int next = eob, sz = 0;
112 113
  const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
  const int64_t rddiv = mb->rddiv;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
114
  int64_t rd_cost0, rd_cost1;
115 116 117
  int rate0, rate1, error0, error1;
  int16_t t0, t1;
  EXTRABIT e0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
118
  int best, band, pt, i, final_eob;
119
#if CONFIG_VP9_HIGHBITDEPTH
120
  const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
121
#else
122
  const int *cat6_high_cost = vp9_get_high_cost_table(8);
123
#endif
John Koleszar's avatar
John Koleszar committed
124

John Koleszar's avatar
John Koleszar committed
125
  assert((!type && !plane) || (type && plane));
John Koleszar's avatar
John Koleszar committed
126
  assert(eob <= default_eob);
John Koleszar's avatar
John Koleszar committed
127 128 129 130 131

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
132
  tokens[eob][0].next = default_eob;
133
  tokens[eob][0].token = EOB_TOKEN;
John Koleszar's avatar
John Koleszar committed
134
  tokens[eob][0].qc = 0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
135 136
  tokens[eob][1] = tokens[eob][0];

137
  for (i = 0; i < eob; i++)
Dmitry Kovalev's avatar
Dmitry Kovalev committed
138
    token_cache[scan[i]] =
139
        vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])];
140

Dmitry Kovalev's avatar
Dmitry Kovalev committed
141
  for (i = eob; i-- > 0;) {
142
    int base_bits, d2, dx;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
143 144
    const int rc = scan[i];
    int x = qcoeff[rc];
John Koleszar's avatar
John Koleszar committed
145 146 147 148 149 150 151 152
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
153
      vp9_get_token_extra(x, &t0, &e0);
John Koleszar's avatar
John Koleszar committed
154
      /* Consider both possible successor states. */
155
      if (next < default_eob) {
156
        band = band_translate[i + 1];
157
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
158 159 160 161
        rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
                                [tokens[next][0].token];
        rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
                                [tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
162
      }
163
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
164 165
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
166
      base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
167
      dx = mul * (dqcoeff[rc] - coeff[rc]);
168 169 170 171 172
#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        dx >>= xd->bd - 8;
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
John Koleszar's avatar
John Koleszar committed
173 174 175 176 177 178
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
179
      best_index[i][0] = best;
180

John Koleszar's avatar
John Koleszar committed
181 182 183 184
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
185 186 187
      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
          (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
                                               dequant_ptr[rc != 0]))
John Koleszar's avatar
John Koleszar committed
188 189 190 191 192 193 194 195 196 197 198 199 200
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
201
         */
202 203
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
204
        e0 = 0;
John Koleszar's avatar
John Koleszar committed
205
      } else {
206 207
        vp9_get_token_extra(x, &t0, &e0);
        t1 = t0;
John Koleszar's avatar
John Koleszar committed
208
      }
209
      if (next < default_eob) {
210
        band = band_translate[i + 1];
211
        if (t0 != EOB_TOKEN) {
212
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
213
          rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
214
                                  [tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
215
        }
216
        if (t1 != EOB_TOKEN) {
217
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
218
          rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
219
                                  [tokens[next][1].token];
John Koleszar's avatar
John Koleszar committed
220 221
        }
      }
John Koleszar's avatar
John Koleszar committed
222

223
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
224 225
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
226
      base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
John Koleszar's avatar
John Koleszar committed
227 228

      if (shortcut) {
229 230 231 232 233 234 235 236 237
#if CONFIG_VP9_HIGHBITDEPTH
        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          dx -= ((dequant_ptr[rc != 0] >> (xd->bd - 8)) + sz) ^ sz;
        } else {
          dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
        }
#else
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
#endif  // CONFIG_VP9_HIGHBITDEPTH
John Koleszar's avatar
John Koleszar committed
238 239 240 241 242 243 244
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
245
      best_index[i][1] = best;
John Koleszar's avatar
John Koleszar committed
246 247
      /* Finally, make this the new head of the trellis. */
      next = i;
248 249 250 251
    } else {
      /* There's no choice to make for a zero coefficient, so we don't
       *  add a new trellis node, but we do need to update the costs.
       */
252
      band = band_translate[i + 1];
John Koleszar's avatar
John Koleszar committed
253 254 255
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
256
      if (t0 != EOB_TOKEN) {
257
        tokens[next][0].rate +=
258
            mb->token_costs[tx_size][type][ref][band][1][0][t0];
John Koleszar's avatar
John Koleszar committed
259 260
        tokens[next][0].token = ZERO_TOKEN;
      }
261
      if (t1 != EOB_TOKEN) {
262
        tokens[next][1].rate +=
263
            mb->token_costs[tx_size][type][ref][band][1][0][t1];
John Koleszar's avatar
John Koleszar committed
264 265
        tokens[next][1].token = ZERO_TOKEN;
      }
266
      best_index[i][0] = best_index[i][1] = 0;
John Koleszar's avatar
John Koleszar committed
267
      /* Don't update next, because we didn't add a new node. */
268
    }
John Koleszar's avatar
John Koleszar committed
269 270 271
  }

  /* Now pick the best path through the whole trellis. */
272
  band = band_translate[i + 1];
John Koleszar's avatar
John Koleszar committed
273 274 275 276 277 278
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
279 280
  rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
281
  UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
282
  best = rd_cost1 < rd_cost0;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
283
  final_eob = -1;
James Zern's avatar
James Zern committed
284 285
  memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
  memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
John Koleszar's avatar
John Koleszar committed
286
  for (i = next; i < eob; i = next) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
287 288
    const int x = tokens[i][best].qc;
    const int rc = scan[i];
289
    if (x) {
John Koleszar's avatar
John Koleszar committed
290
      final_eob = i;
291
    }
Dmitry Kovalev's avatar
Dmitry Kovalev committed
292

293 294
    qcoeff[rc] = x;
    dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
295

John Koleszar's avatar
John Koleszar committed
296
    next = tokens[i][best].next;
297
    best = best_index[i][best];
John Koleszar's avatar
John Koleszar committed
298 299 300
  }
  final_eob++;

301
  mb->plane[plane].eobs[block] = final_eob;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
302
  return final_eob;
303 304
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
305
// Forward 32x32 DCT of `src` into `dst`.  A non-zero rd_transform selects
// vpx_fdct32x32_rd(); otherwise vpx_fdct32x32() is used.
static INLINE void fdct32x32(int rd_transform,
                             const int16_t *src, tran_low_t *dst,
                             int src_stride) {
  if (rd_transform) {
    vpx_fdct32x32_rd(src, dst, src_stride);
    return;
  }
  vpx_fdct32x32(src, dst, src_stride);
}

314
#if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth counterpart of fdct32x32(): a non-zero rd_transform selects
// vpx_highbd_fdct32x32_rd(); otherwise vpx_highbd_fdct32x32() is used.
static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src,
                                    tran_low_t *dst, int src_stride) {
  if (rd_transform) {
    vpx_highbd_fdct32x32_rd(src, dst, src_stride);
    return;
  }
  vpx_highbd_fdct32x32(src, dst, src_stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
323

324 325 326 327 328
/* Forward-transforms the residual of one transform block and quantizes it
 * with the "fp" quantizers (round_fp / quant_fp tables), using the default
 * scan order for tx_size.  Writes coeff, qcoeff, dqcoeff and the block's
 * eob entry.
 */
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  int col, row;

  /* Locate this transform block's residual inside the plane's src_diff. */
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &col, &row);
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                                     p->round_fp, p->quant_fp,
                                     p->quant_shift, qcoeff, dqcoeff,
                                     pd->dequant, eob, so->scan, so->iscan);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin,
                               p->round_fp, p->quant_fp, p->quant_shift,
                               qcoeff, dqcoeff, pd->dequant, eob,
                               so->scan, so->iscan);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->zbin,
                               p->round_fp, p->quant_fp, p->quant_shift,
                               qcoeff, dqcoeff, pd->dequant, eob,
                               so->scan, so->iscan);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin,
                               p->round_fp, p->quant_fp, p->quant_shift,
                               qcoeff, dqcoeff, pd->dequant, eob,
                               so->scan, so->iscan);
        break;
      default:
        assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin,
                            p->round_fp, p->quant_fp, p->quant_shift,
                            qcoeff, dqcoeff, pd->dequant, eob,
                            so->scan, so->iscan);
      break;
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin,
                      p->round_fp, p->quant_fp, p->quant_shift,
                      qcoeff, dqcoeff, pd->dequant, eob,
                      so->scan, so->iscan);
      break;
    case TX_8X8:
      /* 8x8 uses the combined transform-and-quantize routine. */
      vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64,
                        x->skip_block, p->zbin,
                        p->round_fp, p->quant_fp, p->quant_shift,
                        qcoeff, dqcoeff, pd->dequant, eob,
                        so->scan, so->iscan);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin,
                      p->round_fp, p->quant_fp, p->quant_shift,
                      qcoeff, dqcoeff, pd->dequant, eob,
                      so->scan, so->iscan);
      break;
    default:
      assert(0);
      break;
  }
}

/* Fast-path transform and quantization of one transform block: runs the
 * "_1" forward transform for 32x32/16x16/8x8 (the regular 4x4 transform for
 * TX_4X4) and then the DC quantizer with round, quant_fp[0] and dequant[0].
 * NOTE(review): the "_1" transforms presumably produce only the DC
 * coefficient -- confirm against vpx_dsp.
 */
void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
                        BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  int col, row;

  /* Locate this transform block's residual inside the plane's src_diff. */
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &col, &row);
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round,
                                     p->quant_fp[0], qcoeff, dqcoeff,
                                     pd->dequant[0], eob);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
                               p->quant_fp[0], qcoeff, dqcoeff,
                               pd->dequant[0], eob);
        break;
      default:
        assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      vpx_fdct32x32_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc_32x32(coeff, x->skip_block, p->round,
                            p->quant_fp[0], qcoeff, dqcoeff,
                            pd->dequant[0], eob);
      break;
    case TX_16X16:
      vpx_fdct16x16_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 256, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_8X8:
      vpx_fdct8x8_1(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 64, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vpx_quantize_dc(coeff, 16, x->skip_block, p->round,
                      p->quant_fp[0], qcoeff, dqcoeff,
                      pd->dequant[0], eob);
      break;
    default:
      assert(0);
      break;
  }
}

495 496 497
/* Full forward transform and quantization of one transform block, using the
 * regular "b" quantizers (round / quant tables) and the default scan order
 * for tx_size.  Writes coeff, qcoeff, dqcoeff and the block's eob entry.
 */
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const scan_order *const so = &vp9_default_scan_orders[tx_size];
  tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
  tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint16_t *const eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int16_t *src_diff;
  int col, row;

  /* Locate this transform block's residual inside the plane's src_diff. */
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &col, &row);
  src_diff = &p->src_diff[4 * (row * diff_stride + col)];

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
        highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                                    p->round, p->quant, p->quant_shift,
                                    qcoeff, dqcoeff, pd->dequant, eob,
                                    so->scan, so->iscan);
        break;
      case TX_16X16:
        vpx_highbd_fdct16x16(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin,
                              p->round, p->quant, p->quant_shift,
                              qcoeff, dqcoeff, pd->dequant, eob,
                              so->scan, so->iscan);
        break;
      case TX_8X8:
        vpx_highbd_fdct8x8(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin,
                              p->round, p->quant, p->quant_shift,
                              qcoeff, dqcoeff, pd->dequant, eob,
                              so->scan, so->iscan);
        break;
      case TX_4X4:
        x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin,
                              p->round, p->quant, p->quant_shift,
                              qcoeff, dqcoeff, pd->dequant, eob,
                              so->scan, so->iscan);
        break;
      default:
        assert(0);
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  switch (tx_size) {
    case TX_32X32:
      fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
      vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin,
                           p->round, p->quant, p->quant_shift,
                           qcoeff, dqcoeff, pd->dequant, eob,
                           so->scan, so->iscan);
      break;
    case TX_16X16:
      vpx_fdct16x16(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 256, x->skip_block, p->zbin,
                     p->round, p->quant, p->quant_shift,
                     qcoeff, dqcoeff, pd->dequant, eob,
                     so->scan, so->iscan);
      break;
    case TX_8X8:
      vpx_fdct8x8(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 64, x->skip_block, p->zbin,
                     p->round, p->quant, p->quant_shift,
                     qcoeff, dqcoeff, pd->dequant, eob,
                     so->scan, so->iscan);
      break;
    case TX_4X4:
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vpx_quantize_b(coeff, 16, x->skip_block, p->zbin,
                     p->round, p->quant, p->quant_shift,
                     qcoeff, dqcoeff, pd->dequant, eob,
                     so->scan, so->iscan);
      break;
    default:
      assert(0);
      break;
  }
}

584
static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
585
                         TX_SIZE tx_size, void *arg) {
586 587 588
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
589
  struct optimize_ctx *const ctx = args->ctx;
590
  struct macroblock_plane *const p = &x->plane[plane];
591
  struct macroblockd_plane *const pd = &xd->plane[plane];
592
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
593 594
  int i, j;
  uint8_t *dst;
595
  ENTROPY_CONTEXT *a, *l;
596 597
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
598 599
  a = &ctx->ta[plane][i];
  l = &ctx->tl[plane][j];
600 601 602 603

  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
604
    p->eobs[block] = 0;
605
    *a = *l = 0;
606 607 608
    return;
  }

609
  if (!x->skip_recode) {
610 611
    if (x->quant_fp) {
      // Encoding process for rtc mode
612
      if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
613 614 615 616
        // skip forward transform
        p->eobs[block] = 0;
        *a = *l = 0;
        return;
617 618
      } else {
        vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
619
      }
620
    } else {
621 622
      if (max_txsize_lookup[plane_bsize] == tx_size) {
        int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1));
623
        if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) {
624 625
          // full forward transform and quantization
          vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
626
        } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) {
627 628 629 630 631 632 633 634 635
          // fast path forward transform and quantization
          vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
        } else {
          // skip forward transform
          p->eobs[block] = 0;
          *a = *l = 0;
          return;
        }
      } else {
636
        vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
637
      }
Jingning Han's avatar
Jingning Han committed
638
    }
639
  }
640

641
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
642
    const int ctx = combine_entropy_contexts(*a, *l);
Yaowu Xu's avatar
Yaowu Xu committed
643
    *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0;
644
  } else {
645
    *a = *l = p->eobs[block] > 0;
646
  }
John Koleszar's avatar
John Koleszar committed
647

Jim Bankoski's avatar
Jim Bankoski committed
648
  if (p->eobs[block])
649
    *(args->skip) = 0;
Jim Bankoski's avatar
Jim Bankoski committed
650

651
  if (x->skip_encode || p->eobs[block] == 0)
652
    return;
653 654 655 656
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    switch (tx_size) {
      case TX_32X32:
657 658
        vp9_highbd_idct32x32_add(dqcoeff, dst, pd->dst.stride,
                                 p->eobs[block], xd->bd);
659 660
        break;
      case TX_16X16:
661 662
        vp9_highbd_idct16x16_add(dqcoeff, dst, pd->dst.stride,
                                 p->eobs[block], xd->bd);
663 664
        break;
      case TX_8X8:
665 666
        vp9_highbd_idct8x8_add(dqcoeff, dst, pd->dst.stride,
                               p->eobs[block], xd->bd);
667 668 669 670 671
        break;
      case TX_4X4:
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
672 673
        x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride,
                           p->eobs[block], xd->bd);
674 675 676 677 678 679 680
        break;
      default:
        assert(0 && "Invalid transform size");
    }
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
681

682
  switch (tx_size) {
683
    case TX_32X32:
684
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
685 686
      break;
    case TX_16X16:
687
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
688 689
      break;
    case TX_8X8:
690
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
691 692
      break;
    case TX_4X4:
693 694 695
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      // case.
696
      x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
697
      break;
698
    default:
James Zern's avatar
James Zern committed
699
      assert(0 && "Invalid transform size");
700
      break;
701
  }
John Koleszar's avatar
John Koleszar committed
702
}
703

704 705
/* Per-transform-block callback used by vp9_encode_sby_pass1().  `arg` is the
 * MACROBLOCK being encoded.  Runs the full transform and quantization, then,
 * if any coefficient is left, adds the inverse transform to the destination
 * buffer through the macroblock's itxm_add hook.
 */
static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
                               TX_SIZE tx_size, void *arg) {
  MACROBLOCK *const x = (MACROBLOCK *)arg;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  uint8_t *dst;
  int col, row;

  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &col, &row);
  dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];

  vp9_xform_quant(x, plane, block, plane_bsize, tx_size);

  /* An empty block leaves the destination untouched. */
  if (p->eobs[block] <= 0) {
    return;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd);
    return;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
}

729
/* Pass-1 encode of plane 0 only: computes the plane's residual, then runs
 * encode_block_pass1() on every transform block of that plane.
 */
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int plane = 0;

  vp9_subtract_plane(x, bsize, plane);
  vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                         encode_block_pass1, x);
}

735
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
736
  MACROBLOCKD *const xd = &x->e_mbd;