vp9_encodemb.c 26.5 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include "./vpx_config.h"
12
#include "vp9/encoder/vp9_encodemb.h"
13
#include "vp9/common/vp9_reconinter.h"
14
15
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_tokenize.h"
16
17
#include "vp9/common/vp9_invtrans.h"
#include "vp9/common/vp9_reconintra.h"
John Koleszar's avatar
John Koleszar committed
18
#include "vpx_mem/vpx_mem.h"
19
#include "vp9/encoder/vp9_rdopt.h"
20
#include "vp9/common/vp9_systemdependent.h"
21
#include "vp9_rtcd.h"
John Koleszar's avatar
John Koleszar committed
22

23
24
25
/* Table defined in another translation unit.  This file indexes it by token
 * value and stores the result in the per-block token cache (see
 * trellis_get_coeff_context() and optimize_b()). */
DECLARE_ALIGNED(16, extern const uint8_t,
                vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);

26
27
28
29
/* Writes the element-wise difference src - pred of a rows x cols block
 * into diff.
 *
 * rows, cols   - block dimensions, in elements.
 * diff_ptr     - destination; diff_stride elements separate successive rows.
 * src_ptr      - source pixels; src_stride bytes separate successive rows.
 * pred_ptr     - predictor pixels; pred_stride bytes separate successive rows.
 *
 * Only the first `cols` entries of each destination row are written.
 */
void vp9_subtract_block(int rows, int cols,
                        int16_t *diff_ptr, int diff_stride,
                        const uint8_t *src_ptr, int src_stride,
                        const uint8_t *pred_ptr, int pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++) {
      /* uint8_t operands promote to int, so the result lies in [-255, 255]
       * and always fits in int16_t. */
      diff_ptr[c] = (int16_t)(src_ptr[c] - pred_ptr[c]);
    }

    diff_ptr += diff_stride;
    pred_ptr += pred_stride;
    src_ptr  += src_stride;
  }
}

42

43
44
45
46
/* Computes the residual (source minus the plane's dst buffer) for one whole
 * plane of a block of size `bsize` and stores it in
 * x->plane[plane].src_diff, using the plane width as the residual stride.
 */
static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
  const MACROBLOCKD * const xd = &x->e_mbd;
  /* Plane dimensions in pixels after accounting for chroma subsampling. */
  const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
  const int bh = 4 << (b_height_log2(bsize) - xd->plane[plane].subsampling_y);
  const uint8_t *src = x->plane[plane].src.buf;
  const int src_stride = x->plane[plane].src.stride;

  vp9_subtract_block(bh, bw,
                     x->plane[plane].src_diff, bw, src, src_stride,
                     xd->plane[plane].dst.buf, xd->plane[plane].dst.stride);
}

55
56
/* Computes the residual for plane 0 of the block. */
void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  subtract_plane(x, bsize, 0);
}

59
60
/* Computes the residual for every plane except plane 0
 * (planes 1 .. MAX_MB_PLANE - 1). */
void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  int i;

  for (i = 1; i < MAX_MB_PLANE; i++) {
    subtract_plane(x, bsize, i);
  }
}

66
67
68
/* Computes the residual for all planes: plane 0 first, then the rest. */
void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
  vp9_subtract_sby(x, bsize);
  vp9_subtract_sbuv(x, bsize);
}

71

72
73
/* Tie-break value used by UPDATE_RD_COST when two RDCOST results compare
 * equal: the low 8 bits of (128 + R * RM).  The DM and D arguments are
 * accepted for signature symmetry but are not used. */
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )

typedef struct vp9_token_state vp9_token_state;

/* One node of the trellis built by optimize_b(). */
struct vp9_token_state {
  int           rate;   /* accumulated rate along the path from this node */
  int           error;  /* accumulated squared error along that path */
  int           next;   /* scan index of the successor node */
  signed char   token;  /* token associated with this node */
  short         qc;     /* quantized coefficient value chosen for this node */
};

84
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2

/* Multiplier applied to the macroblock rdmult in optimize_b(), indexed by the
 * plane's PLANE_TYPE.  Only the first two entries are given explicitly; the
 * remaining entries are zero-initialised by the language. */
static const int plane_rd_mult[4] = {
  Y1_RD_MULT,
  UV_RD_MULT,
};

93
94
95
96
97
98
99
100
101
102
/* Recomputes rd_cost0 / rd_cost1 from the caller's rdmult, rddiv, rate0/1 and
 * error0/1 locals.  When the two RDCOST values tie, both are replaced by the
 * RDTRUNC tie-break value so that a later comparison can still separate them.
 * Wrapped in do { } while (0) so that `UPDATE_RD_COST();` is a single
 * statement in every syntactic context. */
#define UPDATE_RD_COST()\
do {\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
  }\
} while (0)

103
104
// This function is a place holder for now but may ultimately need
// to scan previous tokens to work out the correct context.
105
106
107
108
109
static int trellis_get_coeff_context(const int *scan,
                                     const int *nb,
                                     int idx, int token,
                                     uint8_t *token_cache,
                                     int pad, int l) {
110
  int bak = token_cache[scan[idx]], pt;
111
  token_cache[scan[idx]] = vp9_pt_energy_class[token];
112
  pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
113
  token_cache[scan[idx]] = bak;
114
  return pt;
115
116
}

John Koleszar's avatar
John Koleszar committed
117
118
static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
                       int plane, int block, BLOCK_SIZE_TYPE bsize,
119
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
John Koleszar's avatar
John Koleszar committed
120
                       TX_SIZE tx_size) {
121
  const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
122
  MACROBLOCKD *const xd = &mb->e_mbd;
123
124
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
John Koleszar's avatar
John Koleszar committed
125
126
  const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff,
                                          block, 16);
127
128
  int16_t *qcoeff_ptr;
  int16_t *dqcoeff_ptr;
John Koleszar's avatar
John Koleszar committed
129
  int eob = xd->plane[plane].eobs[block], final_eob, sz = 0;
130
  const int i0 = 0;
131
  int rc, x, next, i;
132
133
134
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  int best, band, pt;
John Koleszar's avatar
John Koleszar committed
135
  PLANE_TYPE type = xd->plane[plane].plane_type;
John Koleszar's avatar
John Koleszar committed
136
  int err_mult = plane_rd_mult[type];
137
138
  int default_eob, pad;
  int const *scan, *nb;
139
  const int mul = 1 + (tx_size == TX_32X32);
140
  uint8_t token_cache[1024];
John Koleszar's avatar
John Koleszar committed
141
142
143
  const int ib = txfrm_block_to_raster_block(xd, bsize, plane,
                                             block, 2 * tx_size);
  const int16_t *dequant_ptr = xd->plane[plane].dequant;
Paul Wilkins's avatar
Paul Wilkins committed
144
  const uint8_t * band_translate;
John Koleszar's avatar
John Koleszar committed
145

John Koleszar's avatar
John Koleszar committed
146
147
148
  assert((!type && !plane) || (type && plane));
  dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16);
  qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
Deb Mukherjee's avatar
Deb Mukherjee committed
149
  switch (tx_size) {
150
    default:
151
    case TX_4X4: {
John Koleszar's avatar
John Koleszar committed
152
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT;
153
      default_eob = 16;
154
      scan = get_scan_4x4(tx_type);
Paul Wilkins's avatar
Paul Wilkins committed
155
      band_translate = vp9_coefband_trans_4x4;
156
      break;
157
    }
158
    case TX_8X8: {
John Koleszar's avatar
John Koleszar committed
159
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT;
160
      scan = get_scan_8x8(tx_type);
161
      default_eob = 64;
Paul Wilkins's avatar
Paul Wilkins committed
162
      band_translate = vp9_coefband_trans_8x8plus;
163
      break;
164
165
    }
    case TX_16X16: {
John Koleszar's avatar
John Koleszar committed
166
      const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT;
167
      scan = get_scan_16x16(tx_type);
168
      default_eob = 256;
Paul Wilkins's avatar
Paul Wilkins committed
169
      band_translate = vp9_coefband_trans_8x8plus;
170
      break;
171
    }
172
    case TX_32X32:
Paul Wilkins's avatar
Paul Wilkins committed
173
      scan = vp9_default_scan_32x32;
174
      default_eob = 1024;
Paul Wilkins's avatar
Paul Wilkins committed
175
      band_translate = vp9_coefband_trans_8x8plus;
176
      break;
177
  }
John Koleszar's avatar
John Koleszar committed
178
  assert(eob <= default_eob);
John Koleszar's avatar
John Koleszar committed
179
180
181
182
183
184

  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    rdmult = (rdmult * 9) >> 4;
  rddiv = mb->rddiv;
185
  memset(best_index, 0, sizeof(best_index));
John Koleszar's avatar
John Koleszar committed
186
187
188
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
189
  tokens[eob][0].next = default_eob;
John Koleszar's avatar
John Koleszar committed
190
191
192
193
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
194
  for (i = 0; i < eob; i++)
195
196
    token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
        qcoeff_ptr[scan[i]]].token];
197
198
  nb = vp9_get_coef_neighbors_handle(scan, &pad);

John Koleszar's avatar
John Koleszar committed
199
  for (i = eob; i-- > i0;) {
200
    int base_bits, d2, dx;
John Koleszar's avatar
John Koleszar committed
201

202
    rc = scan[i];
John Koleszar's avatar
John Koleszar committed
203
204
205
206
207
208
209
210
211
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
212
      t0 = (vp9_dct_value_tokens_ptr + x)->token;
John Koleszar's avatar
John Koleszar committed
213
      /* Consider both possible successor states. */
214
      if (next < default_eob) {
Paul Wilkins's avatar
Paul Wilkins committed
215
        band = get_coef_band(band_translate, i + 1);
216
217
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                       pad, default_eob);
218
#if CONFIG_BALANCED_COEFTREE
John Koleszar's avatar
John Koleszar committed
219
        rate0 +=
220
221
          mb->token_costs_noskip[tx_size][type][ref][band][pt]
                                [tokens[next][0].token];
John Koleszar's avatar
John Koleszar committed
222
        rate1 +=
223
224
225
226
227
228
229
230
231
232
          mb->token_costs_noskip[tx_size][type][ref][band][pt]
                                [tokens[next][1].token];
#else
        rate0 +=
          mb->token_costs[tx_size][type][ref][band][pt]
                         [tokens[next][0].token];
        rate1 +=
          mb->token_costs[tx_size][type][ref][band][pt]
                         [tokens[next][1].token];
#endif
John Koleszar's avatar
John Koleszar committed
233
      }
234
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
235
236
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
237
      base_bits = *(vp9_dct_value_cost_ptr + x);
238
      dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
John Koleszar's avatar
John Koleszar committed
239
240
241
242
243
244
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
245
      best_index[i][0] = best;
246

John Koleszar's avatar
John Koleszar committed
247
248
249
250
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;

251
252
253
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
                                         dequant_ptr[rc != 0]))
John Koleszar's avatar
John Koleszar committed
254
255
256
257
258
259
260
261
262
263
264
265
266
        shortcut = 1;
      else
        shortcut = 0;

      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1;
      }

      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         *  we need to move the EOB back here.
267
         */
John Koleszar's avatar
John Koleszar committed
268
269
270
271
272
        t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
             DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
273
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
John Koleszar's avatar
John Koleszar committed
274
      }
275
      if (next < default_eob) {
Paul Wilkins's avatar
Paul Wilkins committed
276
        band = get_coef_band(band_translate, i + 1);
John Koleszar's avatar
John Koleszar committed
277
        if (t0 != DCT_EOB_TOKEN) {
278
279
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                         pad, default_eob);
280
281
282
283
284
285
286
287
#if CONFIG_BALANCED_COEFTREE
          if (!x)
            rate0 += mb->token_costs[tx_size][type][ref][band][pt][
                tokens[next][0].token];
          else
            rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
                tokens[next][0].token];
#else
288
          rate0 += mb->token_costs[tx_size][type][ref][band][pt][
289
              tokens[next][0].token];
290
#endif
John Koleszar's avatar
John Koleszar committed
291
        }
John Koleszar's avatar
John Koleszar committed
292
        if (t1 != DCT_EOB_TOKEN) {
293
294
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
                                         pad, default_eob);
295
296
297
298
299
300
301
302
#if CONFIG_BALANCED_COEFTREE
          if (!x)
            rate1 += mb->token_costs[tx_size][type][ref][band][pt][
                tokens[next][1].token];
          else
            rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
                tokens[next][1].token];
#else
303
          rate1 += mb->token_costs[tx_size][type][ref][band][pt][
304
              tokens[next][1].token];
305
#endif
John Koleszar's avatar
John Koleszar committed
306
307
        }
      }
John Koleszar's avatar
John Koleszar committed
308

309
      UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
310
311
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
312
      base_bits = *(vp9_dct_value_cost_ptr + x);
John Koleszar's avatar
John Koleszar committed
313
314

      if (shortcut) {
315
        dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
John Koleszar's avatar
John Koleszar committed
316
317
318
319
320
321
322
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
323
      best_index[i][1] = best;
John Koleszar's avatar
John Koleszar committed
324
325
      /* Finally, make this the new head of the trellis. */
      next = i;
326
    }
John Koleszar's avatar
John Koleszar committed
327
328
329
330
    /* There's no choice to make for a zero coefficient, so we don't
     *  add a new trellis node, but we do need to update the costs.
     */
    else {
Paul Wilkins's avatar
Paul Wilkins committed
331
      band = get_coef_band(band_translate, i + 1);
John Koleszar's avatar
John Koleszar committed
332
333
334
335
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
336
337
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t0];
John Koleszar's avatar
John Koleszar committed
338
339
340
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
341
342
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][0][t1];
John Koleszar's avatar
John Koleszar committed
343
344
345
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
346
    }
John Koleszar's avatar
John Koleszar committed
347
348
349
  }

  /* Now pick the best path through the whole trellis. */
Paul Wilkins's avatar
Paul Wilkins committed
350
  band = get_coef_band(band_translate, i + 1);
351
  pt = combine_entropy_contexts(*a, *l);
John Koleszar's avatar
John Koleszar committed
352
353
354
355
356
357
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
358
359
360
361
#if CONFIG_BALANCED_COEFTREE
  rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t0];
  rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t1];
#else
362
363
  rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
364
#endif
365
  UPDATE_RD_COST();
John Koleszar's avatar
John Koleszar committed
366
367
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
368
369
  vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
  vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
John Koleszar's avatar
John Koleszar committed
370
371
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
372
    if (x) {
John Koleszar's avatar
John Koleszar committed
373
      final_eob = i;
374
    }
375
    rc = scan[i];
John Koleszar's avatar
John Koleszar committed
376
    qcoeff_ptr[rc] = x;
377
    dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
378

John Koleszar's avatar
John Koleszar committed
379
    next = tokens[i][best].next;
380
    best = best_index[i][best];
John Koleszar's avatar
John Koleszar committed
381
382
383
  }
  final_eob++;

John Koleszar's avatar
John Koleszar committed
384
  xd->plane[plane].eobs[block] = final_eob;
385
  *a = *l = (final_eob > 0);
386
387
}

John Koleszar's avatar
John Koleszar committed
388
389
390
391
392
/* Argument bundle passed through the `void *arg` parameter of the
 * foreach_transformed_block_* iterators to optimize_block(). */
struct optimize_block_args {
  VP9_COMMON *cm;            /* forwarded to vp9_optimize_b() */
  MACROBLOCK *x;             /* macroblock being optimised */
  struct optimize_ctx *ctx;  /* entropy contexts set up by vp9_optimize_init() */
};
393

394
395
396
397
/* Runs the trellis optimisation on one transform block.
 *
 * Locates the block's entropy contexts in ctx->ta / ctx->tl via
 * txfrm_block_to_raster_xy() and forwards to optimize_b(), converting
 * ss_txfrm_size to a TX_SIZE by halving it.
 */
void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
                    int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb,
                    struct optimize_ctx *ctx) {
  MACROBLOCKD* const xd = &mb->e_mbd;
  int x, y;

  // find current entropy context
  txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);

  optimize_b(cm, mb, plane, block, bsize,
             &ctx->ta[plane][x], &ctx->tl[plane][y],
             ss_txfrm_size / 2);
}

408
409
410
411
412
413
414
/* Iterator callback: unpacks the optimize_block_args bundle carried in `arg`
 * and hands the block to vp9_optimize_b(). */
static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                           int ss_txfrm_size, void *arg) {
  const struct optimize_block_args *const bundle = arg;
  VP9_COMMON *const common = bundle->cm;
  MACROBLOCK *const mb = bundle->x;

  vp9_optimize_b(plane, block, bsize, ss_txfrm_size, common, mb, bundle->ctx);
}

John Koleszar's avatar
John Koleszar committed
415
416
417
418
419
420
421
422
/* Fills ctx->ta / ctx->tl for every plane from the plane's above and left
 * entropy contexts.
 *
 * For each group of (1 << tx_size) context entries, the entry at the start
 * of the group is set to the OR of the group's values; the OR stops as soon
 * as the accumulated value becomes non-zero.  Only the entry at the start of
 * each group is written.
 */
void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
                       struct optimize_ctx *ctx) {
  int p;

  for (p = 0; p < MAX_MB_PLANE; p++) {
    const struct macroblockd_plane* const plane = &xd->plane[p];
    const int bwl = b_width_log2(bsize) - plane->subsampling_x;
    const int bhl = b_height_log2(bsize) - plane->subsampling_y;
    const TX_SIZE tx_size = p ? get_uv_tx_size(xd)
                              : xd->mode_info_context->mbmi.txfm_size;
    const int step = 1 << tx_size;
    int i, j;

    for (i = 0; i < (1 << bwl); i += step) {
      int c = 0;
      ctx->ta[p][i] = 0;
      for (j = 0; j < step && !c; j++) {
        ctx->ta[p][i] |= plane->above_context[i + j];
        c = ctx->ta[p][i];
      }
    }
    for (i = 0; i < (1 << bhl); i += step) {
      int c = 0;
      ctx->tl[p][i] = 0;
      for (j = 0; j < step && !c; j++) {
        ctx->tl[p][i] |= plane->left_context[i + j];
        c = ctx->tl[p][i];
      }
    }
  }
}

John Koleszar's avatar
John Koleszar committed
444
445
446
447
448
449
450
/* Runs the trellis optimisation over every transform block of plane 0,
 * using a freshly initialised local optimize_ctx. */
void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x,
                      BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx ctx;
  struct optimize_block_args arg = {cm, x, &ctx};

  vp9_optimize_init(&x->e_mbd, bsize, &ctx);
  foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
                                     optimize_block, &arg);
}

John Koleszar's avatar
John Koleszar committed
453
454
455
456
457
458
/* Runs the trellis optimisation over the transform blocks visited by
 * foreach_transformed_block_uv(), using a freshly initialised local
 * optimize_ctx. */
void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
                       BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx ctx;
  struct optimize_block_args arg = {cm, x, &ctx};

  vp9_optimize_init(&x->e_mbd, bsize, &ctx);
  foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg);
}

461
462
463
464
465
466
/* Argument bundle passed through the `void *arg` parameter of the
 * foreach_transformed_block_* iterators to xform_quant(), encode_block()
 * and encode_block_intra(). */
struct encode_b_args {
  VP9_COMMON *cm;            /* forwarded to vp9_optimize_b() by encode_block() */
  MACROBLOCK *x;             /* macroblock being encoded */
  struct optimize_ctx *ctx;  /* NULL when the callback is xform_quant() only */
};

467
/* Iterator callback: forward-transforms and quantizes one transform block.
 *
 * `arg` points to a struct encode_b_args; only its `x` member is used.
 * The transform is selected from ss_txfrm_size / 2.  For 4x4, 8x8 and 16x16
 * blocks in plane 0 the transform type comes from get_tx_type_*(); every
 * other case uses DCT_DCT.  The result is written to the block's coeff
 * buffer and then passed to vp9_quantize().
 */
static void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize,
                        int ss_txfrm_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
  const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
                                                       block, ss_txfrm_size);
  int16_t* const src_diff = raster_block_offset_int16(xd, bsize, plane,
                                                      raster_block,
                                                      x->plane[plane].src_diff);
  int16_t* const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block, 16);
  TX_TYPE tx_type = DCT_DCT;

  /* The fht entry points are given the stride as `bw`, the fdct entry points
   * as `bw * 2`. */
  switch (ss_txfrm_size / 2) {
    case TX_32X32:
      vp9_short_fdct32x32(src_diff, coeff, bw * 2);
      break;
    case TX_16X16:
      tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht16x16(src_diff, coeff, bw, tx_type);
      } else {
        x->fwd_txm16x16(src_diff, coeff, bw * 2);
      }
      break;
    case TX_8X8:
      tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht8x8(src_diff, coeff, bw, tx_type);
      } else {
        x->fwd_txm8x8(src_diff, coeff, bw * 2);
      }
      break;
    case TX_4X4:
      tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
      if (tx_type != DCT_DCT) {
        vp9_short_fht4x4(src_diff, coeff, bw, tx_type);
      } else {
        x->fwd_txm4x4(src_diff, coeff, bw * 2);
      }
      break;
    default:
      assert(0);
  }

  vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type);
}

/* Iterator callback: transforms, quantizes, optionally trellis-optimises and
 * then reconstructs one transform block.
 *
 * `arg` points to a struct encode_b_args.  After xform_quant() (and
 * vp9_optimize_b() when x->optimize is set) the dequantized coefficients are
 * inverse-transformed and added into the plane's dst buffer.
 */
static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
                         int ss_txfrm_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
                                                       block, ss_txfrm_size);
  uint8_t* const dst = raster_block_offset_uint8(xd, bsize, plane,
                                                 raster_block,
                                                 xd->plane[plane].dst.buf,
                                                 xd->plane[plane].dst.stride);
  int16_t* const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16);
  const int dst_stride = xd->plane[plane].dst.stride;
  TX_TYPE tx_type = DCT_DCT;

  xform_quant(plane, block, bsize, ss_txfrm_size, arg);

  if (x->optimize)
    vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx);

  switch (ss_txfrm_size / 2) {
    case TX_32X32:
      vp9_short_idct32x32_add(dqcoeff, dst, dst_stride);
      break;
    case TX_16X16:
      tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        vp9_short_idct16x16_add(dqcoeff, dst, dst_stride);
      } else {
        vp9_short_iht16x16_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
    case TX_8X8:
      tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        vp9_short_idct8x8_add(dqcoeff, dst, dst_stride);
      } else {
        vp9_short_iht8x8_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
    case TX_4X4:
      tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        vp9_inverse_transform_b_4x4_add(xd, xd->plane[plane].eobs[block],
                                        dqcoeff, dst, dst_stride);
      } else {
        vp9_short_iht4x4_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
    default:
      /* xform_quant() above already asserts on the same value; kept here so
       * both switches agree. */
      assert(0);
      break;
  }
}

591
592
593
/* Forward-transforms and quantizes every transform block of plane 0.
 * No optimize_ctx is supplied (arg.ctx is NULL); xform_quant() does not
 * read it. */
void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x,
                         BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct encode_b_args arg = {cm, x, NULL};

  foreach_transformed_block_in_plane(xd, bsize, 0, xform_quant, &arg);
}

/* Forward-transforms and quantizes the transform blocks visited by
 * foreach_transformed_block_uv().  No optimize_ctx is supplied (arg.ctx is
 * NULL); xform_quant() does not read it. */
void vp9_xform_quant_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
                          BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct encode_b_args arg = {cm, x, NULL};

  foreach_transformed_block_uv(xd, bsize, xform_quant, &arg);
}

/* Full encode of plane 0: residual computation followed by encode_block()
 * on every transform block.  The local optimize_ctx is only initialised,
 * and only read by encode_block(), when x->optimize is set. */
void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x,
                    BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {cm, x, &ctx};

  vp9_subtract_sby(x, bsize);
  if (x->optimize)
    vp9_optimize_init(xd, bsize, &ctx);

  foreach_transformed_block_in_plane(xd, bsize, 0, encode_block, &arg);
}

/* Full encode of the non-zero planes: residual computation followed by
 * encode_block() on every block visited by foreach_transformed_block_uv().
 * The local optimize_ctx is only initialised, and only read by
 * encode_block(), when x->optimize is set. */
void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
                     BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {cm, x, &ctx};

  vp9_subtract_sbuv(x, bsize);
  if (x->optimize)
    vp9_optimize_init(xd, bsize, &ctx);

  foreach_transformed_block_uv(xd, bsize, encode_block, &arg);
}

633
634
635
636
/* Full encode of all planes: residual computation followed by encode_block()
 * on every transform block.  The local optimize_ctx is only initialised,
 * and only read by encode_block(), when x->optimize is set. */
void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x,
                   BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {cm, x, &ctx};

  vp9_subtract_sb(x, bsize);
  if (x->optimize)
    vp9_optimize_init(xd, bsize, &ctx);

  foreach_transformed_block(xd, bsize, encode_block, &arg);
}
645
646
647
648
649
650

/* Iterator callback: intra-predicts, transforms, quantizes and reconstructs
 * one transform block.
 *
 * `arg` points to a struct encode_b_args; only its `x` member is used here.
 * Steps, in order: build the intra prediction into dst, compute the residual
 * for this transform block, run xform_quant(), then inverse-transform the
 * dequantized coefficients and add them into dst.  The trellis optimisation
 * call is commented out in this path.
 */
static void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
                               int ss_txfrm_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK* const x = args->x;
  MACROBLOCKD* const xd = &x->e_mbd;
  MB_MODE_INFO* const mbmi = &xd->mode_info_context->mbmi;
  const TX_SIZE tx_size = (TX_SIZE)(ss_txfrm_size / 2);
  const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
  const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
                                                       block, ss_txfrm_size);
  uint8_t* const src =
      raster_block_offset_uint8(xd, bsize, plane, raster_block,
                                x->plane[plane].src.buf,
                                x->plane[plane].src.stride);
  uint8_t* const dst =
      raster_block_offset_uint8(xd, bsize, plane, raster_block,
                                xd->plane[plane].dst.buf,
                                xd->plane[plane].dst.stride);
  int16_t* const src_diff =
      raster_block_offset_int16(xd, bsize, plane,
                                raster_block, x->plane[plane].src_diff);
  int16_t* const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16);
  const int dst_stride = xd->plane[plane].dst.stride;

  const int txfm_b_size = 4 << tx_size;
  int ib = raster_block;
  int tx_ib = ib >> tx_size;
  int plane_b_size;

  TX_TYPE tx_type;
  int mode, b_mode;

  /* Plane 0 of an intra block smaller than BLOCK_SIZE_SB8X8 takes its mode
   * from the per-block bmi entry; everything else uses the plane's mode. */
  mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
  if (mbmi->sb_type < BLOCK_SIZE_SB8X8 && plane == 0 &&
      mbmi->ref_frame == INTRA_FRAME)
    b_mode = xd->mode_info_context->bmi[ib].as_mode.first;
  else
    b_mode = mode;

  assert(b_mode >= DC_PRED && b_mode <= TM_PRED);

  plane_b_size = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
  vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode,
                          dst, dst_stride);
  vp9_subtract_block(txfm_b_size, txfm_b_size,
                     src_diff, bw,
                     src, x->plane[plane].src.stride,
                     dst, dst_stride);

  xform_quant(plane, block, bsize, ss_txfrm_size, arg);

  /*
  if (x->optimize)
    vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx);
    */
  switch (ss_txfrm_size / 2) {
    case TX_32X32:
      vp9_short_idct32x32_add(dqcoeff, dst, dst_stride);
      break;
    case TX_16X16:
      tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        vp9_short_idct16x16_add(dqcoeff, dst, dst_stride);
      } else {
        vp9_short_iht16x16_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
    case TX_8X8:
      tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        vp9_short_idct8x8_add(dqcoeff, dst, dst_stride);
      } else {
        vp9_short_iht8x8_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
    case TX_4X4:
      tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
      if (tx_type == DCT_DCT) {
        // this is like vp9_short_idct4x4 but has a special case around eob<=1
        // which is significant (not just an optimization) for the lossless
        // case.
        vp9_inverse_transform_b_4x4_add(xd, xd->plane[plane].eobs[block],
                                        dqcoeff, dst, dst_stride);
      } else {
        vp9_short_iht4x4_add(dqcoeff, dst, dst_stride, tx_type);
      }
      break;
    default:
      /* xform_quant() above already asserts on the same value; kept here so
       * both switches agree. */
      assert(0);
      break;
  }
}

743
744
/* Intra-encodes every transform block of plane 0 via encode_block_intra().
 * The local optimize_ctx is passed through uninitialised; its only use in
 * encode_block_intra() is inside commented-out code. */
void vp9_encode_intra_block_y(VP9_COMMON *const cm, MACROBLOCK *x,
                              BLOCK_SIZE_TYPE bsize) {
  MACROBLOCKD* const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  struct encode_b_args arg = {cm, x, &ctx};

  foreach_transformed_block_in_plane(xd, bsize, 0,
                                     encode_block_intra, &arg);
}
752
753
754
755
756
757
758
759
/* Intra-encodes the transform blocks visited by
 * foreach_transformed_block_uv() via encode_block_intra().  The local
 * optimize_ctx is passed through uninitialised; its only use in
 * encode_block_intra() is inside commented-out code. */
void vp9_encode_intra_block_uv(VP9_COMMON *const cm, MACROBLOCK *x,
                               BLOCK_SIZE_TYPE bsize) {
  struct optimize_ctx scratch_ctx;
  struct encode_b_args bundle = {cm, x, &scratch_ctx};

  foreach_transformed_block_uv(&x->e_mbd, bsize, encode_block_intra, &bundle);
}
760