/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "av1/common/cfl.h"
#include "av1/common/common_data.h"
14
15
16
17
#include "av1/common/onyxc_int.h"

#include "aom/internal/aom_codec_internal.h"

18
19
20
// Prepare a CfL context for use with the stream parameters held in cm.
//
// Only 4:4:4 (no chroma subsampling) and 4:2:0 (chroma subsampled by two in
// both directions) are accepted; any other layout is reported through
// aom_internal_error() as an unsupported bitstream. The luma pixel buffer is
// zeroed, the subsampling factors are copied into the context and the
// "parameters computed" flag is cleared.
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
  const int is_444 = (cm->subsampling_x == 0 && cm->subsampling_y == 0);
  const int is_420 = (cm->subsampling_x == 1 && cm->subsampling_y == 1);

  if (!is_444 && !is_420) {
    aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
                       "Only 4:4:4 and 4:2:0 are currently supported by CfL");
  }

  // Start from an all-zero reconstructed luma buffer.
  memset(&cfl->y_pix, 0, sizeof(uint8_t) * MAX_SB_SQUARE);

  cfl->subsampling_x = cm->subsampling_x;
  cfl->subsampling_y = cm->subsampling_y;

  // Nothing has been derived from the (empty) buffer yet.
  cfl->are_parameters_computed = 0;
}

// Load from the CfL pixel buffer (cfl->y_pix) into the chroma-resolution
// buffer cfl->y_down_pix.
//
// row and col locate the block in units of (1 << tx_size_wide_log2[0])
// chroma pixels; width and height are the block dimensions in chroma pixels.
// The result is always written starting at the top-left of y_down_pix, one
// row every MAX_SB_SIZE entries. In 4:4:4 the luma is copied as is, in 4:2:0
// every 2x2 luma neighbourhood is averaged into one output pixel.
static void cfl_load(CFL_CTX *cfl, int row, int col, int width, int height) {
  const int ss_x = cfl->subsampling_x;
  const int ss_y = cfl->subsampling_y;
  const int unit_log2 = tx_size_wide_log2[0];

  // TODO(ltrudeau) convert to uint16 to add HBD support
  uint8_t *const out = cfl->y_down_pix;

  // TODO(ltrudeau) should be faster to downsample when we store the values
  // TODO(ltrudeau) add support for 4:2:2
  if (ss_y == 0 && ss_x == 0) {
    // TODO(ltrudeau) convert to uint16 to add HBD support
    const uint8_t *const luma =
        &cfl->y_pix[(row * MAX_SB_SIZE + col) << unit_log2];
    for (int y = 0; y < height; y++) {
      const int line = y * MAX_SB_SIZE;
      for (int x = 0; x < width; x++) {
        // In 4:4:4, pixels match 1 to 1
        out[line + x] = luma[line + x];
      }
    }
  } else if (ss_y == 1 && ss_x == 1) {
    // TODO(ltrudeau) convert to uint16 to add HBD support
    const uint8_t *const luma =
        &cfl->y_pix[(row * MAX_SB_SIZE + col) << (unit_log2 + ss_y)];
    for (int y = 0; y < height; y++) {
      const int line = y * MAX_SB_SIZE;
      for (int x = 0; x < width; x++) {
        // Doubling the chroma offset moves two luma rows down per chroma row
        // and two luma columns right per chroma column.
        const int top = (line + x) << ss_y;
        const int bottom = top + MAX_SB_SIZE;
        // In 4:2:0, average pixels in 2x2 grid
        const int sum_2x2 =
            luma[top] + luma[top + 1] + luma[bottom] + luma[bottom + 1];
        out[line + x] = OD_SHR_ROUND(sum_2x2, 2);
      }
    }
  } else {
    assert(0);  // Unsupported chroma subsampling
  }

  // Due to frame boundary issues, it is possible that the total area covered
  // by Chroma exceeds that of Luma. When this happens, we write over the
  // broken data by repeating the last columns and/or rows.
  //
  // Note that in order to manage the case where both rows and columns
  // overrun, the column pass runs first so that the row pass duplicates
  // lines whose right-hand side has already been repaired.
  const int extra_cols = ((col << unit_log2) + width) - (cfl->y_width >> ss_x);
  const int extra_rows =
      ((row << unit_log2) + height) - (cfl->y_height >> ss_y);

  if (extra_cols > 0) {
    const int first_bad_col = width - extra_cols;
    for (int y = 0; y < height; y++) {
      const int start = y * MAX_SB_SIZE + first_bad_col;
      // Last column that is backed by stored luma on this row.
      const uint8_t edge = out[start - 1];
      for (int x = 0; x < extra_cols; x++) {
        out[start + x] = edge;
      }
    }
  }

  if (extra_rows > 0) {
    const int first_bad_row = height - extra_rows;
    // Last row that is backed by stored luma.
    const int edge_line = (first_bad_row - 1) * MAX_SB_SIZE;
    for (int y = 0; y < extra_rows; y++) {
      const int line = (first_bad_row + y) * MAX_SB_SIZE;
      for (int x = 0; x < width; x++) {
        out[line + x] = out[edge_line + x];
      }
    }
  }
}
Luc Trudeau's avatar
Luc Trudeau committed
114
115
116

// CfL computes its own block-level DC_PRED. This is required to compute both
// alpha_cb and alpha_cr before the prediction are computed.
117
static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
118
119
  const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
  const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
Luc Trudeau's avatar
Luc Trudeau committed
120

121
122
  const uint8_t *const dst_u = pd_u->dst.buf;
  const uint8_t *const dst_v = pd_v->dst.buf;
Luc Trudeau's avatar
Luc Trudeau committed
123

124
125
  const int dst_u_stride = pd_u->dst.stride;
  const int dst_v_stride = pd_v->dst.stride;
Luc Trudeau's avatar
Luc Trudeau committed
126

Luc Trudeau's avatar
Luc Trudeau committed
127
  CFL_CTX *const cfl = xd->cfl;
128
129
130
131
132
133
134

  // Compute DC_PRED until block boundary. We can't assume the neighbor will use
  // the same transform size.
  const int width = max_block_wide(xd, plane_bsize, AOM_PLANE_U)
                    << tx_size_wide_log2[0];
  const int height = max_block_high(xd, plane_bsize, AOM_PLANE_U)
                     << tx_size_high_log2[0];
135
  // Number of pixel on the top and left borders.
136
  const double num_pel = width + height;
Luc Trudeau's avatar
Luc Trudeau committed
137

138
139
  int sum_u = 0;
  int sum_v = 0;
Luc Trudeau's avatar
Luc Trudeau committed
140

141
142
143
144
145
146
147
148
149
// Match behavior of build_intra_predictors (reconintra.c) at superblock
// boundaries:
//
// 127 127 127 .. 127 127 127 127 127 127
// 129  A   B  ..  Y   Z
// 129  C   D  ..  W   X
// 129  E   F  ..  U   V
// 129  G   H  ..  S   T   T   T   T   T
// ..
Luc Trudeau's avatar
Luc Trudeau committed
150

151
152
153
#if CONFIG_CHROMA_SUB8X8
  if (xd->chroma_up_available && xd->mb_to_right_edge >= 0) {
#else
Luc Trudeau's avatar
Luc Trudeau committed
154
  if (xd->up_available && xd->mb_to_right_edge >= 0) {
155
156
#endif
    // TODO(ltrudeau) replace this with DC_PRED assembly
157
    for (int i = 0; i < width; i++) {
158
159
      sum_u += dst_u[-dst_u_stride + i];
      sum_v += dst_v[-dst_v_stride + i];
Luc Trudeau's avatar
Luc Trudeau committed
160
161
    }
  } else {
162
163
    sum_u = width * 127;
    sum_v = width * 127;
Luc Trudeau's avatar
Luc Trudeau committed
164
165
  }

166
167
168
#if CONFIG_CHROMA_SUB8X8
  if (xd->chroma_left_available && xd->mb_to_bottom_edge >= 0) {
#else
Luc Trudeau's avatar
Luc Trudeau committed
169
  if (xd->left_available && xd->mb_to_bottom_edge >= 0) {
170
#endif
171
    for (int i = 0; i < height; i++) {
172
173
      sum_u += dst_u[i * dst_u_stride - 1];
      sum_v += dst_v[i * dst_v_stride - 1];
Luc Trudeau's avatar
Luc Trudeau committed
174
175
    }
  } else {
176
177
    sum_u += height * 129;
    sum_v += height * 129;
Luc Trudeau's avatar
Luc Trudeau committed
178
179
  }

180
181
  // TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will
  // not be a power of two. So these divisions will have to use a lookup table.
Luc Trudeau's avatar
Luc Trudeau committed
182
183
  cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel;
  cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
Luc Trudeau's avatar
Luc Trudeau committed
184
185
}

Luc Trudeau's avatar
Luc Trudeau committed
186
187
188
189
190
191
192
193
194
195
// Compute the average of the chroma-resolution luma covering the whole
// uv_width x uv_height block and store it in cfl->y_average.
//
// The luma is first (re)loaded into cfl->y_down_pix through cfl_load(),
// starting at the block origin.
static void cfl_compute_average(CFL_CTX *cfl) {
  const int width = cfl->uv_width;
  const int height = cfl->uv_height;
  const double num_pel = width * height;

  cfl_load(cfl, 0, 0, width, height);

  // TODO(ltrudeau) Convert to uint16 for HBD support
  const uint8_t *const y_pix = cfl->y_down_pix;

  int sum = 0;
  for (int y = 0; y < height; y++) {
    // Rows of y_down_pix are MAX_SB_SIZE entries apart.
    const int line = y * MAX_SB_SIZE;
    for (int x = 0; x < width; x++) {
      sum += y_pix[line + x];
    }
  }

  cfl->y_average = sum / num_pel;
}

// Convert a signalled alpha index and sign into the scaling factor to apply
// for the given prediction type (U or V).
//
// cfl_alpha_codes maps (alpha_idx, pred_type) to an entry of cfl_alpha_mags.
// That entry is returned unchanged for CFL_SIGN_POS and negated otherwise.
static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
                                      CFL_PRED_TYPE pred_type) {
  const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type];
  const double magnitude = cfl_alpha_mags[mag_idx];

  if (alpha_sign == CFL_SIGN_POS) {
    return magnitude;
  }

  // A negative sign is only meaningful for a non-zero magnitude, and the
  // magnitude table is expected to hold the negated value in the next slot.
  assert(magnitude != 0.0);
  assert(cfl_alpha_mags[mag_idx + 1] == -magnitude);
  return -magnitude;
}

Luc Trudeau's avatar
Luc Trudeau committed
219
// Predict the current transform block using CfL.
Luc Trudeau's avatar
Luc Trudeau committed
220
221
222
223
224
225
226
227
void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                       int row, int col, TX_SIZE tx_size, int plane) {
  CFL_CTX *const cfl = xd->cfl;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  // CfL parameters must be computed before prediction can be done.
  assert(cfl->are_parameters_computed == 1);

228
229
  const int width = tx_size_wide[tx_size];
  const int height = tx_size_high[tx_size];
Luc Trudeau's avatar
Luc Trudeau committed
230
231
232
233
234
235
  // TODO(ltrudeau) Convert to uint16 to support HBD
  const uint8_t *y_pix = cfl->y_down_pix;

  const double dc_pred = cfl->dc_pred[plane - 1];
  const double alpha = cfl_idx_to_alpha(
      mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
Luc Trudeau's avatar
Luc Trudeau committed
236

Luc Trudeau's avatar
Luc Trudeau committed
237
  const double avg = cfl->y_average;
238

Luc Trudeau's avatar
Luc Trudeau committed
239
  cfl_load(cfl, row, col, width, height);
240
241
  for (int j = 0; j < height; j++) {
    for (int i = 0; i < width; i++) {
Luc Trudeau's avatar
Luc Trudeau committed
242
      dst[i] = (uint8_t)(alpha * (y_pix[i] - avg) + dc_pred + 0.5);
Luc Trudeau's avatar
Luc Trudeau committed
243
    }
Luc Trudeau's avatar
Luc Trudeau committed
244
    dst += dst_stride;
Luc Trudeau's avatar
Luc Trudeau committed
245
    y_pix += MAX_SB_SIZE;
Luc Trudeau's avatar
Luc Trudeau committed
246
247
  }
}
248
249

// Copy a reconstructed luma transform block into the CfL pixel buffer.
//
//   cfl           CfL context owning the pixel buffer (cfl->y_pix).
//   input         top-left pixel of the reconstructed luma block.
//   input_stride  distance, in pixels, between two consecutive input rows.
//   row, col      block position inside the buffer, in units of
//                 (1 << tx_size_wide_log2[0]) pixels.
//   tx_size       transform size; gives the width and height to copy.
//   bsize         size of the prediction block the transform belongs to.
//
// Storing new luma invalidates previously computed CfL parameters and
// updates the recorded extent (y_width, y_height) of valid luma.
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
               int col, TX_SIZE tx_size, BLOCK_SIZE bsize) {
  const int tx_width = tx_size_wide[tx_size];
  const int tx_height = tx_size_high[tx_size];
  const int tx_off_log2 = tx_size_wide_log2[0];

#if CONFIG_CHROMA_SUB8X8
  if (bsize < BLOCK_8X8) {
#if CONFIG_DEBUG
    // Transform cannot be smaller than 4x4
    assert(tx_width >= 4);
    assert(tx_height >= 4);
#endif

    const int bw = block_size_wide[bsize];
    const int bh = block_size_high[bsize];

    // For chroma_sub8x8, the CfL prediction for prediction blocks smaller than
    // 8X8 uses non chroma reference reconstructed luma pixels. To do so, we
    // combine the 4X4 non chroma reference into the CfL pixel buffers based on
    // their row and column index.

    // The following code is adapted from the is_chroma_reference() function.

    // Increment the row index for odd indexed 4X4 blocks, but not for 4X8
    // blocks, and only when chroma is subsampled vertically.
    const int is_odd_mi_row = cfl->mi_row & 0x01;
    if (is_odd_mi_row && (bh == 4) && cfl->subsampling_y) {
      assert(row == 0);
      row++;
    }

    // Increment the col index for odd indexed 4X4 blocks, but not for 8X4
    // blocks, and only when chroma is subsampled horizontally.
    const int is_odd_mi_col = cfl->mi_col & 0x01;
    if (is_odd_mi_col && (bw == 4) && cfl->subsampling_x) {
      assert(col == 0);
      col++;
    }
  }
#endif

  // Invalidate current parameters
  cfl->are_parameters_computed = 0;

  // Right and bottom edges of this block inside the pixel buffer.
  const int right_edge = (col << tx_off_log2) + tx_width;
  const int bottom_edge = (row << tx_off_log2) + tx_height;

  // Store the surface of the pixel buffer that was written to, this way we
  // can manage chroma overrun (e.g. when the chroma surfaces goes beyond the
  // frame boundary)
  if (col == 0 && row == 0) {
    // A block at the origin restarts the tracked surface.
    cfl->y_width = tx_width;
    cfl->y_height = tx_height;
  } else {
    cfl->y_width = OD_MAXI(right_edge, cfl->y_width);
    cfl->y_height = OD_MAXI(bottom_edge, cfl->y_height);
  }

  // Check that we will remain inside the pixel buffer.
  assert(bottom_edge <= MAX_SB_SIZE);
  assert(right_edge <= MAX_SB_SIZE);

  // Store the input into the CfL pixel buffer
  uint8_t *const y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << tx_off_log2];

  // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store
  for (int y = 0; y < tx_height; y++) {
    const int dst_line = y * MAX_SB_SIZE;
    const int src_line = y * input_stride;
    for (int x = 0; x < tx_width; x++) {
      y_pix[dst_line + x] = input[src_line + x];
    }
  }
}
320

Luc Trudeau's avatar
Luc Trudeau committed
321
322
323
// Derive the block-level CfL parameters from the luma stored so far: the
// chroma block dimensions, the DC_PRED of both chroma planes and the average
// of the reconstructed luma. Sets cfl->are_parameters_computed once done.
//
//   xd       block context; provides the CfL context and the mode info.
//   tx_size  transform size used to bound the chroma block dimensions.
void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
  CFL_CTX *const cfl = xd->cfl;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  // Do not call cfl_compute_parameters multiple time on the same values.
  assert(cfl->are_parameters_computed == 0);

  const BLOCK_SIZE uv_bsize =
      get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]);
#if CONFIG_CHROMA_SUB8X8
  // Never go below 4X4 for the chroma block size.
  const BLOCK_SIZE plane_bsize = AOMMAX(BLOCK_4X4, uv_bsize);
#else
  const BLOCK_SIZE plane_bsize = uv_bsize;
#endif

  // AOM_PLANE_U is used, but both planes will have the same sizes.
  cfl->uv_width = max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
  cfl->uv_height =
      max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);

#if CONFIG_DEBUG
  // For blocks of at least 8X8, the stored luma must fit inside the chroma
  // block once scaled back to luma resolution.
  if (mbmi->sb_type >= BLOCK_8X8) {
    assert(cfl->y_width <= cfl->uv_width << cfl->subsampling_x);
    assert(cfl->y_height <= cfl->uv_height << cfl->subsampling_y);
  }
#endif

  // Compute block-level DC_PRED for both chromatic planes.
  // DC_PRED replaces beta in the linear model.
  cfl_dc_pred(xd, plane_bsize);

  // Compute block-level average on reconstructed luma input.
  cfl_compute_average(cfl);

  cfl->are_parameters_computed = 1;
}