vp9_loopfilter.c 62.6 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

11
#include "./vpx_config.h"
12
13
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
14
#include "vp9/common/vp9_reconinter.h"
Attila Nagy's avatar
Attila Nagy committed
15
#include "vpx_mem/vpx_mem.h"
John Koleszar's avatar
John Koleszar committed
16

17
#include "vp9/common/vp9_seg_common.h"
18

James Zern's avatar
James Zern committed
19
// 64 bit masks for left transform size. Each 1 represents a position where
Jim Bankoski's avatar
Jim Bankoski committed
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
//
// In the case of TX_16X16->  ( in low order byte first we end up with
// a mask that looks like this
//
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
static const uint64_t left_64x64_txform_mask[TX_SIZES]= {
James Zern's avatar
James Zern committed
37
38
39
40
  0xffffffffffffffff,  // TX_4X4
  0xffffffffffffffff,  // TX_8x8
  0x5555555555555555,  // TX_16x16
  0x1111111111111111,  // TX_32x32
Jim Bankoski's avatar
Jim Bankoski committed
41
42
};

James Zern's avatar
James Zern committed
43
// 64 bit masks for above transform size. Each 1 represents a position where
Jim Bankoski's avatar
Jim Bankoski committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
// we should apply a loop filter across the top border of an 8x8 block
// boundary.
//
// In the case of TX_32x32 ->  ( in low order byte first we end up with
// a mask that looks like this
//
//    11111111
//    00000000
//    00000000
//    00000000
//    11111111
//    00000000
//    00000000
//    00000000
//
// A loopfilter should be applied to every other 4 the row vertically.
static const uint64_t above_64x64_txform_mask[TX_SIZES]= {
James Zern's avatar
James Zern committed
61
62
63
64
  0xffffffffffffffff,  // TX_4X4
  0xffffffffffffffff,  // TX_8x8
  0x00ff00ff00ff00ff,  // TX_16x16
  0x000000ff000000ff,  // TX_32x32
Jim Bankoski's avatar
Jim Bankoski committed
65
66
};

James Zern's avatar
James Zern committed
67
68
69
// 64 bit masks for prediction sizes (left). Each 1 represents a position
// where left border of an 8x8 block. These are aligned to the right most
// appropriate bit, and then shifted into place.
Jim Bankoski's avatar
Jim Bankoski committed
70
71
72
73
74
75
76
77
78
79
80
81
82
//
// In the case of TX_16x32 ->  ( low order byte first ) we end up with
// a mask that looks like this :
//
//  10000000
//  10000000
//  10000000
//  10000000
//  00000000
//  00000000
//  00000000
//  00000000
static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
James Zern's avatar
James Zern committed
83
84
85
86
87
88
89
90
91
92
93
94
95
  0x0000000000000001,  // BLOCK_4X4,
  0x0000000000000001,  // BLOCK_4X8,
  0x0000000000000001,  // BLOCK_8X4,
  0x0000000000000001,  // BLOCK_8X8,
  0x0000000000000101,  // BLOCK_8X16,
  0x0000000000000001,  // BLOCK_16X8,
  0x0000000000000101,  // BLOCK_16X16,
  0x0000000001010101,  // BLOCK_16X32,
  0x0000000000000101,  // BLOCK_32X16,
  0x0000000001010101,  // BLOCK_32X32,
  0x0101010101010101,  // BLOCK_32X64,
  0x0000000001010101,  // BLOCK_64X32,
  0x0101010101010101,  // BLOCK_64X64
Jim Bankoski's avatar
Jim Bankoski committed
96
97
98
99
};

// 64 bit mask to shift and set for each prediction size (above). Each 1
// marks an 8x8 column along the top edge of the prediction block, so only
// the lowest byte (first row) is ever populated.
static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4
  0x0000000000000001ULL,  // BLOCK_4X8
  0x0000000000000001ULL,  // BLOCK_8X4
  0x0000000000000001ULL,  // BLOCK_8X8
  0x0000000000000001ULL,  // BLOCK_8X16
  0x0000000000000003ULL,  // BLOCK_16X8
  0x0000000000000003ULL,  // BLOCK_16X16
  0x0000000000000003ULL,  // BLOCK_16X32
  0x000000000000000fULL,  // BLOCK_32X16
  0x000000000000000fULL,  // BLOCK_32X32
  0x000000000000000fULL,  // BLOCK_32X64
  0x00000000000000ffULL,  // BLOCK_64X32
  0x00000000000000ffULL,  // BLOCK_64X64
};
James Zern's avatar
James Zern committed
114
// 64 bit mask to shift and set for each prediction size. A bit is set for
Jim Bankoski's avatar
Jim Bankoski committed
115
116
117
// each 8x8 block that would be in the left most block of the given block
// size in the 64x64 block.
static const uint64_t size_mask[BLOCK_SIZES] = {
James Zern's avatar
James Zern committed
118
119
120
121
122
123
124
125
126
127
128
129
130
  0x0000000000000001,  // BLOCK_4X4
  0x0000000000000001,  // BLOCK_4X8
  0x0000000000000001,  // BLOCK_8X4
  0x0000000000000001,  // BLOCK_8X8
  0x0000000000000101,  // BLOCK_8X16,
  0x0000000000000003,  // BLOCK_16X8
  0x0000000000000303,  // BLOCK_16X16
  0x0000000003030303,  // BLOCK_16X32,
  0x0000000000000f0f,  // BLOCK_32X16,
  0x000000000f0f0f0f,  // BLOCK_32X32,
  0x0f0f0f0f0f0f0f0f,  // BLOCK_32X64,
  0x00000000ffffffff,  // BLOCK_64X32,
  0xffffffffffffffff,  // BLOCK_64X64
Jim Bankoski's avatar
Jim Bankoski committed
131
132
133
134
135
136
137
138
};

// Masks used for the left and above borders of the luma plane. The values
// equal the TX_32X32 entries of the left/above 64x64 transform masks.
static const uint64_t left_border = 0x1111111111111111ULL;
static const uint64_t above_border = 0x000000ff000000ffULL;

// 16 bit masks for uv transform sizes (left border). Same layout as the
// 64 bit luma table, but with 4 bits per row and 4 rows.
static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = {
  0xffff,  // TX_4X4
  0xffff,  // TX_8X8
  0x5555,  // TX_16X16
  0x1111,  // TX_32X32
};

// 16 bit masks for uv transform sizes (above border), 4 bits per row.
static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = {
  0xffff,  // TX_4X4
  0xffff,  // TX_8X8
  0x0f0f,  // TX_16X16
  0x000f,  // TX_32X32
};

// 16 bit left mask to shift and set for each uv prediction size.
static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0011,  // BLOCK_16X32
  0x0001,  // BLOCK_32X16
  0x0011,  // BLOCK_32X32
  0x1111,  // BLOCK_32X64
  0x0011,  // BLOCK_64X32
  0x1111,  // BLOCK_64X64
};
// 16 bit above mask to shift and set for each uv prediction size.
static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0001,  // BLOCK_16X32
  0x0003,  // BLOCK_32X16
  0x0003,  // BLOCK_32X32
  0x0003,  // BLOCK_32X64
  0x000f,  // BLOCK_64X32
  0x000f,  // BLOCK_64X64
};

// 16 bit mask to shift and set for each uv prediction size.
static const uint16_t size_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0011,  // BLOCK_16X32
  0x0003,  // BLOCK_32X16
  0x0033,  // BLOCK_32X32
  0x3333,  // BLOCK_32X64
  0x00ff,  // BLOCK_64X32
  0xffff,  // BLOCK_64X64
};
// Masks used for the left and above borders of the uv planes (16 bit
// counterparts of left_border / above_border).
static const uint16_t left_border_uv = 0x1111;
static const uint16_t above_border_uv = 0x000f;

204
205
206
207
// Maps a prediction mode to the mode index used when looking up the loop
// filter level: 0 for every intra mode and for ZEROMV, 1 for the other
// inter modes.
static const int mode_lf_lut[MB_MODE_COUNT] = {
  // INTRA_MODES
  0, 0, 0, 0, 0,
  0, 0, 0, 0, 0,
  // INTER_MODES (ZEROMV == 0)
  1, 1, 0, 1
};
Attila Nagy's avatar
Attila Nagy committed
208

Frank Galligan's avatar
Frank Galligan committed
209
// Recomputes the per-level 'lim' and 'mblim' threshold vectors in
// lfi->lfthr[] for every filter level in [0, MAX_LOOP_FILTER], given the
// frame sharpness level. Each vector is filled with SIMD_WIDTH copies of
// the same byte value.
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  int lvl;

  // For each possible value for the loop filter fill out limits
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
    // Set loop filter parameters that control sharpness.
    // The level is shifted right by 0, 1 or 2 bits for sharpness 0, 1..4
    // and above 4 respectively.
    int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));

    // With non-zero sharpness the inside limit is additionally capped.
    if (sharpness_lvl > 0) {
      if (block_inside_limit > (9 - sharpness_lvl))
        block_inside_limit = (9 - sharpness_lvl);
    }

    // The inside limit is never allowed to drop below 1.
    if (block_inside_limit < 1)
      block_inside_limit = 1;

    vpx_memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
    vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
               SIMD_WIDTH);
  }
}
John Koleszar's avatar
John Koleszar committed
230

Dmitry Kovalev's avatar
Dmitry Kovalev committed
231
232
233
234
235
236
// Returns the loop filter level for a block, looked up by its segment id,
// its first reference frame and the mode index from mode_lf_lut.
static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
                                const MB_MODE_INFO *mbmi) {
  const int segment = mbmi->segment_id;
  const int ref = mbmi->ref_frame[0];
  const int mode_idx = mode_lf_lut[mbmi->mode];

  return lfi_n->lvl[segment][ref][mode_idx];
}

237
// One-time loop filter setup for a VP9_COMMON instance: fills the limit
// vectors for the current sharpness level, records that sharpness as the
// last one applied, and fills the high-edge-variance threshold vectors for
// every filter level.
void vp9_loop_filter_init(VP9_COMMON *cm) {
  loop_filter_info_n *lfi = &cm->lf_info;
  struct loopfilter *lf = &cm->lf;
  int lvl;

  // init limits for given sharpness
  update_sharpness(lfi, lf->sharpness_level);
  lf->last_sharpness_level = lf->sharpness_level;

  // init hev threshold const vectors
  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
    vpx_memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
}

Frank Galligan's avatar
Frank Galligan committed
251
// Per-frame loop filter setup. Refreshes the sharpness-dependent limits if
// the sharpness level changed, then fills cm->lf_info.lvl[segment][ref][mode]
// with the filter level to use for every segment / reference frame / mode
// index combination, starting from default_filt_lvl and applying the
// segment feature and (when enabled) the reference and mode deltas.
void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
  int seg_id;
  // scale is the multiplier for lf_deltas
  // the multiplier is 1 for when filter_lvl is between 0 and 31;
  // 2 when filter_lvl is between 32 and 63
  const int scale = 1 << (default_filt_lvl >> 5);
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  const struct segmentation *const seg = &cm->seg;

  // update limits if sharpness has changed
  if (lf->last_sharpness_level != lf->sharpness_level) {
    update_sharpness(lfi, lf->sharpness_level);
    lf->last_sharpness_level = lf->sharpness_level;
  }

  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
    int lvl_seg = default_filt_lvl;
    if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
      // The segment value either replaces the default level (absolute mode)
      // or is added to it (delta mode); the result is clamped.
      const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
      lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ?
                      data : default_filt_lvl + data,
                      0, MAX_LOOP_FILTER);
    }

    if (!lf->mode_ref_delta_enabled) {
      // we could get rid of this if we assume that deltas are set to
      // zero when not in use; encoder always uses deltas
      vpx_memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
    } else {
      int ref, mode;
      // Intra frame: only the reference delta applies, and only mode index 0
      // is written.
      const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
      lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);

      // Inter frames: both the reference and the mode delta apply.
      for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
        for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
          const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale
                                        + lf->mode_deltas[mode] * scale;
          lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
        }
      }
    }
  }
}

296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
// Applies vertical-edge loop filters to two consecutive rows of 8x8 blocks
// in one pass.
//
// Each *_l mask packs both rows: the low mask_shift bits describe the first
// row and the next mask_shift bits describe the second row (8 bits per row
// when plane_type is 0, 4 bits per row otherwise). Bit i of a row selects
// the block starting 8 * i pixels to the right of s.
//
//   s       first pixel of the first row; the second row is 8 * pitch below.
//   pitch   stride between pixel rows.
//   lfl     per-block filter levels; the level for the second row is read
//           lfl_forward entries after the one for the first row.
//
// When the same kind of filter is needed in both rows of a column, the
// "dual" variant is used; otherwise the single-row filter is called on
// whichever row needs it.
static void filter_selectively_vert_row2(PLANE_TYPE plane_type,
                                         uint8_t *s, int pitch,
                                         unsigned int mask_16x16_l,
                                         unsigned int mask_8x8_l,
                                         unsigned int mask_4x4_l,
                                         unsigned int mask_4x4_int_l,
                                         const loop_filter_info_n *lfi_n,
                                         const uint8_t *lfl) {
  const int mask_shift = plane_type ? 4 : 8;
  const int mask_cutoff = plane_type ? 0xf : 0xff;
  const int lfl_forward = plane_type ? 4 : 8;

  // Split each packed mask into its first-row (_0) and second-row (_1) part.
  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
  unsigned int mask;

  // Walk the columns from the least significant bit until no work remains.
  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

    // TODO(yunqingwang): count in loopfilter functions should be removed.
    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          // The dual 16-wide filter takes a single set of thresholds.
          vp9_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                   lfi0->hev_thr);
        } else if (mask_16x16_0 & 1) {
          vp9_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
                              lfi0->hev_thr);
        } else {
          vp9_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
                              lfi1->lim, lfi1->hev_thr);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          vp9_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_8x8_0 & 1) {
          vp9_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
                             1);
        } else {
          vp9_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr, 1);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          vp9_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_0 & 1) {
          vp9_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
                             1);
        } else {
          vp9_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr, 1);
        }
      }

      // Internal 4x4 edge: 4 pixels to the right of the block's left edge.
      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
          vp9_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_int_0 & 1) {
          vp9_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
                             lfi0->hev_thr, 1);
        } else {
          vp9_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr, 1);
        }
      }
    }

    // Advance to the next 8-pixel column.
    s += 8;
    lfl += 1;
    mask_16x16_0 >>= 1;
    mask_8x8_0 >>= 1;
    mask_4x4_0 >>= 1;
    mask_4x4_int_0 >>= 1;
    mask_16x16_1 >>= 1;
    mask_8x8_1 >>= 1;
    mask_4x4_1 >>= 1;
    mask_4x4_int_1 >>= 1;
  }
}

395
#if CONFIG_VP9_HIGHBITDEPTH
396
397
398
399
400
401
402
403
// High bit depth counterpart of filter_selectively_vert_row2: applies
// vertical-edge loop filters to two consecutive rows of 8x8 blocks of
// 16-bit samples, forwarding the bit depth 'bd' to every filter call.
//
// Each *_l mask packs both rows: the low mask_shift bits describe the first
// row and the next mask_shift bits describe the second row (8 bits per row
// when plane_type is 0, 4 bits per row otherwise). The filter level for the
// second row is read lfl_forward entries after the one for the first row.
static void highbd_filter_selectively_vert_row2(PLANE_TYPE plane_type,
                                                uint16_t *s, int pitch,
                                                unsigned int mask_16x16_l,
                                                unsigned int mask_8x8_l,
                                                unsigned int mask_4x4_l,
                                                unsigned int mask_4x4_int_l,
                                                const loop_filter_info_n *lfi_n,
                                                const uint8_t *lfl, int bd) {
  const int mask_shift = plane_type ? 4 : 8;
  const int mask_cutoff = plane_type ? 0xf : 0xff;
  const int lfl_forward = plane_type ? 4 : 8;

  // Split each packed mask into its first-row (_0) and second-row (_1) part.
  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
  unsigned int mask;

  // Walk the columns from the least significant bit until no work remains.
  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
       mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

    // TODO(yunqingwang): count in loopfilter functions should be removed.
    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          // The dual 16-wide filter takes a single set of thresholds.
          vp9_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                          lfi0->hev_thr, bd);
        } else if (mask_16x16_0 & 1) {
          vp9_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
                                     lfi0->hev_thr, bd);
        } else {
          vp9_highbd_lpf_vertical_16(s + 8 * pitch, pitch, lfi1->mblim,
                                     lfi1->lim, lfi1->hev_thr, bd);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          vp9_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_8x8_0 & 1) {
          vp9_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, 1, bd);
        } else {
          vp9_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
                                    lfi1->lim, lfi1->hev_thr, 1, bd);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          vp9_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_4x4_0 & 1) {
          vp9_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, 1, bd);
        } else {
          vp9_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
                                    lfi1->lim, lfi1->hev_thr, 1, bd);
        }
      }

      // Internal 4x4 edge: 4 samples to the right of the block's left edge.
      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
          vp9_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_4x4_int_0 & 1) {
          vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
                                    lfi0->hev_thr, 1, bd);
        } else {
          vp9_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
                                    lfi1->lim, lfi1->hev_thr, 1, bd);
        }
      }
    }

    // Advance to the next 8-sample column.
    s += 8;
    lfl += 1;
    mask_16x16_0 >>= 1;
    mask_8x8_0 >>= 1;
    mask_4x4_0 >>= 1;
    mask_4x4_int_0 >>= 1;
    mask_16x16_1 >>= 1;
    mask_8x8_1 >>= 1;
    mask_4x4_1 >>= 1;
    mask_4x4_int_1 >>= 1;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

496
497
498
499
static void filter_selectively_horiz(uint8_t *s, int pitch,
                                     unsigned int mask_16x16,
                                     unsigned int mask_8x8,
                                     unsigned int mask_4x4,
500
                                     unsigned int mask_4x4_int,
Yunqing Wang's avatar
Yunqing Wang committed
501
502
                                     const loop_filter_info_n *lfi_n,
                                     const uint8_t *lfl) {
503
  unsigned int mask;
504
  int count;
505

John Koleszar's avatar
John Koleszar committed
506
  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
507
       mask; mask >>= count) {
Yunqing Wang's avatar
Yunqing Wang committed
508
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
509

Dmitry Kovalev's avatar
Dmitry Kovalev committed
510
    count = 1;
511
    if (mask & 1) {
512
      if (mask_16x16 & 1) {
513
        if ((mask_16x16 & 3) == 3) {
Jim Bankoski's avatar
Jim Bankoski committed
514
515
          vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
                                lfi->hev_thr, 2);
516
517
          count = 2;
        } else {
Jim Bankoski's avatar
Jim Bankoski committed
518
519
          vp9_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
                                lfi->hev_thr, 1);
520
        }
521
522
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
523
          // Next block's thresholds.
524
525
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

Jim Bankoski's avatar
Jim Bankoski committed
526
527
528
          vp9_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);
529
530

          if ((mask_4x4_int & 3) == 3) {
Jim Bankoski's avatar
Jim Bankoski committed
531
532
533
            vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
534
          } else {
535
            if (mask_4x4_int & 1)
Jim Bankoski's avatar
Jim Bankoski committed
536
537
              vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr, 1);
538
            else if (mask_4x4_int & 2)
Jim Bankoski's avatar
Jim Bankoski committed
539
540
              vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr, 1);
541
          }
542
543
          count = 2;
        } else {
Jim Bankoski's avatar
Jim Bankoski committed
544
          vp9_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
545
546

          if (mask_4x4_int & 1)
Jim Bankoski's avatar
Jim Bankoski committed
547
548
            vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr, 1);
549
        }
550
551
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
552
          // Next block's thresholds.
553
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
554

Jim Bankoski's avatar
Jim Bankoski committed
555
556
557
          vp9_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);
558
          if ((mask_4x4_int & 3) == 3) {
Jim Bankoski's avatar
Jim Bankoski committed
559
560
561
            vp9_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
562
563
          } else {
            if (mask_4x4_int & 1)
Jim Bankoski's avatar
Jim Bankoski committed
564
565
              vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr, 1);
566
            else if (mask_4x4_int & 2)
Jim Bankoski's avatar
Jim Bankoski committed
567
568
              vp9_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr, 1);
569
570
571
          }
          count = 2;
        } else {
Jim Bankoski's avatar
Jim Bankoski committed
572
          vp9_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
573

Jim Bankoski's avatar
Jim Bankoski committed
574
575
576
          if (mask_4x4_int & 1)
            vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr, 1);
577
578
        }
      } else if (mask_4x4_int & 1) {
Jim Bankoski's avatar
Jim Bankoski committed
579
580
        vp9_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr, 1);
581
      }
582
    }
583
    s += 8 * count;
Yunqing Wang's avatar
Yunqing Wang committed
584
    lfl += count;
585
586
587
588
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
589
590
591
  }
}

592
#if CONFIG_VP9_HIGHBITDEPTH
593
594
595
596
597
598
599
// High bit depth counterpart of filter_selectively_horiz: applies
// horizontal-edge loop filters along one row of 8x8 blocks of 16-bit
// samples, forwarding the bit depth 'bd' to every filter call.
//
// Bit i of each mask selects the block starting 8 * i samples to the right
// of s. For each block at most one of the 16x16 / 8x8 / 4x4 edge filters is
// applied, in that order of priority. mask_4x4_int selects an additional
// filter on the internal edge 4 rows below s. Two neighbouring blocks that
// need the same filter are processed together (count == 2).
static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
                                            unsigned int mask_16x16,
                                            unsigned int mask_8x8,
                                            unsigned int mask_4x4,
                                            unsigned int mask_4x4_int,
                                            const loop_filter_info_n *lfi_n,
                                            const uint8_t *lfl, int bd) {
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    // Number of 8x8 blocks consumed by this iteration.
    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
          vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
                                       lfi->hev_thr, 2, bd);
          count = 2;
        } else {
          vp9_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
                                       lfi->hev_thr, 1, bd);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          vp9_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);

          if ((mask_4x4_int & 3) == 3) {
            vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                             lfi->lim, lfi->hev_thr,
                                             lfin->mblim, lfin->lim,
                                             lfin->hev_thr, bd);
          } else {
            if (mask_4x4_int & 1) {
              vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                          lfi->lim, lfi->hev_thr, 1, bd);
            } else if (mask_4x4_int & 2) {
              vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                          lfin->lim, lfin->hev_thr, 1, bd);
            }
          }
          count = 2;
        } else {
          vp9_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, 1, bd);

          if (mask_4x4_int & 1) {
            vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                        lfi->lim, lfi->hev_thr, 1, bd);
          }
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          vp9_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);
          if ((mask_4x4_int & 3) == 3) {
            vp9_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                             lfi->lim, lfi->hev_thr,
                                             lfin->mblim, lfin->lim,
                                             lfin->hev_thr, bd);
          } else {
            if (mask_4x4_int & 1) {
              vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                          lfi->lim, lfi->hev_thr, 1, bd);
            } else if (mask_4x4_int & 2) {
              vp9_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                          lfin->lim, lfin->hev_thr, 1, bd);
            }
          }
          count = 2;
        } else {
          vp9_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
                                      lfi->hev_thr, 1, bd);

          if (mask_4x4_int & 1) {
            vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
                                        lfi->lim, lfi->hev_thr, 1, bd);
          }
        }
      } else if (mask_4x4_int & 1) {
        // Only the internal 4x4 edge needs filtering for this block.
        vp9_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, 1, bd);
      }
    }
    // Advance past the block(s) just handled.
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

Jim Bankoski's avatar
Jim Bankoski committed
698
// This function ors into the current lfm structure, where to do loop
James Zern's avatar
James Zern committed
699
700
// filters for the specific mi we are looking at. It uses information
// including the block_size_type (32x16, 32x32, etc.), the transform size,
Jim Bankoski's avatar
Jim Bankoski committed
701
702
703
704
705
706
707
708
// whether there were any coefficients encoded, and the loop filter strength
// block we are currently looking at. Shift is used to position the
// 1's we produce.
// TODO(JBB) Need another function for different resolution color..
static void build_masks(const loop_filter_info_n *const lfi_n,
                        const MODE_INFO *mi, const int shift_y,
                        const int shift_uv,
                        LOOP_FILTER_MASK *lfm) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
709
710
711
  const MB_MODE_INFO *mbmi = &mi->mbmi;
  const BLOCK_SIZE block_size = mbmi->sb_type;
  const TX_SIZE tx_size_y = mbmi->tx_size;
712
  const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
713
714
715
716
717
718
719
  const int filter_level = get_filter_level(lfi_n, mbmi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
  uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
Yunqing Wang's avatar
Yunqing Wang committed
720
721
  int i;

Jim Bankoski's avatar
Jim Bankoski committed
722
  // If filter level is 0 we don't loop filter.
723
  if (!filter_level) {
Jim Bankoski's avatar
Jim Bankoski committed
724
    return;
725
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
726
727
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
728
729
730
731
732
733
    int index = shift_y;
    for (i = 0; i < h; i++) {
      vpx_memset(&lfm->lfl_y[index], filter_level, w);
      index += 8;
    }
  }
Jim Bankoski's avatar
Jim Bankoski committed
734
735

  // These set 1 in the current block size for the block size edges.
James Zern's avatar
James Zern committed
736
  // For instance if the block size is 32x16, we'll set:
Jim Bankoski's avatar
Jim Bankoski committed
737
738
739
740
741
742
743
744
  //    above =   1111
  //              0000
  //    and
  //    left  =   1000
  //          =   1000
  // NOTE : In this example the low bit is left most ( 1000 ) is stored as
  //        1,  not 8...
  //
James Zern's avatar
James Zern committed
745
  // U and V set things on a 16 bit scale.
Jim Bankoski's avatar
Jim Bankoski committed
746
747
748
749
750
751
752
753
  //
  *above_y |= above_prediction_mask[block_size] << shift_y;
  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
  *left_y |= left_prediction_mask[block_size] << shift_y;
  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;

  // If the block has no coefficients and is not intra we skip applying
  // the loop filter on block edges.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
754
  if (mbmi->skip && is_inter_block(mbmi))
Jim Bankoski's avatar
Jim Bankoski committed
755
756
    return;

James Zern's avatar
James Zern committed
757
  // Here we are adding a mask for the transform size. The transform
Jim Bankoski's avatar
Jim Bankoski committed
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
  // size mask is set to be correct for a 64x64 prediction block size. We
  // mask to match the size of the block we are working on and then shift it
  // into place..
  *above_y |= (size_mask[block_size] &
               above_64x64_txform_mask[tx_size_y]) << shift_y;
  *above_uv |= (size_mask_uv[block_size] &
                above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;

  *left_y |= (size_mask[block_size] &
              left_64x64_txform_mask[tx_size_y]) << shift_y;
  *left_uv |= (size_mask_uv[block_size] &
               left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;

  // Here we are trying to determine what to do with the internal 4x4 block
  // boundaries.  These differ from the 4x4 boundaries on the outside edge of
  // an 8x8 in that the internal ones can be skipped and don't depend on
  // the prediction block size.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
775
  if (tx_size_y == TX_4X4)
Jim Bankoski's avatar
Jim Bankoski committed
776
    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
777
778

  if (tx_size_uv == TX_4X4)
Jim Bankoski's avatar
Jim Bankoski committed
779
780
781
782
    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}

// This function does the same thing as the one above with the exception that
James Zern's avatar
James Zern committed
783
// it only affects the y masks. It exists because for blocks < 16x16 in size,
Jim Bankoski's avatar
Jim Bankoski committed
784
785
786
787
// we only update u and v masks on the first block.
static void build_y_mask(const loop_filter_info_n *const lfi_n,
                         const MODE_INFO *mi, const int shift_y,
                         LOOP_FILTER_MASK *lfm) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
788
789
790
791
792
793
794
  const MB_MODE_INFO *mbmi = &mi->mbmi;
  const BLOCK_SIZE block_size = mbmi->sb_type;
  const TX_SIZE tx_size_y = mbmi->tx_size;
  const int filter_level = get_filter_level(lfi_n, mbmi);
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
Yunqing Wang's avatar
Yunqing Wang committed
795
796
  int i;

797
  if (!filter_level) {
Jim Bankoski's avatar
Jim Bankoski committed
798
    return;
799
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
800
801
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
802
803
804
805
806
807
    int index = shift_y;
    for (i = 0; i < h; i++) {
      vpx_memset(&lfm->lfl_y[index], filter_level, w);
      index += 8;
    }
  }
Jim Bankoski's avatar
Jim Bankoski committed
808
809
810
811

  *above_y |= above_prediction_mask[block_size] << shift_y;
  *left_y |= left_prediction_mask[block_size] << shift_y;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
812
  if (mbmi->skip && is_inter_block(mbmi))
Jim Bankoski's avatar
Jim Bankoski committed
813
814
815
816
817
818
819
820
    return;

  *above_y |= (size_mask[block_size] &
               above_64x64_txform_mask[tx_size_y]) << shift_y;

  *left_y |= (size_mask[block_size] &
              left_64x64_txform_mask[tx_size_y]) << shift_y;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
821
  if (tx_size_y == TX_4X4)
Jim Bankoski's avatar
Jim Bankoski committed
822
823
824
825
826
827
    *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffff) << shift_y;
}

// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
828
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
hkuang's avatar
hkuang committed
829
                    MODE_INFO *mi, const int mode_info_stride,
830
                    LOOP_FILTER_MASK *lfm) {
Jim Bankoski's avatar
Jim Bankoski committed
831
832
  int idx_32, idx_16, idx_8;
  const loop_filter_info_n *const lfi_n = &cm->lf_info;
hkuang's avatar
hkuang committed
833
834
  MODE_INFO *mip = mi;
  MODE_INFO *mip2 = mi;
Jim Bankoski's avatar
Jim Bankoski committed
835
836

  // These are offsets to the next mi in the 64x64 block. It is what gets
James Zern's avatar
James Zern committed
837
838
  // added to the mi ptr as we go through each loop. It helps us to avoid
  // setting up special row and column counters for each index. The last step
Jim Bankoski's avatar
Jim Bankoski committed
839
840
841
842
843
844
845
846
  // brings us out back to the starting position.
  const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4,
                           -(mode_info_stride << 2) - 4};
  const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2,
                           -(mode_info_stride << 1) - 2};
  const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1};

  // Following variables represent shifts to position the current block
James Zern's avatar
James Zern committed
847
  // mask over the appropriate block. A shift of 36 to the left will move
Jim Bankoski's avatar
Jim Bankoski committed
848
849
850
851
852
853
854
855
856
857
858
859
860
861
  // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left
  // 4 rows to the appropriate spot.
  const int shift_32_y[] = {0, 4, 32, 36};
  const int shift_16_y[] = {0, 2, 16, 18};
  const int shift_8_y[] = {0, 1, 8, 9};
  const int shift_32_uv[] = {0, 2, 8, 10};
  const int shift_16_uv[] = {0, 1, 4, 5};
  int i;
  const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ?
                        cm->mi_rows - mi_row : MI_BLOCK_SIZE);
  const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
                        cm->mi_cols - mi_col : MI_BLOCK_SIZE);

  vp9_zero(*lfm);
hkuang's avatar
hkuang committed
862
  assert(mip != NULL);
Jim Bankoski's avatar
Jim Bankoski committed
863
864
865
866

  // TODO(jimbankoski): Try moving most of the following code into decode
  // loop and storing lfm in the mbmi structure so that we don't have to go
  // through the recursive loop structure multiple times.
hkuang's avatar
hkuang committed
867
  switch (mip->mbmi.sb_type) {
Jim Bankoski's avatar
Jim Bankoski committed
868
    case BLOCK_64X64:
hkuang's avatar
hkuang committed
869
      build_masks(lfi_n, mip , 0, 0, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
870
871
      break;
    case BLOCK_64X32:
hkuang's avatar
hkuang committed
872
      build_masks(lfi_n, mip, 0, 0, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
873
      mip2 = mip + mode_info_stride * 4;
874
875
      if (4 >= max_rows)
        break;
hkuang's avatar
hkuang committed
876
      build_masks(lfi_n, mip2, 32, 8, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
877
878
      break;
    case BLOCK_32X64:
hkuang's avatar
hkuang committed
879
      build_masks(lfi_n, mip, 0, 0, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
880
      mip2 = mip + 4;
881
882
      if (4 >= max_cols)
        break;
hkuang's avatar
hkuang committed
883
      build_masks(lfi_n, mip2, 4, 2, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
884
885
886
887
888
889
890
891
892
      break;
    default:
      for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
        const int shift_y = shift_32_y[idx_32];
        const int shift_uv = shift_32_uv[idx_32];
        const int mi_32_col_offset = ((idx_32 & 1) << 2);
        const int mi_32_row_offset = ((idx_32 >> 1) << 2);
        if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
          continue;
hkuang's avatar
hkuang committed
893
        switch (mip->mbmi.sb_type) {
Jim Bankoski's avatar
Jim Bankoski committed
894
          case BLOCK_32X32:
hkuang's avatar
hkuang committed
895
            build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
896
897
            break;
          case BLOCK_32X16:
hkuang's avatar
hkuang committed
898
            build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
899
900
            if (mi_32_row_offset + 2 >= max_rows)
              continue;
Jim Bankoski's avatar
Jim Bankoski committed
901
            mip2 = mip + mode_info_stride * 2;
hkuang's avatar
hkuang committed
902
            build_masks(lfi_n, mip2, shift_y + 16, shift_uv + 4, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
903
904
            break;
          case BLOCK_16X32:
hkuang's avatar
hkuang committed
905
            build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
906
907
            if (mi_32_col_offset + 2 >= max_cols)
              continue;
Jim Bankoski's avatar
Jim Bankoski committed
908
            mip2 = mip + 2;
hkuang's avatar
hkuang committed
909
            build_masks(lfi_n, mip2, shift_y + 2, shift_uv + 1, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
910
911
912
913
914
915
916
917
918
919
920
921
922
            break;
          default:
            for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
              const int mi_16_col_offset = mi_32_col_offset +
                  ((idx_16 & 1) << 1);
              const int mi_16_row_offset = mi_32_row_offset +
                  ((idx_16 >> 1) << 1);

              if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
                continue;

hkuang's avatar
hkuang committed
923
              switch (mip->mbmi.sb_type) {
Jim Bankoski's avatar
Jim Bankoski committed
924
                case BLOCK_16X16:
hkuang's avatar
hkuang committed
925
                  build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
926
927
                  break;
                case BLOCK_16X8:
hkuang's avatar
hkuang committed
928
                  build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
929
930
                  if (mi_16_row_offset + 1 >= max_rows)
                    continue;
Jim Bankoski's avatar
Jim Bankoski committed
931
                  mip2 = mip + mode_info_stride;
hkuang's avatar
hkuang committed
932
                  build_y_mask(lfi_n, mip2, shift_y+8, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
933
934
                  break;
                case BLOCK_8X16:
hkuang's avatar
hkuang committed
935
                  build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
936
937
                  if (mi_16_col_offset +1 >= max_cols)
                    continue;
Jim Bankoski's avatar
Jim Bankoski committed
938
                  mip2 = mip + 1;
hkuang's avatar
hkuang committed
939
                  build_y_mask(lfi_n, mip2, shift_y+1, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
940
941
942
943
944
                  break;
                default: {
                  const int shift_y = shift_32_y[idx_32] +
                                      shift_16_y[idx_16] +
                                      shift_8_y[0];
hkuang's avatar
hkuang committed
945
                  build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
946
947
948
949
950
951
952
953
954
955
956
957
958
                  mip += offset[0];
                  for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
                    const int shift_y = shift_32_y[idx_32] +
                                        shift_16_y[idx_16] +
                                        shift_8_y[idx_8];
                    const int mi_8_col_offset = mi_16_col_offset +
                        ((idx_8 & 1));
                    const int mi_8_row_offset = mi_16_row_offset +
                        ((idx_8 >> 1));

                    if (mi_8_col_offset >= max_cols ||
                        mi_8_row_offset >= max_rows)
                      continue;
hkuang's avatar
hkuang committed
959
                    build_y_mask(lfi_n, mip, shift_y, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
960
961
962
963
964
965
966
967
968
969
970
                  }
                  break;
                }
              }
            }
            break;
        }
      }
      break;
  }
  // The largest loopfilter we have is 16x16 so we use the 16x16 mask
hkuang's avatar
hkuang committed
971
  // for 32x32 transforms also.
Jim Bankoski's avatar
Jim Bankoski committed
972
973
974
975
976
977
  lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
  lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
  lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
  lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];

  // We do at least 8 tap filter on every 32x32 even if the transform size
James Zern's avatar
James Zern committed
978
  // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
Jim Bankoski's avatar
Jim Bankoski committed
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
  // remove it from the 4x4.
  lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
  lfm->left_y[TX_4X4] &= ~left_border;
  lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
  lfm->above_y[TX_4X4] &= ~above_border;
  lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
  lfm->left_uv[TX_4X4] &= ~left_border_uv;
  lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
  lfm->above_uv[TX_4X4] &= ~above_border_uv;

  // We do some special edge handling.
  if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
    const uint64_t rows = cm->mi_rows - mi_row;

    // Each pixel inside the border gets a 1,
    const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1);
    const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1);

    // Remove values completely outside our border.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv;

James Zern's avatar
James Zern committed
1007
    // We don't apply a wide loop filter on the last uv block row. If set
Jim Bankoski's avatar
Jim Bankoski committed
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
    // apply the shorter one instead.
    if (rows == 1) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
      lfm->above_uv[TX_16X16] = 0;
    }
    if (rows == 5) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
      lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
    }
  }

  if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
    const uint64_t columns = cm->mi_cols - mi_col;

    // Each pixel inside the border gets a 1, the multiply copies the border
    // to where we need it.
    const uint64_t mask_y  = (((1 << columns) - 1)) * 0x0101010101010101;
    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;

    // Internal edges are not applied on the last column of the image so
    // we mask 1 more for the internal edges
    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;

    // Remove the bits outside the image edge.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv_int;

James Zern's avatar
James Zern committed
1041
    // We don't apply a wide loop filter on the last uv column. If set
Jim Bankoski's avatar
Jim Bankoski committed
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
    // apply the shorter one instead.
    if (columns == 1) {
      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
      lfm->left_uv[TX_16X16] = 0;
    }
    if (columns == 5) {
      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
    }
  }
James Zern's avatar
James Zern committed
1052
1053
  // We don't apply a loop filter on the first column in the image, mask that
  // out.
Jim Bankoski's avatar
Jim Bankoski committed
1054
1055
1056
1057
1058
1059
  if (mi_col == 0) {
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= 0xfefefefefefefefe;
      lfm->left_uv[i] &= 0xeeee;
    }
  }
Jim Bankoski's avatar
Jim Bankoski committed
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077

  // Assert if we try to apply 2 different loop filters at the same position.
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
  assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
Jim Bankoski's avatar
Jim Bankoski committed
1078
}
Yunqing Wang's avatar
Yunqing Wang committed
1079

1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
static void filter_selectively_vert(uint8_t *s, int pitch,
                                    unsigned int mask_16x16,
                                    unsigned int mask_8x8,
                                    unsigned int mask_4x4,
                                    unsigned int mask_4x4_int,
                                    const loop_filter_info_n *lfi_n,
                                    const uint8_t *lfl) {
  unsigned int mask;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    if (mask & 1) {
      if (mask_16x16 & 1) {
Jim Bankoski's avatar
Jim Bankoski committed
1095
        vp9_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
1096
      } else if (mask_8x8 & 1) {
Jim Bankoski's avatar
Jim Bankoski committed
1097
        vp9_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
1098
      } else if (mask_4x4 & 1) {
Jim Bankoski's avatar
Jim Bankoski committed
1099
        vp9_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
1100
1101
1102
      }
    }
    if (mask_4x4_int & 1)
Jim Bankoski's avatar
Jim Bankoski committed
1103
      vp9_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
1104
1105
1106
1107
1108
1109
1110
1111
1112
    s += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}

1113
#if CONFIG_VP9_HIGHBITDEPTH
1114
1115
1116
1117
1118
1119
1120
// High bit depth counterpart of filter_selectively_vert(): applies
// vertical-edge loop filters along one row of 16 bit samples, driven by bit
// masks. Bit k of each mask refers to the position 8 samples * k to the right
// of s, and lfl[k] selects the thresholds (an index into lfi_n->lfthr).
//
//   s             first sample of the row being filtered.
//   pitch         passed through unchanged to the vp9_highbd_lpf_vertical_*
//                 filters.
//   mask_16x16    positions whose left edge gets the 16-wide filter.
//   mask_8x8      positions whose left edge gets the 8-wide filter.
//   mask_4x4      positions whose left edge gets the 4-wide filter.
//   mask_4x4_int  positions whose internal edge (at s + 4) gets the 4-wide
//                 filter.
//   bd            passed through unchanged to the filters (presumably the
//                 bit depth -- confirm against the filter prototypes).
//
// When several of the left-edge masks are set for the same position only the
// widest filter is applied (16 before 8 before 4).
static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
                                           unsigned int mask_16x16,
                                           unsigned int mask_8x8,
                                           unsigned int mask_4x4,
                                           unsigned int mask_4x4_int,
                                           const loop_filter_info_n *lfi_n,
                                           const uint8_t *lfl, int bd) {
  unsigned int mask;

  // 'mask' is the union of all masks; the loop stops once no bits remain.
  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    if (mask & 1) {
      if (mask_16x16 & 1) {
        vp9_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr, bd);
      } else if (mask_8x8 & 1) {
        vp9_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
                                  lfi->hev_thr, 1, bd);
      } else if (mask_4x4 & 1) {
        vp9_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
                                  lfi->hev_thr, 1, bd);
      }
    }
    // The internal 4x4 edge is filtered independently of the left edge.
    if (mask_4x4_int & 1)
      vp9_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
                                lfi->hev_thr, 1, bd);

    // Advance to the next position and expose its bit in every mask.
    s += 8;
    lfl += 1;
    mask_16x16 >>= 1;
    mask_8x8 >>= 1;
    mask_4x4 >>= 1;
    mask_4x4_int >>= 1;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

1152
1153
1154
1155
void vp9_filter_block_plane_non420(VP9_COMMON *cm,
                                   struct macroblockd_plane *plane,
                                   MODE_INFO *mi_8x8,
                                   int mi_row, int mi_col) {
1156
1157
  const int ss_x = plane->subsampling_x;
  const int ss_y = plane->subsampling_y;
1158
1159
  const int row_step = 1 << ss_y;
  const int col_step = 1 << ss_x;
1160
  const int row_step_stride = cm->mi_stride * row_step;
1161
  struct buf_2d *const dst = &plane->dst;
1162
  uint8_t* const dst0 = dst->buf;
1163
1164
1165
1166
  unsigned int mask_16x16[MI_BLOCK_SIZE] = {0};
  unsigned int mask_8x8[MI_BLOCK_SIZE] = {0};
  unsigned int mask_4x4[MI_BLOCK_SIZE] = {0};
  unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
1167
  uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
1168
  int r, c;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
1169

1170
  for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
1171
1172
1173
1174
1175
1176
    unsigned int mask_16x16_c = 0;
    unsigned int mask_8x8_c = 0;
    unsigned int mask_4x4_c = 0;
    unsigned int border_mask;

    // Determine the vertical edges that need filtering
1177
    for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
hkuang's avatar
hkuang committed
1178
      const MODE_INFO *mi = mi_8x8[c].src_mi;
1179
      const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
1180
      const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
1181
      // left edge of current unit is block/partition edge -> no skip
1182
1183
      const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ?
          !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1;
1184
      const int skip_this_c = skip_this && !block_edge_left;
1185
      // top edge of current unit is block/partition edge -> no skip
1186
1187
      const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
          !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
1188
      const int skip_this_r = skip_this && !block_edge_above;
1189
      const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
1190
                            ? get_uv_tx_size(&mi[0].mbmi, plane)
1191
                            : mi[0].mbmi.tx_size;
1192
      const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;