vp9_loopfilter.c 65.2 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9
10
 */

11
#include "./vpx_config.h"
12
#include "./vpx_dsp_rtcd.h"
13
14
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
15
#include "vp9/common/vp9_reconinter.h"
16
#include "vpx_dsp/vpx_dsp_common.h"
Attila Nagy's avatar
Attila Nagy committed
17
#include "vpx_mem/vpx_mem.h"
18
#include "vpx_ports/mem.h"
John Koleszar's avatar
John Koleszar committed
19

20
#include "vp9/common/vp9_seg_common.h"
21

James Zern's avatar
James Zern committed
22
// 64 bit masks for left transform size. Each 1 represents a position where
Jim Bankoski's avatar
Jim Bankoski committed
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
// we should apply a loop filter across the left border of an 8x8 block
// boundary.
//
// In the case of TX_16X16 (in low-order byte first) we end up with
// a mask that looks like this:
//
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//    10101010
//
// A loopfilter should be applied to every other 8x8 horizontally.
// One bit per 8x8 column position within a 64x64 superblock; a set bit means
// a vertical (left-edge) filter is needed for that transform size.
static const uint64_t left_64x64_txform_mask[TX_SIZES]= {
  0xffffffffffffffffULL,  // TX_4X4
  0xffffffffffffffffULL,  // TX_8x8
  0x5555555555555555ULL,  // TX_16x16
  0x1111111111111111ULL,  // TX_32x32
};

James Zern's avatar
James Zern committed
46
// 64 bit masks for above transform size. Each 1 represents a position where
Jim Bankoski's avatar
Jim Bankoski committed
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// we should apply a loop filter across the top border of an 8x8 block
// boundary.
//
// In the case of TX_32X32 (in low-order byte first) we end up with
// a mask that looks like this:
//
//    11111111
//    00000000
//    00000000
//    00000000
//    11111111
//    00000000
//    00000000
//    00000000
//
// A loopfilter should be applied to every 4th row vertically.
// One bit per 8x8 row position within a 64x64 superblock; a set bit means a
// horizontal (top-edge) filter is needed for that transform size.
static const uint64_t above_64x64_txform_mask[TX_SIZES]= {
  0xffffffffffffffffULL,  // TX_4X4
  0xffffffffffffffffULL,  // TX_8x8
  0x00ff00ff00ff00ffULL,  // TX_16x16
  0x000000ff000000ffULL,  // TX_32x32
};

James Zern's avatar
James Zern committed
70
71
72
// 64 bit masks for prediction sizes (left). Each 1 represents a position
// where left border of an 8x8 block. These are aligned to the right most
// appropriate bit, and then shifted into place.
Jim Bankoski's avatar
Jim Bankoski committed
73
74
75
76
77
78
79
80
81
82
83
84
85
//
// In the case of TX_16x32 ->  ( low order byte first ) we end up with
// a mask that looks like this :
//
//  10000000
//  10000000
//  10000000
//  10000000
//  00000000
//  00000000
//  00000000
//  00000000
// Per block size: bits marking the 8x8 units along the block's left border,
// aligned to the low-order bit and shifted into place by the caller.
static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4,
  0x0000000000000001ULL,  // BLOCK_4X8,
  0x0000000000000001ULL,  // BLOCK_8X4,
  0x0000000000000001ULL,  // BLOCK_8X8,
  0x0000000000000101ULL,  // BLOCK_8X16,
  0x0000000000000001ULL,  // BLOCK_16X8,
  0x0000000000000101ULL,  // BLOCK_16X16,
  0x0000000001010101ULL,  // BLOCK_16X32,
  0x0000000000000101ULL,  // BLOCK_32X16,
  0x0000000001010101ULL,  // BLOCK_32X32,
  0x0101010101010101ULL,  // BLOCK_32X64,
  0x0000000001010101ULL,  // BLOCK_64X32,
  0x0101010101010101ULL,  // BLOCK_64X64
};

// 64 bit mask to shift and set for each prediction size.
// Per block size: bits marking the 8x8 units along the block's top border,
// aligned to the low-order bit and shifted into place by the caller.
static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4
  0x0000000000000001ULL,  // BLOCK_4X8
  0x0000000000000001ULL,  // BLOCK_8X4
  0x0000000000000001ULL,  // BLOCK_8X8
  0x0000000000000001ULL,  // BLOCK_8X16,
  0x0000000000000003ULL,  // BLOCK_16X8
  0x0000000000000003ULL,  // BLOCK_16X16
  0x0000000000000003ULL,  // BLOCK_16X32,
  0x000000000000000fULL,  // BLOCK_32X16,
  0x000000000000000fULL,  // BLOCK_32X32,
  0x000000000000000fULL,  // BLOCK_32X64,
  0x00000000000000ffULL,  // BLOCK_64X32,
  0x00000000000000ffULL,  // BLOCK_64X64
};
James Zern's avatar
James Zern committed
117
// 64 bit mask to shift and set for each prediction size. A bit is set for
Jim Bankoski's avatar
Jim Bankoski committed
118
119
120
// each 8x8 block that would be in the left most block of the given block
// size in the 64x64 block.
// Per block size: one bit set for every 8x8 unit the block covers within a
// 64x64 superblock (anchored at the low-order bit).
static const uint64_t size_mask[BLOCK_SIZES] = {
  0x0000000000000001ULL,  // BLOCK_4X4
  0x0000000000000001ULL,  // BLOCK_4X8
  0x0000000000000001ULL,  // BLOCK_8X4
  0x0000000000000001ULL,  // BLOCK_8X8
  0x0000000000000101ULL,  // BLOCK_8X16,
  0x0000000000000003ULL,  // BLOCK_16X8
  0x0000000000000303ULL,  // BLOCK_16X16
  0x0000000003030303ULL,  // BLOCK_16X32,
  0x0000000000000f0fULL,  // BLOCK_32X16,
  0x000000000f0f0f0fULL,  // BLOCK_32X32,
  0x0f0f0f0f0f0f0f0fULL,  // BLOCK_32X64,
  0x00000000ffffffffULL,  // BLOCK_64X32,
  0xffffffffffffffffULL,  // BLOCK_64X64
};

// These are used for masking the left and above borders.
// NOTE(review): the exact bit layout these select (which 8x8 positions count
// as "border") is established by the consumers, which are outside this
// chunk — confirm against the mask-building code before relying on it.
static const uint64_t left_border =  0x1111111111111111ULL;
static const uint64_t above_border = 0x000000ff000000ffULL;
Jim Bankoski's avatar
Jim Bankoski committed
139
140
141

// 16 bit masks for uv transform sizes.
// 16 bit masks for uv transform sizes.
// UV planes are subsampled, so a 64x64 luma area maps to a 4x4 grid of
// chroma 8x8 units — hence 16-bit masks instead of 64-bit.
static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= {
  0xffff,  // TX_4X4
  0xffff,  // TX_8x8
  0x5555,  // TX_16x16
  0x1111,  // TX_32x32
};

// 16-bit counterpart of above_64x64_txform_mask for the subsampled UV planes.
static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= {
  0xffff,  // TX_4X4
  0xffff,  // TX_8x8
  0x0f0f,  // TX_16x16
  0x000f,  // TX_32x32
};

// 16 bit left mask to shift and set for each uv prediction size.
// 16 bit left mask to shift and set for each uv prediction size.
static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4,
  0x0001,  // BLOCK_4X8,
  0x0001,  // BLOCK_8X4,
  0x0001,  // BLOCK_8X8,
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8,
  0x0001,  // BLOCK_16X16,
  0x0011,  // BLOCK_16X32,
  0x0001,  // BLOCK_32X16,
  0x0011,  // BLOCK_32X32,
  0x1111,  // BLOCK_32X64
  0x0011,  // BLOCK_64X32,
  0x1111,  // BLOCK_64X64
};
// 16 bit above mask to shift and set for uv each prediction size.
static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
James Zern's avatar
James Zern committed
173
174
175
176
177
178
179
180
181
182
183
184
185
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0001,  // BLOCK_16X32,
  0x0003,  // BLOCK_32X16,
  0x0003,  // BLOCK_32X32,
  0x0003,  // BLOCK_32X64,
  0x000f,  // BLOCK_64X32,
  0x000f,  // BLOCK_64X64
Jim Bankoski's avatar
Jim Bankoski committed
186
187
188
189
};

// 64 bit mask to shift and set for each uv prediction size
// Per block size: one bit set for every chroma 8x8 unit the block covers
// within a 64x64 luma superblock (16-bit because UV is subsampled).
static const uint16_t size_mask_uv[BLOCK_SIZES] = {
  0x0001,  // BLOCK_4X4
  0x0001,  // BLOCK_4X8
  0x0001,  // BLOCK_8X4
  0x0001,  // BLOCK_8X8
  0x0001,  // BLOCK_8X16,
  0x0001,  // BLOCK_16X8
  0x0001,  // BLOCK_16X16
  0x0011,  // BLOCK_16X32,
  0x0003,  // BLOCK_32X16,
  0x0033,  // BLOCK_32X32,
  0x3333,  // BLOCK_32X64,
  0x00ff,  // BLOCK_64X32,
  0xffff,  // BLOCK_64X64
};
// UV-plane equivalents of left_border / above_border.
static const uint16_t left_border_uv =  0x1111;
static const uint16_t above_border_uv = 0x000f;

207
208
209
210
// Maps a prediction mode to a loop-filter mode-delta index: all intra modes
// and ZEROMV map to 0, the remaining inter modes map to 1.
static const int mode_lf_lut[MB_MODE_COUNT] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
  1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
};
Attila Nagy's avatar
Attila Nagy committed
211

Frank Galligan's avatar
Frank Galligan committed
212
// Fill in the per-level limit (lim) and macroblock limit (mblim) threshold
// vectors from the current sharpness setting. Higher sharpness shrinks and
// caps the inside limit; it is never allowed to fall below 1.
static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
  // The shift applied to the level is 0, 1 or 2 depending on sharpness.
  const int shift = (sharpness_lvl > 0) + (sharpness_lvl > 4);
  int level;

  // For each possible loop-filter level fill out the limit vectors.
  for (level = 0; level <= MAX_LOOP_FILTER; ++level) {
    int inside_limit = level >> shift;

    // Non-zero sharpness additionally caps the inside limit.
    if (sharpness_lvl > 0 && inside_limit > 9 - sharpness_lvl) {
      inside_limit = 9 - sharpness_lvl;
    }
    if (inside_limit < 1) {
      inside_limit = 1;
    }

    memset(lfi->lfthr[level].lim, inside_limit, SIMD_WIDTH);
    memset(lfi->lfthr[level].mblim, 2 * (level + 2) + inside_limit,
           SIMD_WIDTH);
  }
}
John Koleszar's avatar
John Koleszar committed
233

Dmitry Kovalev's avatar
Dmitry Kovalev committed
234
static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
Scott LaVarnway's avatar
Scott LaVarnway committed
235
236
237
                                const MODE_INFO *mi) {
  return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]]
                   [mode_lf_lut[mi->mode]];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
238
239
}

240
// One-time loop-filter initialization: derive the sharpness-dependent limit
// vectors and the per-level high-edge-variance (HEV) thresholds.
void vp9_loop_filter_init(VP9_COMMON *cm) {
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  int level;

  // Build the limits for the current sharpness and remember that value so
  // frame init can skip a redundant rebuild.
  update_sharpness(lfi, lf->sharpness_level);
  lf->last_sharpness_level = lf->sharpness_level;

  // The HEV threshold depends only on the filter level.
  for (level = 0; level <= MAX_LOOP_FILTER; ++level) {
    memset(lfi->lfthr[level].hev_thr, level >> 4, SIMD_WIDTH);
  }
}

Frank Galligan's avatar
Frank Galligan committed
254
// Per-frame loop-filter setup: refresh the sharpness tables if needed, then
// compute the filter level for every (segment, reference frame, mode class)
// combination, applying segment and delta adjustments.
void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
  loop_filter_info_n *const lfi = &cm->lf_info;
  struct loopfilter *const lf = &cm->lf;
  const struct segmentation *const seg = &cm->seg;
  // Multiplier for lf_deltas: 1 when the base filter level is 0..31,
  // 2 when it is 32..63.
  const int scale = 1 << (default_filt_lvl >> 5);
  int segment;

  // Rebuild the limit vectors only when sharpness has actually changed.
  if (lf->last_sharpness_level != lf->sharpness_level) {
    update_sharpness(lfi, lf->sharpness_level);
    lf->last_sharpness_level = lf->sharpness_level;
  }

  for (segment = 0; segment < MAX_SEGMENTS; ++segment) {
    int seg_lvl = default_filt_lvl;

    // Apply a segment-level override (absolute) or adjustment (delta).
    if (segfeature_active(seg, segment, SEG_LVL_ALT_LF)) {
      const int data = get_segdata(seg, segment, SEG_LVL_ALT_LF);
      const int raw = (seg->abs_delta == SEGMENT_ABSDATA)
                          ? data
                          : default_filt_lvl + data;
      seg_lvl = clamp(raw, 0, MAX_LOOP_FILTER);
    }

    if (!lf->mode_ref_delta_enabled) {
      // We could get rid of this if we assume that deltas are set to
      // zero when not in use; the encoder always uses deltas.
      memset(lfi->lvl[segment], seg_lvl, sizeof(lfi->lvl[segment]));
    } else {
      int ref;
      // Intra blocks use a single entry adjusted by the intra ref delta.
      const int intra_lvl = seg_lvl + lf->ref_deltas[INTRA_FRAME] * scale;
      lfi->lvl[segment][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);

      // Inter blocks get a per-reference, per-mode-class adjustment.
      for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
        int mode;
        for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
          const int inter_lvl = seg_lvl + lf->ref_deltas[ref] * scale +
                                lf->mode_deltas[mode] * scale;
          lfi->lvl[segment][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
        }
      }
    }
  }
}

299
// Apply vertical (column-edge) loop filters for two adjacent 8-pixel rows at
// once. The combined masks for both rows arrive packed in the low bits of
// each mask_*_l argument; the second row's bits sit mask_shift bits higher.
// One bit corresponds to one 8-pixel-wide column; lfl holds one filter level
// per column, with the second row's levels lfl_forward entries ahead.
static void filter_selectively_vert_row2(int subsampling_factor,
                                         uint8_t *s, int pitch,
                                         unsigned int mask_16x16_l,
                                         unsigned int mask_8x8_l,
                                         unsigned int mask_4x4_l,
                                         unsigned int mask_4x4_int_l,
                                         const loop_filter_info_n *lfi_n,
                                         const uint8_t *lfl) {
  // Subsampled (chroma) rows are 4 units wide; luma rows are 8 units wide.
  const int mask_shift = subsampling_factor ? 4 : 8;
  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
  const int lfl_forward = subsampling_factor ? 4 : 8;

  // Split the packed masks into row-0 (_0) and row-1 (_1) halves.
  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
  unsigned int mask;

  // Walk the columns left to right; stop once no mask has bits left.
  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
              mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

    // TODO(yunqingwang): count in loopfilter functions should be removed.
    if (mask & 1) {
      // For each edge size: when both rows need the filter, use the _dual
      // variant covering both at once; otherwise filter only the row whose
      // bit is set (row 1 lives 8 lines further down: s + 8 * pitch).
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        if ((mask_16x16_0 & mask_16x16_1) & 1) {
          vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                   lfi0->hev_thr);
        } else if (mask_16x16_0 & 1) {
          vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
                              lfi0->hev_thr);
        } else {
          vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
                              lfi1->lim, lfi1->hev_thr);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        if ((mask_8x8_0 & mask_8x8_1) & 1) {
          vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_8x8_0 & 1) {
          vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr);
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
          vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_0 & 1) {
          vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
        } else {
          vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr);
        }
      }

      // Interior 4x4 edges sit 4 pixels into the 8-pixel column (s + 4).
      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
          vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
                                  lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                  lfi1->hev_thr);
        } else if (mask_4x4_int_0 & 1) {
          vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
                             lfi0->hev_thr);
        } else {
          vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
                             lfi1->hev_thr);
        }
      }
    }

    // Advance one 8-pixel column and shift all masks down one bit.
    s += 8;
    lfl += 1;
    mask_16x16_0 >>= 1;
    mask_8x8_0 >>= 1;
    mask_4x4_0 >>= 1;
    mask_4x4_int_0 >>= 1;
    mask_16x16_1 >>= 1;
    mask_8x8_1 >>= 1;
    mask_4x4_1 >>= 1;
    mask_4x4_int_1 >>= 1;
  }
}

396
#if CONFIG_VP9_HIGHBITDEPTH
397
static void highbd_filter_selectively_vert_row2(int subsampling_factor,
398
399
400
401
402
403
404
                                                uint16_t *s, int pitch,
                                                unsigned int mask_16x16_l,
                                                unsigned int mask_8x8_l,
                                                unsigned int mask_4x4_l,
                                                unsigned int mask_4x4_int_l,
                                                const loop_filter_info_n *lfi_n,
                                                const uint8_t *lfl, int bd) {
405
406
407
  const int mask_shift = subsampling_factor ? 4 : 8;
  const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
  const int lfl_forward = subsampling_factor ? 4 : 8;
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428

  unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
  unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
  unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
  unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
  unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
  unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
  unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
  unsigned int mask;

  for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
       mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
       mask; mask >>= 1) {
    const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
    const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);

    // TODO(yunqingwang): count in loopfilter functions should be removed.
    if (mask & 1) {
      if ((mask_16x16_0 | mask_16x16_1) & 1) {
        if ((mask_16x16_0 & mask_16x16_1) & 1) {
429
          vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
430
431
                                          lfi0->hev_thr, bd);
        } else if (mask_16x16_0 & 1) {
432
          vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
433
434
                                     lfi0->hev_thr, bd);
        } else {
435
          vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
436
437
438
439
440
441
                                     lfi1->lim, lfi1->hev_thr, bd);
        }
      }

      if ((mask_8x8_0 | mask_8x8_1) & 1) {
        if ((mask_8x8_0 & mask_8x8_1) & 1) {
442
          vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
443
444
445
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_8x8_0 & 1) {
446
          vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
447
                                    lfi0->hev_thr, bd);
448
        } else {
449
          vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
450
                                    lfi1->lim, lfi1->hev_thr, bd);
451
452
453
454
455
        }
      }

      if ((mask_4x4_0 | mask_4x4_1) & 1) {
        if ((mask_4x4_0 & mask_4x4_1) & 1) {
456
          vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
457
458
459
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_4x4_0 & 1) {
460
          vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
461
                                    lfi0->hev_thr, bd);
462
        } else {
463
          vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
464
                                    lfi1->lim, lfi1->hev_thr, bd);
465
466
467
468
469
        }
      }

      if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
        if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
470
          vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
471
472
473
                                         lfi0->hev_thr, lfi1->mblim, lfi1->lim,
                                         lfi1->hev_thr, bd);
        } else if (mask_4x4_int_0 & 1) {
474
          vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
475
                                    lfi0->hev_thr, bd);
476
        } else {
477
          vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
478
                                    lfi1->lim, lfi1->hev_thr, bd);
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
        }
      }
    }

    s += 8;
    lfl += 1;
    mask_16x16_0 >>= 1;
    mask_8x8_0 >>= 1;
    mask_4x4_0 >>= 1;
    mask_4x4_int_0 >>= 1;
    mask_16x16_1 >>= 1;
    mask_8x8_1 >>= 1;
    mask_4x4_1 >>= 1;
    mask_4x4_int_1 >>= 1;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

497
498
499
500
// Apply horizontal (row-edge) loop filters along one row of 8-pixel columns.
// Each mask bit selects one column; when two adjacent columns need the same
// edge size, the _dual variant handles both and the loop advances by two
// columns (count == 2) instead of one.
static void filter_selectively_horiz(uint8_t *s, int pitch,
                                     unsigned int mask_16x16,
                                     unsigned int mask_8x8,
                                     unsigned int mask_4x4,
                                     unsigned int mask_4x4_int,
                                     const loop_filter_info_n *lfi_n,
                                     const uint8_t *lfl) {
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        // Two adjacent 16-wide edges -> 16-pixel variant and skip a column.
        if ((mask_16x16 & 3) == 3) {
          vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
                                     lfi->hev_thr);
          count = 2;
        } else {
          vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr);
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);

          // Interior 4x4 edges lie 4 rows below the block edge.
          if ((mask_4x4_int & 3) == 3) {
            vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

          vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
                                    lfi->hev_thr, lfin->mblim, lfin->lim,
                                    lfin->hev_thr);
          if ((mask_4x4_int & 3) == 3) {
            vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
                                      lfi->lim, lfi->hev_thr, lfin->mblim,
                                      lfin->lim, lfin->hev_thr);
          } else {
            if (mask_4x4_int & 1)
              vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                   lfi->hev_thr);
            else if (mask_4x4_int & 2)
              vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
                                   lfin->lim, lfin->hev_thr);
          }
          count = 2;
        } else {
          vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);

          if (mask_4x4_int & 1)
            vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                                 lfi->hev_thr);
        }
      } else if (mask_4x4_int & 1) {
        // Only an interior edge for this column.
        vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
                             lfi->hev_thr);
      }
    }
    // Advance by however many columns this iteration consumed.
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}

593
#if CONFIG_VP9_HIGHBITDEPTH
594
595
596
597
598
599
600
static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
                                            unsigned int mask_16x16,
                                            unsigned int mask_8x8,
                                            unsigned int mask_4x4,
                                            unsigned int mask_4x4_int,
                                            const loop_filter_info_n *lfi_n,
                                            const uint8_t *lfl, int bd) {
601
602
603
604
605
606
607
608
609
610
611
  unsigned int mask;
  int count;

  for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
       mask; mask >>= count) {
    const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;

    count = 1;
    if (mask & 1) {
      if (mask_16x16 & 1) {
        if ((mask_16x16 & 3) == 3) {
612
613
          vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
                                            lfi->hev_thr, bd);
614
615
          count = 2;
        } else {
616
617
          vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
                                           lfi->hev_thr, bd);
618
619
620
621
622
623
        }
      } else if (mask_8x8 & 1) {
        if ((mask_8x8 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

624
          vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
625
626
627
628
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);

          if ((mask_4x4_int & 3) == 3) {
629
            vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
630
631
632
633
634
                                             lfi->lim, lfi->hev_thr,
                                             lfin->mblim, lfin->lim,
                                             lfin->hev_thr, bd);
          } else {
            if (mask_4x4_int & 1) {
635
              vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
636
                                          lfi->lim, lfi->hev_thr, bd);
637
            } else if (mask_4x4_int & 2) {
638
              vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
639
                                          lfin->lim, lfin->hev_thr, bd);
640
641
642
643
            }
          }
          count = 2;
        } else {
644
          vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
645
                                      lfi->hev_thr, bd);
646
647

          if (mask_4x4_int & 1) {
648
            vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
649
                                        lfi->lim, lfi->hev_thr, bd);
650
651
652
653
654
655
656
          }
        }
      } else if (mask_4x4 & 1) {
        if ((mask_4x4 & 3) == 3) {
          // Next block's thresholds.
          const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);

657
          vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
658
659
660
                                           lfi->hev_thr, lfin->mblim, lfin->lim,
                                           lfin->hev_thr, bd);
          if ((mask_4x4_int & 3) == 3) {
661
            vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim,
662
663
664
665
666
                                             lfi->lim, lfi->hev_thr,
                                             lfin->mblim, lfin->lim,
                                             lfin->hev_thr, bd);
          } else {
            if (mask_4x4_int & 1) {
667
              vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
668
                                          lfi->lim, lfi->hev_thr, bd);
669
            } else if (mask_4x4_int & 2) {
670
              vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
671
                                          lfin->lim, lfin->hev_thr, bd);
672
673
674
675
            }
          }
          count = 2;
        } else {
676
          vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
677
                                      lfi->hev_thr, bd);
678
679

          if (mask_4x4_int & 1) {
680
            vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
681
                                        lfi->lim, lfi->hev_thr, bd);
682
683
684
          }
        }
      } else if (mask_4x4_int & 1) {
685
        vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
686
                                    lfi->hev_thr, bd);
687
688
689
690
691
692
693
694
695
696
697
698
      }
    }
    s += 8 * count;
    lfl += count;
    mask_16x16 >>= count;
    mask_8x8 >>= count;
    mask_4x4 >>= count;
    mask_4x4_int >>= count;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

Jim Bankoski's avatar
Jim Bankoski committed
699
// This function ors into the current lfm structure, where to do loop
James Zern's avatar
James Zern committed
700
701
// filters for the specific mi we are looking at. It uses information
// including the block_size_type (32x16, 32x32, etc.), the transform size,
Jim Bankoski's avatar
Jim Bankoski committed
702
703
704
705
706
707
708
709
// whether there were any coefficients encoded, and the loop filter strength
// block we are currently looking at. Shift is used to position the
// 1's we produce.
// TODO(JBB) Need another function for different resolution color..
static void build_masks(const loop_filter_info_n *const lfi_n,
                        const MODE_INFO *mi, const int shift_y,
                        const int shift_uv,
                        LOOP_FILTER_MASK *lfm) {
Scott LaVarnway's avatar
Scott LaVarnway committed
710
711
  const BLOCK_SIZE block_size = mi->sb_type;
  const TX_SIZE tx_size_y = mi->tx_size;
712
  const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
Scott LaVarnway's avatar
Scott LaVarnway committed
713
  const int filter_level = get_filter_level(lfi_n, mi);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
714
715
716
717
718
719
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
  uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
  uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
  uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
Yunqing Wang's avatar
Yunqing Wang committed
720
721
  int i;

Jim Bankoski's avatar
Jim Bankoski committed
722
  // If filter level is 0 we don't loop filter.
723
  if (!filter_level) {
Jim Bankoski's avatar
Jim Bankoski committed
724
    return;
725
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
726
727
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
728
729
    int index = shift_y;
    for (i = 0; i < h; i++) {
James Zern's avatar
James Zern committed
730
      memset(&lfm->lfl_y[index], filter_level, w);
731
732
733
      index += 8;
    }
  }
Jim Bankoski's avatar
Jim Bankoski committed
734
735

  // These set 1 in the current block size for the block size edges.
James Zern's avatar
James Zern committed
736
  // For instance if the block size is 32x16, we'll set:
Jim Bankoski's avatar
Jim Bankoski committed
737
738
739
740
741
742
743
744
  //    above =   1111
  //              0000
  //    and
  //    left  =   1000
  //          =   1000
  // NOTE : In this example the low bit is left most ( 1000 ) is stored as
  //        1,  not 8...
  //
James Zern's avatar
James Zern committed
745
  // U and V set things on a 16 bit scale.
Jim Bankoski's avatar
Jim Bankoski committed
746
747
748
749
750
751
752
753
  //
  *above_y |= above_prediction_mask[block_size] << shift_y;
  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
  *left_y |= left_prediction_mask[block_size] << shift_y;
  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;

  // If the block has no coefficients and is not intra we skip applying
  // the loop filter on block edges.
Scott LaVarnway's avatar
Scott LaVarnway committed
754
  if (mi->skip && is_inter_block(mi))
Jim Bankoski's avatar
Jim Bankoski committed
755
756
    return;

James Zern's avatar
James Zern committed
757
  // Here we are adding a mask for the transform size. The transform
Jim Bankoski's avatar
Jim Bankoski committed
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
  // size mask is set to be correct for a 64x64 prediction block size. We
  // mask to match the size of the block we are working on and then shift it
  // into place..
  *above_y |= (size_mask[block_size] &
               above_64x64_txform_mask[tx_size_y]) << shift_y;
  *above_uv |= (size_mask_uv[block_size] &
                above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;

  *left_y |= (size_mask[block_size] &
              left_64x64_txform_mask[tx_size_y]) << shift_y;
  *left_uv |= (size_mask_uv[block_size] &
               left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;

  // Here we are trying to determine what to do with the internal 4x4 block
  // boundaries.  These differ from the 4x4 boundaries on the outside edge of
  // an 8x8 in that the internal ones can be skipped and don't depend on
  // the prediction block size.
Dmitry Kovalev's avatar
Dmitry Kovalev committed
775
  if (tx_size_y == TX_4X4)
776
    *int_4x4_y |= size_mask[block_size] << shift_y;
Dmitry Kovalev's avatar
Dmitry Kovalev committed
777
778

  if (tx_size_uv == TX_4X4)
Jim Bankoski's avatar
Jim Bankoski committed
779
780
781
782
    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}

// This function does the same thing as the one above with the exception that
James Zern's avatar
James Zern committed
783
// it only affects the y masks. It exists because for blocks < 16x16 in size,
Jim Bankoski's avatar
Jim Bankoski committed
784
785
786
787
// we only update u and v masks on the first block.
static void build_y_mask(const loop_filter_info_n *const lfi_n,
                         const MODE_INFO *mi, const int shift_y,
                         LOOP_FILTER_MASK *lfm) {
Scott LaVarnway's avatar
Scott LaVarnway committed
788
789
790
  const BLOCK_SIZE block_size = mi->sb_type;
  const TX_SIZE tx_size_y = mi->tx_size;
  const int filter_level = get_filter_level(lfi_n, mi);
Dmitry Kovalev's avatar
Dmitry Kovalev committed
791
792
793
  uint64_t *const left_y = &lfm->left_y[tx_size_y];
  uint64_t *const above_y = &lfm->above_y[tx_size_y];
  uint64_t *const int_4x4_y = &lfm->int_4x4_y;
Yunqing Wang's avatar
Yunqing Wang committed
794
795
  int i;

796
  if (!filter_level) {
Jim Bankoski's avatar
Jim Bankoski committed
797
    return;
798
  } else {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
799
800
    const int w = num_8x8_blocks_wide_lookup[block_size];
    const int h = num_8x8_blocks_high_lookup[block_size];
801
802
    int index = shift_y;
    for (i = 0; i < h; i++) {
James Zern's avatar
James Zern committed
803
      memset(&lfm->lfl_y[index], filter_level, w);
804
805
806
      index += 8;
    }
  }
Jim Bankoski's avatar
Jim Bankoski committed
807
808
809
810

  *above_y |= above_prediction_mask[block_size] << shift_y;
  *left_y |= left_prediction_mask[block_size] << shift_y;

Scott LaVarnway's avatar
Scott LaVarnway committed
811
  if (mi->skip && is_inter_block(mi))
Jim Bankoski's avatar
Jim Bankoski committed
812
813
814
815
816
817
818
819
    return;

  *above_y |= (size_mask[block_size] &
               above_64x64_txform_mask[tx_size_y]) << shift_y;

  *left_y |= (size_mask[block_size] &
              left_64x64_txform_mask[tx_size_y]) << shift_y;

Dmitry Kovalev's avatar
Dmitry Kovalev committed
820
  if (tx_size_y == TX_4X4)
821
    *int_4x4_y |= size_mask[block_size] << shift_y;
Jim Bankoski's avatar
Jim Bankoski committed
822
823
}

// Finalizes the loop filter bit masks for the 64x64 superblock at
// (mi_row, mi_col): folds the 32x32 transform masks into the 16x16 ones,
// promotes border 4x4 edges to 8x8, trims mask bits that fall outside the
// frame, clears left edges on the first image column, and asserts that no
// two filter lengths are requested at the same position.
void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row,
                     const int mi_col, LOOP_FILTER_MASK *lfm) {
  int i;

  // The largest loopfilter we have is 16x16 so we use the 16x16 mask
  // for 32x32 transforms also.
  lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
  lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
  lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
  lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];

  // We do at least 8 tap filter on every 32x32 even if the transform size
  // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
  // remove it from the 4x4.
  lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
  lfm->left_y[TX_4X4] &= ~left_border;
  lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
  lfm->above_y[TX_4X4] &= ~above_border;
  lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
  lfm->left_uv[TX_4X4] &= ~left_border_uv;
  lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
  lfm->above_uv[TX_4X4] &= ~above_border_uv;

  // We do some special edge handling.
  // Superblock extends past the bottom of the frame: keep only the mask
  // bits for the rows that actually exist.
  if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
    const uint64_t rows = cm->mi_rows - mi_row;

    // Each pixel inside the border gets a 1,
    // y packs 8 bits per mi row (rows << 3); uv packs 4 bits per uv row,
    // with uv rows being half the mi rows rounded up ((rows + 1) >> 1).
    const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1);
    const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1);

    // Remove values completely outside our border.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv;

    // We don't apply a wide loop filter on the last uv block row. If set
    // apply the shorter one instead.
    if (rows == 1) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
      lfm->above_uv[TX_16X16] = 0;
    }
    // NOTE(review): 0xff00 selects the final uv row kept by mask_uv when
    // rows == 5; presumably the wide filter would read past the border
    // there — confirm against the corresponding column case below.
    if (rows == 5) {
      lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
      lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
    }
  }

  // Superblock extends past the right edge of the frame: keep only the
  // mask bits for the columns that actually exist.
  if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
    const uint64_t columns = cm->mi_cols - mi_col;

    // Each pixel inside the border gets a 1, the multiply copies the border
    // to where we need it.
    const uint64_t mask_y  = (((1 << columns) - 1)) * 0x0101010101010101ULL;
    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;

    // Internal edges are not applied on the last column of the image so
    // we mask 1 more for the internal edges
    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;

    // Remove the bits outside the image edge.
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= mask_y;
      lfm->above_y[i] &= mask_y;
      lfm->left_uv[i] &= mask_uv;
      lfm->above_uv[i] &= mask_uv;
    }
    lfm->int_4x4_y &= mask_y;
    lfm->int_4x4_uv &= mask_uv_int;

    // We don't apply a wide loop filter on the last uv column. If set
    // apply the shorter one instead.
    if (columns == 1) {
      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
      lfm->left_uv[TX_16X16] = 0;
    }
    // NOTE(review): 0xcccc selects the final uv column kept by mask_uv when
    // columns == 5 — mirror of the rows == 5 case above; confirm.
    if (columns == 5) {
      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
    }
  }
  // We don't apply a loop filter on the first column in the image, mask that
  // out.
  if (mi_col == 0) {
    for (i = 0; i < TX_32X32; i++) {
      lfm->left_y[i] &= 0xfefefefefefefefeULL;
      lfm->left_uv[i] &= 0xeeee;
    }
  }

  // Assert if we try to apply 2 different loop filters at the same position.
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
  assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8]));
  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
  assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
  assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
}

// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
942
                    MODE_INFO **mi, const int mode_info_stride,
943
                    LOOP_FILTER_MASK *lfm) {
Jim Bankoski's avatar
Jim Bankoski committed
944
945
  int idx_32, idx_16, idx_8;
  const loop_filter_info_n *const lfi_n = &cm->lf_info;
946
947
  MODE_INFO **mip = mi;
  MODE_INFO **mip2 = mi;
Jim Bankoski's avatar
Jim Bankoski committed
948
949

  // These are offsets to the next mi in the 64x64 block. It is what gets
James Zern's avatar
James Zern committed
950
951
  // added to the mi ptr as we go through each loop. It helps us to avoid
  // setting up special row and column counters for each index. The last step
Jim Bankoski's avatar
Jim Bankoski committed
952
953
954
955
956
957
958
959
  // brings us out back to the starting position.
  const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4,
                           -(mode_info_stride << 2) - 4};
  const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2,
                           -(mode_info_stride << 1) - 2};
  const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1};

  // Following variables represent shifts to position the current block
James Zern's avatar
James Zern committed
960
  // mask over the appropriate block. A shift of 36 to the left will move
Jim Bankoski's avatar
Jim Bankoski committed
961
962
963
964
965
966
967
968
969
970
971
972
973
  // the bits for the final 32 by 32 block in the 64x64 up 4 rows and left
  // 4 rows to the appropriate spot.
  const int shift_32_y[] = {0, 4, 32, 36};
  const int shift_16_y[] = {0, 2, 16, 18};
  const int shift_8_y[] = {0, 1, 8, 9};
  const int shift_32_uv[] = {0, 2, 8, 10};
  const int shift_16_uv[] = {0, 1, 4, 5};
  const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ?
                        cm->mi_rows - mi_row : MI_BLOCK_SIZE);
  const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
                        cm->mi_cols - mi_col : MI_BLOCK_SIZE);

  vp9_zero(*lfm);
974
  assert(mip[0] != NULL);
Jim Bankoski's avatar
Jim Bankoski committed
975
976
977
978

  // TODO(jimbankoski): Try moving most of the following code into decode
  // loop and storing lfm in the mbmi structure so that we don't have to go
  // through the recursive loop structure multiple times.
Scott LaVarnway's avatar
Scott LaVarnway committed
979
  switch (mip[0]->sb_type) {
Jim Bankoski's avatar
Jim Bankoski committed
980
    case BLOCK_64X64:
981
      build_masks(lfi_n, mip[0] , 0, 0, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
982
983
      break;
    case BLOCK_64X32:
984
      build_masks(lfi_n, mip[0], 0, 0, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
985
      mip2 = mip + mode_info_stride * 4;
986
987
      if (4 >= max_rows)
        break;
988
      build_masks(lfi_n, mip2[0], 32, 8, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
989
990
      break;
    case BLOCK_32X64:
991
      build_masks(lfi_n, mip[0], 0, 0, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
992
      mip2 = mip + 4;
993
994
      if (4 >= max_cols)
        break;
995
      build_masks(lfi_n, mip2[0], 4, 2, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
996
997
998
999
1000
1001
1002
1003
1004
      break;
    default:
      for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
        const int shift_y = shift_32_y[idx_32];
        const int shift_uv = shift_32_uv[idx_32];
        const int mi_32_col_offset = ((idx_32 & 1) << 2);
        const int mi_32_row_offset = ((idx_32 >> 1) << 2);
        if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
          continue;
Scott LaVarnway's avatar
Scott LaVarnway committed
1005
        switch (mip[0]->sb_type) {
Jim Bankoski's avatar
Jim Bankoski committed
1006
          case BLOCK_32X32:
1007
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
1008
1009
            break;
          case BLOCK_32X16:
1010
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
1011
1012
            if (mi_32_row_offset + 2 >= max_rows)
              continue;
Jim Bankoski's avatar
Jim Bankoski committed
1013
            mip2 = mip + mode_info_stride * 2;
1014
            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
1015
1016
            break;
          case BLOCK_16X32:
1017
            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
1018
1019
            if (mi_32_col_offset + 2 >= max_cols)
              continue;
Jim Bankoski's avatar
Jim Bankoski committed
1020
            mip2 = mip + 2;
1021
            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
            break;
          default:
            for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
              const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16];
              const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16];
              const int mi_16_col_offset = mi_32_col_offset +
                  ((idx_16 & 1) << 1);
              const int mi_16_row_offset = mi_32_row_offset +
                  ((idx_16 >> 1) << 1);

              if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
                continue;

Scott LaVarnway's avatar
Scott LaVarnway committed
1035
              switch (mip[0]->sb_type) {
Jim Bankoski's avatar
Jim Bankoski committed
1036
                case BLOCK_16X16:
1037
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
1038
1039
                  break;
                case BLOCK_16X8:
1040
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
1041
1042
                  if (mi_16_row_offset + 1 >= max_rows)
                    continue;
Jim Bankoski's avatar
Jim Bankoski committed
1043
                  mip2 = mip + mode_info_stride;
1044
                  build_y_mask(lfi_n, mip2[0], shift_y+8, lfm);
Jim Bankoski's avatar
Jim Bankoski committed
1045
1046
                  break;
                case BLOCK_8X16:
1047
                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
1048
1049
                  if (mi_16_col_offset +1 >= max_cols)
                    continue;
Jim Bankoski's avatar
Jim Bankoski committed
1050
                  mip2 = mip + 1;
1051
                  build_y_mask(lfi_n, mip2[0], shift_y+1, lfm);