lrf.rs 38.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#![allow(safe_extern_statics)]

Luca Barbato's avatar
Luca Barbato committed
12
use crate::frame::Frame;
Raphaël Zumer's avatar
Raphaël Zumer committed
13
14
15
16
use crate::encoder::FrameInvariants;
use crate::context::PLANES;
use crate::context::MAX_SB_SIZE;
use crate::plane::Plane;
Monty's avatar
Monty committed
17
use crate::plane::PlaneSlice;
18
use crate::plane::PlaneMutSlice;
Raphaël Zumer's avatar
Raphaël Zumer committed
19
20
use crate::plane::PlaneOffset;
use crate::plane::PlaneConfig;
21
use std::cmp;
Raphaël Zumer's avatar
Raphaël Zumer committed
22
use crate::util::clamp;
23
24
use crate::util::CastFromPrimitive;
use crate::util::Pixel;
25

26
27
use std::ops::{Index, IndexMut};

28
/// Base-2 logarithm of the largest restoration tile size (1 << 8 = 256).
/// NOTE(review): the unit is presumably pixels — confirm against the users
/// of this constant.
pub const RESTORATION_TILESIZE_MAX_LOG2: usize = 8;
29

30
31
32
33
34
// Restoration type codes.
// NOTE(review): presumably the per-plane loop-restoration type values
// (none / switchable / Wiener / self-guided projection) — confirm
// against the code that reads and writes them.
pub const RESTORE_NONE: u8 = 0;
pub const RESTORE_SWITCHABLE: u8 = 1;
pub const RESTORE_WIENER: u8 = 2;
pub const RESTORE_SGRPROJ: u8 = 3;

35
36
37
// Per-tap limits for the three Wiener filter coefficients.
// NOTE(review): MIN/MAX look like the inclusive coding range of each tap
// and MID like its default/reference value — confirm against the coder.
pub const WIENER_TAPS_MIN: [i8; 3] = [ -5, -23, -17 ];
pub const WIENER_TAPS_MID: [i8; 3] = [ 3, -7, 15 ];
pub const WIENER_TAPS_MAX: [i8; 3] = [ 10, 8, 46 ];
// NOTE(review): presumably the per-tap subexponential coding parameter.
// Nothing visible in this file reads it, hence the lint allowance.
#[allow(unused)]
pub const WIENER_TAPS_K:   [i8; 3] = [ 1, 2, 3 ];
// NOTE(review): presumably the fixed-point precision of the Wiener taps
// (taps sum to 1 << WIENER_BITS) — confirm.
pub const WIENER_BITS: usize = 7;
41
42
43
44
45
46
47

// Limits for the two self-guided projection weights (`xqd`).
// NOTE(review): MIN/MAX look like the inclusive coding range and MID the
// default/reference value — confirm against the coder.
pub const SGRPROJ_XQD_MIN: [i8; 2] = [ -96, -32 ];
pub const SGRPROJ_XQD_MID: [i8; 2] = [ -32, 31 ];
pub const SGRPROJ_XQD_MAX: [i8; 2] = [ 31, 95 ];
// NOTE(review): presumably the subexponential coding parameter for xqd.
pub const SGRPROJ_PRJ_SUBEXP_K: u8 = 4;
// The three projection weights used by the stripe filter sum to
// 1 << SGRPROJ_PRJ_BITS.
pub const SGRPROJ_PRJ_BITS: u8 = 7;
// log2 of the number of parameter sets in SGRPROJ_PARAMS_S.
pub const SGRPROJ_PARAMS_BITS: u8 = 4;
// Right shift applied to (variance term * strength) in sgrproj_sum_finish.
pub const SGRPROJ_MTABLE_BITS: u8 = 20;
// The intermediate 'a' value is expressed out of 1 << SGRPROJ_SGR_BITS.
pub const SGRPROJ_SGR_BITS: u8 = 8;
// Precision of the fixed-point 1/n reciprocals (455 for n=9, 164 for n=25).
pub const SGRPROJ_RECIP_BITS: u8 = 12;
// Extra fractional bits carried by the filtered intermediate values.
pub const SGRPROJ_RST_BITS: u8 = 4;
// Strength pairs indexed by parameter set.  Column 0 is the strength
// used for the radius-2 pass and column 1 for the radius-1 pass; a zero
// strength means that pass is bypassed and the unfiltered pixel is used
// in its place (see sgrproj_stripe_filter).
pub const SGRPROJ_PARAMS_S: [[u32; 2]; 1 << SGRPROJ_PARAMS_BITS] = [
  [140, 3236], [112, 2158], [ 93, 1618], [ 80, 1438],
  [ 70, 1295], [ 58, 1177], [ 47, 1079], [ 37,  996],
  [ 30,  925], [ 25,  863], [  0, 2589], [  0, 1618],
  [  0, 1177], [  0,  925], [ 56,    0], [ 22,    0]
];

Romain Vimont's avatar
Romain Vimont committed
59
/// Loop-restoration filter choice together with the parameters that
/// filter needs.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RestorationFilter {
  /// No restoration filtering.
  None,
  /// Wiener filter with two sets of three taps.
  /// NOTE(review): presumably one set per filtering direction — confirm.
  Wiener  { coeffs: [[i8; 3]; 2] },
  /// Self-guided projection filter.
  /// NOTE(review): `set` and `xqd` presumably correspond to the
  /// parameters of the same name taken by `sgrproj_stripe_filter`.
  Sgrproj { set: u8,
            xqd: [i8; 2] },
}

// Implemented through the standard trait rather than an inherent
// `default()` so the type also works with `Default::default()`,
// `#[derive(Default)]` on containing structs, `unwrap_or_default()`, etc.
// `RestorationFilter::default()` keeps resolving for existing callers.
impl Default for RestorationFilter {
  fn default() -> RestorationFilter {
    RestorationFilter::None
  }
}

73
#[inline(always)]
Monty's avatar
Monty committed
74
fn sgrproj_sum_finish(ssq: u32, sum: u32, n: u32, one_over_n: u32, s: u32, bdm8: usize) -> (u32, u32) {
75
76
  let scaled_ssq = (ssq + (1 << (2 * bdm8) >> 1)) >> (2 * bdm8);
  let scaled_sum = (sum + (1 << bdm8 >> 1)) >> bdm8;
Monty's avatar
Monty committed
77
78
79
  let p = cmp::max(0, (scaled_ssq*n) as i32 - (scaled_sum*scaled_sum) as i32) as u32;
  let z = (p * s + (1 << SGRPROJ_MTABLE_BITS >> 1)) >> SGRPROJ_MTABLE_BITS;
  let a = if z >= 255 {
80
81
82
83
    256
  } else if z == 0 {
    1
  } else {
Monty's avatar
Monty committed
84
    (((z << SGRPROJ_SGR_BITS) + z/2) / (z+1))
85
  };
86
87
  let b = ((1 << SGRPROJ_SGR_BITS) - a) * sum * one_over_n;
  (a, (b + (1 << SGRPROJ_RECIP_BITS >> 1)) >> SGRPROJ_RECIP_BITS)
88
}
Monty's avatar
Monty committed
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

// The addressing below is a bit confusing, made worse by LRF's odd
// clipping requirements, and our reusing code for partial frames.  So
// I'm documenting the LRF conventions here in detail.

// 'Relative to plane storage' means that a coordinate or bound is
// being applied as if to the full Plane backing the PlaneSlice.  For
// example, a PlaneSlice may represent a subset of a middle of a
// plane, but when we say the top/left bounds are clipped 'relative to
// plane storage', that means relative to 0,0 of the plane, not 0,0 of
// the plane slice.

// 'Relative to the slice view' means that a coordinate or bound is
// counted from the 0,0 of the PlaneSlice, not the Plane from which it
// was sliced.

// Passed in plane slices may be the same size or different sizes;
// filter access will be clipped to 0,0..w,h of the underlying plane
// storage for both planes, depending which is accessed.  Note that
// the passed in w/h that specifies the storage clipping is actually
// relative to the slice view, not the plane storage (it
// simplifies the math internally).  Eg, if a PlaneSlice has a y
// offset of -2 (meaning its origin is two rows above the top row of
// the backing plane), and we pass in a height of 4, the rows
// 0,1,2,3,4 of the slice address -2, -1, 0, 1, 2 of the backing plane
// with access clipped to 0, 0, 0, 1, 1.

// Active area cropping is done by specifying a w,h smaller
// than the actual underlying plane storage.

// stripe_y is the beginning of the current stripe (used for source
// buffer choice/clipping) relative to the passed in plane view.  It
// may (and regularly will) be negative.

// stripe_h is the height of the current stripe (again used for source
// buffer choice/clipping).  It may specify a stripe boundary less
// than, equal to, or larger than the buffers we're accessing.

// x and y specify the center pixel of the current filter kernel
// application.  They are relative to the passed in slice views.

Monty's avatar
Monty committed
130
fn sgrproj_box_sum_slow<T: Pixel>(a: &mut u32, b: &mut u32,
Monty's avatar
Monty committed
131
132
                                  stripe_y: isize, stripe_h: usize,
                                  x: isize, y: isize,
Monty's avatar
Monty committed
133
                                  r: usize, n: u32, one_over_n: u32, s: u32, bdm8: usize,
Monty's avatar
Monty committed
134
135
                                  backing: &PlaneSlice<T>, cdeffed: &PlaneSlice<T>,
                                  crop_w: usize, crop_h: usize) {
Monty's avatar
Monty committed
136
137
  let mut ssq = 0;
  let mut sum = 0;
138

Monty's avatar
Monty committed
139
  for yi in y-r as isize..=y+r as isize {
Monty's avatar
Monty committed
140
141
142
143
144
145
146
147
148
149
    // clamp to crop rectangle before deciding source.
    let clamp_y = clamp(yi, -cdeffed.y, crop_h as isize - 1);
    let clamp_w = (crop_w as isize - x + r as isize) as usize;
    // clamp vertically to stripe limits
    let ly = clamp(clamp_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
    // left-hand addressing limit
    let left = cmp::max(0, r as isize - x - cdeffed.x) as usize;
    // right-hand addressing limit
    let right = cmp::min(2*r+1, clamp_w);

Monty's avatar
Monty committed
150
    // decide if we're vertically inside or outside the stripe
Monty's avatar
Monty committed
151
152
    let src_plane = if clamp_y >= stripe_y && clamp_y < stripe_y + stripe_h as isize {
      cdeffed
Monty's avatar
Monty committed
153
    } else {
Monty's avatar
Monty committed
154
      backing
Monty's avatar
Monty committed
155
    };
Monty's avatar
Monty committed
156
    // Reslice to avoid a negative X index.
157
    let p = &src_plane.reslice(x - r as isize,ly)[0];
Monty's avatar
Monty committed
158
159
160

    // run accumulation to left of frame storage (if any)
    for _xi in 0..left {
Monty's avatar
Monty committed
161
      let c = u32::cast_from(p[(r as isize - x) as usize]);
162
163
164
      ssq += c*c;
      sum += c;
    }
Monty's avatar
Monty committed
165
166
    // run accumulation in-frame
    for xi in left..right {
Monty's avatar
Monty committed
167
      let c = u32::cast_from(p[xi]);
168
169
170
      ssq += c*c;
      sum += c;
    }
Monty's avatar
Monty committed
171
    // run accumulation to right of frame (if any)
Monty's avatar
Monty committed
172
    for _xi in right..=2*r {
Monty's avatar
Monty committed
173
      let c = u32::cast_from(p[clamp_w - 1]);
174
175
176
177
      ssq += c*c;
      sum += c;
    }
  }
Monty's avatar
Monty committed
178
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, n, one_over_n, s, bdm8);
179
180
181
  *a = reta;
  *b = retb;
}
Monty's avatar
Monty committed
182

Monty's avatar
Monty committed
183
// unrolled computation to be used when all bounds-checking has been satisfied.
Monty's avatar
Monty committed
184
185
186
187
fn sgrproj_box_sum_fastxy_r1<T: Pixel>(a: &mut u32, b: &mut u32, x: isize, y: isize,
                                       s: u32, bdm8: usize, p: &PlaneSlice<T>) {
  let mut ssq = 0;
  let mut sum = 0;
188
  for yi in -1..=1 {
189
    let x = &p.reslice(x - 1, y + yi)[0];
Monty's avatar
Monty committed
190
191
192
193
    ssq += u32::cast_from(x[0]) * u32::cast_from(x[0]) +
      u32::cast_from(x[1]) * u32::cast_from(x[1]) +
      u32::cast_from(x[2]) * u32::cast_from(x[2]);
    sum += u32::cast_from(x[0]) + u32::cast_from(x[1]) + u32::cast_from(x[2]);
194
  }
Monty's avatar
Monty committed
195
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 9, 455, s, bdm8);
196
197
198
199
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
200
201
202
203
fn sgrproj_box_sum_fastxy_r2<T: Pixel>(a: &mut u32, b: &mut u32, x: isize, y: isize,
                                       s: u32, bdm8: usize, p: &PlaneSlice<T>) {
  let mut ssq = 0;
  let mut sum = 0;
204
  for yi in -2..=2 {
205
    let x = &p.reslice(x - 2, y + yi)[0];
Monty's avatar
Monty committed
206
207
208
209
210
211
212
    ssq += u32::cast_from(x[0]) * u32::cast_from(x[0]) +
      u32::cast_from(x[1]) * u32::cast_from(x[1]) +
      u32::cast_from(x[2]) * u32::cast_from(x[2]) +
      u32::cast_from(x[3]) * u32::cast_from(x[3]) +
      u32::cast_from(x[4]) * u32::cast_from(x[4]);
    sum += u32::cast_from(x[0]) + u32::cast_from(x[1]) + u32::cast_from(x[2]) +
      u32::cast_from(x[3]) + u32::cast_from(x[4]);
213
  }
Monty's avatar
Monty committed
214
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 25, 164, s, bdm8);
215
216
217
218
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
219
// unrolled computation to be used when only X bounds-checking has been satisfied.
Monty's avatar
Monty committed
220
fn sgrproj_box_sum_fastx_r1<T: Pixel>(a: &mut u32, b: &mut u32,
Monty's avatar
Monty committed
221
222
                                      stripe_y: isize, stripe_h: usize,
                                      x: isize, y: isize,
Monty's avatar
Monty committed
223
                                      s: u32, bdm8: usize,
Monty's avatar
Monty committed
224
225
                                      backing: &PlaneSlice<T>, cdeffed: &PlaneSlice<T>,
                                      crop_h: usize) {
Monty's avatar
Monty committed
226
227
  let mut ssq = 0;
  let mut sum = 0;
Monty's avatar
Monty committed
228

Monty's avatar
Monty committed
229
  for yi in y-1..=y+1 {
Monty's avatar
Monty committed
230
231
    // clamp to crop rectangle before deciding source.
    let clamp_y = clamp(yi, -cdeffed.y, crop_h as isize - 1);
Monty's avatar
Monty committed
232
    // decide if we're vertically inside or outside the stripe
Monty's avatar
Monty committed
233
234
    let src_plane = if clamp_y >= stripe_y && clamp_y < stripe_y + stripe_h as isize {
      cdeffed
Monty's avatar
Monty committed
235
    } else {
Monty's avatar
Monty committed
236
      backing
Monty's avatar
Monty committed
237
    };
Monty's avatar
Monty committed
238
    // clamp vertically to stripe limits
Monty's avatar
Monty committed
239
    let ly = clamp(clamp_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
240
    let x = &src_plane.reslice(x - 1, ly)[0];
Monty's avatar
Monty committed
241
242
243
244
    ssq += u32::cast_from(x[0]) * u32::cast_from(x[0]) +
      u32::cast_from(x[1]) * u32::cast_from(x[1]) +
      u32::cast_from(x[2]) * u32::cast_from(x[2]);
    sum += u32::cast_from(x[0]) + u32::cast_from(x[1]) + u32::cast_from(x[2]);
245
  }
Monty's avatar
Monty committed
246
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 9, 455, s, bdm8);
247
248
249
250
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
251
fn sgrproj_box_sum_fastx_r2<T: Pixel>(a: &mut u32, b: &mut u32,
Monty's avatar
Monty committed
252
253
                                      stripe_y: isize, stripe_h: usize,
                                      x: isize, y: isize,
Monty's avatar
Monty committed
254
                                      s: u32, bdm8: usize,
Monty's avatar
Monty committed
255
256
                                      backing: &PlaneSlice<T>, cdeffed: &PlaneSlice<T>,
                                      crop_h: usize) {
Monty's avatar
Monty committed
257
258
  let mut ssq = 0;
  let mut sum = 0;
Monty's avatar
Monty committed
259
  for yi in y - 2..=y + 2 {
Monty's avatar
Monty committed
260
261
    // clamp to crop rectangle before deciding source.
    let clamp_y = clamp(yi, -cdeffed.y, crop_h as isize - 1);
Monty's avatar
Monty committed
262
    // decide if we're vertically inside or outside the stripe
Monty's avatar
Monty committed
263
264
    let src_plane = if clamp_y >= stripe_y && clamp_y < stripe_y + stripe_h as isize {
      cdeffed
Monty's avatar
Monty committed
265
    } else {
Monty's avatar
Monty committed
266
      backing
Monty's avatar
Monty committed
267
    };
Monty's avatar
Monty committed
268
    // clamp vertically to stripe limits
Monty's avatar
Monty committed
269
    let ly = clamp(clamp_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
270
    let x = &src_plane.reslice(x - 2, ly)[0];
Monty's avatar
Monty committed
271
272
273
274
275
276
277
    ssq += u32::cast_from(x[0]) * u32::cast_from(x[0]) +
      u32::cast_from(x[1]) * u32::cast_from(x[1]) +
      u32::cast_from(x[2]) * u32::cast_from(x[2]) +
      u32::cast_from(x[3]) * u32::cast_from(x[3]) +
      u32::cast_from(x[4]) * u32::cast_from(x[4]);
    sum += u32::cast_from(x[0]) + u32::cast_from(x[1]) + u32::cast_from(x[2]) +
      u32::cast_from(x[3]) + u32::cast_from(x[4]);
278
  }
Monty's avatar
Monty committed
279
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 25, 164, s, bdm8);
280
281
282
283
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
284
285
// computes an intermediate (ab) column for rows stripe_y through
// stripe_y+stripe_h (not inclusive) at column stripe_x.
286
// r=1 case computes every row as every row is used (see r2 version below)
Monty's avatar
Monty committed
287
288
fn sgrproj_box_ab_r1<T: Pixel>(af: &mut[u32; 64+2],
                               bf: &mut[u32; 64+2],
Monty's avatar
Monty committed
289
                               stripe_x: isize, stripe_y: isize, stripe_h: usize,
Monty's avatar
Monty committed
290
                               s: u32, bdm8: usize,
Monty's avatar
Monty committed
291
292
                               backing: &PlaneSlice<T>, cdeffed: &PlaneSlice<T>,
                               crop_w: usize, crop_h: usize) {
Monty's avatar
Monty committed
293
294
295
  // we will fill the af and bf arrays from 0..stripe_h+1 (ni),
  // representing stripe_y-1 to stripe_y+stripe_h+1 inclusive
  let boundary0 = 0;
296
  let boundary3 = stripe_h + 2;
Monty's avatar
Monty committed
297
  if backing.x + stripe_x > 0 && cdeffed.x + stripe_x > 0 && stripe_x < crop_w as isize - 1 {
Monty's avatar
Monty committed
298
299
300
    // Addressing is away from left and right edges of cdeffed storage;
    // no X clipping to worry about, but the top/bottom few rows still
    // need to worry about storage and stripe limits
301

Monty's avatar
Monty committed
302
    // boundary1 is the point where we're guaranteed all our y
303
    // addressing will be both in the stripe and in cdeffed storage
Monty's avatar
Monty committed
304
305
306
    let boundary1 = cmp::max(2, 2 - cdeffed.y - stripe_y) as usize;
    // boundary 2 is when we have to bounds check along the bottom of
    // the stripe or bottom of storage
Monty's avatar
Monty committed
307
    let boundary2 = cmp::min(crop_h as isize - stripe_y - 1, stripe_h as isize - 1) as usize;
Monty's avatar
Monty committed
308
309

    // top rows (if any), away from left and right columns
310
    for i in boundary0..boundary1 {
Monty's avatar
Monty committed
311
      sgrproj_box_sum_fastx_r1(&mut af[i], &mut bf[i],
312
                               stripe_y, stripe_h,
Monty's avatar
Monty committed
313
314
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
Monty's avatar
Monty committed
315
316
                               backing, cdeffed,
                               crop_h);
317
318
319
320
    }
    // middle rows, away from left and right columns
    for i in boundary1..boundary2 {
      sgrproj_box_sum_fastxy_r1(&mut af[i], &mut bf[i],
Monty's avatar
Monty committed
321
                                stripe_x, stripe_y + i as isize - 1, s, bdm8, cdeffed);
322
    }
Monty's avatar
Monty committed
323
    // bottom rows (if any), away from left and right columns
324
    for i in boundary2..boundary3 {
Monty's avatar
Monty committed
325
      sgrproj_box_sum_fastx_r1(&mut af[i], &mut bf[i],
326
                               stripe_y, stripe_h,
Monty's avatar
Monty committed
327
328
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
Monty's avatar
Monty committed
329
330
                               backing, cdeffed,
                               crop_h);
331
332
333
334
    }
  } else {
    // top/bottom rows and left/right columns, where we need to worry about frame and stripe clipping
    for i in boundary0..boundary3 {
335
      sgrproj_box_sum_slow(&mut af[i], &mut bf[i],
Monty's avatar
Monty committed
336
337
338
                           stripe_y, stripe_h,
                           stripe_x, stripe_y + i as isize - 1,
                           1, 9, 455, s, bdm8,
Monty's avatar
Monty committed
339
340
                           backing, cdeffed,
                           crop_w, crop_h);
Monty's avatar
Monty committed
341
342
    }
  }
Monty's avatar
Monty committed
343
344
}

345
346
347
348
349
350
351
// One oddness about the radius=2 intermediate array computations that
// the spec doesn't make clear: Although the spec defines computation
// of every row (of a, b and f), only half of the rows (every-other
// row) are actually used.  We use the full-size array here but only
// compute the even rows.  This is not so much optimization as trying
// to illustrate what this convoluted filter is actually doing
// (ie not as much as it may appear).
Monty's avatar
Monty committed
352
353
fn sgrproj_box_ab_r2<T: Pixel>(af: &mut[u32; 64+2],
                               bf: &mut[u32; 64+2],
Monty's avatar
Monty committed
354
                               stripe_x: isize, stripe_y: isize, stripe_h: usize,
Monty's avatar
Monty committed
355
                               s: u32, bdm8: usize,
Monty's avatar
Monty committed
356
357
358
                               backing: &PlaneSlice<T>, cdeffed: &PlaneSlice<T>,
                               crop_w: usize, crop_h: usize){

Monty's avatar
Monty committed
359
360
361
  // we will fill the af and bf arrays from 0..stripe_h+1 (ni),
  // representing stripe_y-1 to stripe_y+stripe_h+1 inclusive
  let boundary0 = 0; // even
362
  let boundary3 = stripe_h + 2; // don't care if odd
Monty's avatar
Monty committed
363
  if backing.x + stripe_x > 1 && cdeffed.x + stripe_x > 1 && stripe_x < crop_w as isize - 2 {
Monty's avatar
Monty committed
364
365
366
    // Addressing is away from left and right edges of cdeffed storage;
    // no X clipping to worry about, but the top/bottom few rows still
    // need to worry about storage and stripe limits
367

Monty's avatar
Monty committed
368
369
    // boundary1 is the point where we're guaranteed all our y
    // addressing will be both in the stripe and in cdeffed storage
370
    // make even and round up
371
    let boundary1 = ((cmp::max(3, 3 - cdeffed.y - stripe_y) + 1) >> 1 << 1) as usize;
Monty's avatar
Monty committed
372
373
    // boundary 2 is when we have to bounds check along the bottom of
    // the stripe or bottom of storage
374
    // must be even, rounding of +1 cancels fencepost of -1
Monty's avatar
Monty committed
375
    let boundary2 = (cmp::min(crop_h as isize - stripe_y, stripe_h as isize) >> 1 << 1) as usize;
376
377
378

    // top rows, away from left and right columns
    for i in (boundary0..boundary1).step_by(2) {
Monty's avatar
Monty committed
379
      sgrproj_box_sum_fastx_r2(&mut af[i], &mut bf[i],
380
                               stripe_y, stripe_h,
Monty's avatar
Monty committed
381
382
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
Monty's avatar
Monty committed
383
384
                               backing, cdeffed,
                               crop_h);
385
386
387
388
    }
    // middle rows, away from left and right columns
    for i in (boundary1..boundary2).step_by(2) {
      sgrproj_box_sum_fastxy_r2(&mut af[i], &mut bf[i],
Monty's avatar
Monty committed
389
390
                                stripe_x, stripe_y + i as isize - 1,
                                s, bdm8, cdeffed);
391
392
393
    }
    // bottom rows, away from left and right columns
    for i in (boundary2..boundary3).step_by(2) {
Monty's avatar
Monty committed
394
      sgrproj_box_sum_fastx_r2(&mut af[i], &mut bf[i],
395
                               stripe_y, stripe_h,
Monty's avatar
Monty committed
396
397
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
Monty's avatar
Monty committed
398
399
                               backing, cdeffed,
                               crop_h);
Monty's avatar
Monty committed
400
    }
401
402
403
  } else {
    // top/bottom rows and left/right columns, where we need to worry about frame and stripe clipping
    for i in (boundary0..boundary3).step_by(2) {
Monty's avatar
Monty committed
404
405
406
407
      sgrproj_box_sum_slow(&mut af[i], &mut bf[i],
                           stripe_y, stripe_h,
                           stripe_x, stripe_y + i as isize - 1,
                           2, 25, 164, s, bdm8,
Monty's avatar
Monty committed
408
409
                           backing, cdeffed,
                           crop_w, crop_h);
410
411
412
413
    }
  }
}

Monty's avatar
Monty committed
414
fn sgrproj_box_f_r0<T: Pixel>(f: &mut[u32; 64], x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
415
  for i in cmp::max(0, -y) as usize..h {
Monty's avatar
Monty committed
416
    f[i as usize] = (u32::cast_from(cdeffed.p(x, (y + i as isize) as usize))) << SGRPROJ_RST_BITS;
417
418
419
  }
}

Monty's avatar
Monty committed
420
/// Radius-1 filter output for one column.
///
/// `af`/`bf` hold the intermediate columns to the left of, at, and to
/// the right of `x` (in that order); entry `i` of each corresponds to
/// row `y + i - 1`.  For every row the 3x3 neighbourhood is combined
/// with weight 3 on the corners and 4 on the centre cross (total 32,
/// hence the 5 in `shift`), applied to the pixel, and stored in `f[i]`.
/// Rows with a negative `y + i` are skipped.
fn sgrproj_box_f_r1<T: Pixel>(af: &[&[u32; 64+2]; 3], bf: &[&[u32; 64+2]; 3], f: &mut[u32; 64],
                              x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
  let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  for i in cmp::max(0, -y) as usize..h {
    let a =
      3 * (af[0][i] + af[2][i]   + af[0][i+2] + af[2][i+2]) +
      4 * (af[1][i] + af[0][i+1] + af[1][i+1] + af[2][i+1] + af[1][i+2]);
    let b =
      3 * (bf[0][i] + bf[2][i]   + bf[0][i+2] + bf[2][i+2]) +
      4 * (bf[1][i] + bf[0][i+1] + bf[1][i+1] + bf[2][i+1] + bf[1][i+2]);
    let v = a * u32::cast_from(cdeffed.p(x, (y + i as isize) as usize)) + b;
    // round to nearest
    f[i] = (v + (1 << shift >> 1)) >> shift;
  }
}

Monty's avatar
Monty committed
435
/// Radius-2 filter output for one column, two rows per iteration.
///
/// `af`/`bf` hold the intermediate columns to the left of, at, and to
/// the right of `x` (in that order); only their even entries are read.
/// For each even `i`, `f[i]` combines the entries at `i` and `i+2`
/// (weights 5/6/5 each, total 32, hence `shift`), while `f[i+1]` uses
/// only the entries at `i+2` (total 16, hence `shifto`).
/// Iteration starts at `max(0, -y)` and advances by two.
fn sgrproj_box_f_r2<T: Pixel>(af: &[&[u32; 64+2]; 3], bf: &[&[u32; 64+2]; 3], f: &mut[u32; 64],
                              x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
  let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  let shifto = 4 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  for i in (cmp::max(0, -y) as usize..h).step_by(2) {
    let a =
      5 * (af[0][i] + af[2][i]) +
      6 * (af[1][i]);
    let b =
      5 * (bf[0][i] + bf[2][i]) +
      6 * (bf[1][i]);
    let ao =
      5 * (af[0][i+2] + af[2][i+2]) +
      6 * (af[1][i+2]);
    let bo =
      5 * (bf[0][i+2] + bf[2][i+2]) +
      6 * (bf[1][i+2]);
    let v = (a + ao) * u32::cast_from(cdeffed.p(x, (y+i as isize) as usize)) + b + bo;
    f[i] = (v + (1 << shift >> 1)) >> shift;
    let vo = ao * u32::cast_from(cdeffed.p(x, (y + i as isize) as usize + 1)) + bo;
    f[i + 1] = (vo + (1 << shifto >> 1)) >> shifto;
  }
}

459
460
461
462
463
464
pub fn sgrproj_stripe_filter<T: Pixel>(set: u8, xqd: [i8; 2], fi: &FrameInvariants<T>,
                                       crop_w: usize, crop_h: usize,
                                       stripe_w: usize, stripe_h: usize,
                                       cdeffed: &PlaneSlice<T>,
                                       deblocked: &PlaneSlice<T>,
                                       out: &mut PlaneMutSlice<T>) {
Monty's avatar
Monty committed
465
  assert!(stripe_h <= 64);
Monty's avatar
Monty committed
466
  let bdm8 = fi.sequence.bit_depth - 8;
Monty's avatar
Monty committed
467
468
469
470
471
472
  let mut a_r2: [[u32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r2: [[u32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r2: [u32; 64] = [0; 64];
  let mut a_r1: [[u32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r1: [[u32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r1: [u32; 64] = [0; 64];
473

Monty's avatar
Monty committed
474
475
  let s_r2: u32 = SGRPROJ_PARAMS_S[set as usize][0];
  let s_r1: u32 = SGRPROJ_PARAMS_S[set as usize][1];
Monty's avatar
Monty committed
476

477
  let outstart = cmp::max(0, cmp::max(-cdeffed.y, -out.y)) as usize;
478

Monty's avatar
Monty committed
479
  /* prime the intermediate arrays */
480
  if s_r2 > 0 {
Monty's avatar
Monty committed
481
482
483
    sgrproj_box_ab_r2(&mut a_r2[0], &mut b_r2[0],
                      -1, 0, stripe_h,
                      s_r2, bdm8,
Monty's avatar
Monty committed
484
485
                      &deblocked, &cdeffed,
                      crop_w, crop_h);
Monty's avatar
Monty committed
486
487
488
    sgrproj_box_ab_r2(&mut a_r2[1], &mut b_r2[1],
                      0, 0, stripe_h,
                      s_r2, bdm8,
Monty's avatar
Monty committed
489
490
                      &deblocked, &cdeffed,
                      crop_w, crop_h);
491
  }
492
  if s_r1 > 0 {
Monty's avatar
Monty committed
493
494
495
    sgrproj_box_ab_r1(&mut a_r1[0], &mut b_r1[0],
                      -1, 0, stripe_h,
                      s_r1, bdm8,
Monty's avatar
Monty committed
496
497
                      &deblocked, &cdeffed,
                      crop_w, crop_h);
Monty's avatar
Monty committed
498
499
500
    sgrproj_box_ab_r1(&mut a_r1[1], &mut b_r1[1],
                      0, 0, stripe_h,
                      s_r1, bdm8,
Monty's avatar
Monty committed
501
502
                      &deblocked, &cdeffed,
                      crop_w, crop_h);
503
  }
Raphaël Zumer's avatar
Raphaël Zumer committed
504

Monty's avatar
Monty committed
505
  /* iterate by column */
Monty's avatar
Monty committed
506
  for xi in 0..stripe_w {
507
    /* build intermediate array columns */
508
    if s_r2 > 0 {
Monty's avatar
Monty committed
509
510
511
      sgrproj_box_ab_r2(&mut a_r2[(xi+2)%3], &mut b_r2[(xi+2)%3],
                        xi as isize + 1, 0, stripe_h,
                        s_r2, bdm8,
Monty's avatar
Monty committed
512
513
                        &deblocked, &cdeffed,
                        crop_w, crop_h);
Monty's avatar
Monty committed
514
515
      let ap0: [&[u32; 64+2]; 3] = [&a_r2[xi%3], &a_r2[(xi+1)%3], &a_r2[(xi+2)%3]];
      let bp0: [&[u32; 64+2]; 3] = [&b_r2[xi%3], &b_r2[(xi+1)%3], &b_r2[(xi+2)%3]];
516
      sgrproj_box_f_r2(&ap0, &bp0, &mut f_r2, xi, 0, stripe_h as usize, &cdeffed);
517
    } else {
518
      sgrproj_box_f_r0(&mut f_r2, xi, 0, stripe_h as usize, &cdeffed);
519
    }
520
    if s_r1 > 0 {
Monty's avatar
Monty committed
521
522
523
      sgrproj_box_ab_r1(&mut a_r1[(xi+2)%3], &mut b_r1[(xi+2)%3],
                        xi as isize + 1, 0, stripe_h,
                        s_r1, bdm8,
Monty's avatar
Monty committed
524
525
                        &deblocked, &cdeffed,
                        crop_w, crop_h);
Monty's avatar
Monty committed
526
527
      let ap1: [&[u32; 64+2]; 3] = [&a_r1[xi%3], &a_r1[(xi+1)%3], &a_r1[(xi+2)%3]];
      let bp1: [&[u32; 64+2]; 3] = [&b_r1[xi%3], &b_r1[(xi+1)%3], &b_r1[(xi+2)%3]];
528

529
      sgrproj_box_f_r1(&ap1, &bp1, &mut f_r1, xi, 0, stripe_h as usize, &cdeffed);
530
    } else {
531
      sgrproj_box_f_r0(&mut f_r1, xi, 0, stripe_h as usize, &cdeffed);
Monty's avatar
Monty committed
532
    }
533
534

    /* apply filter */
535
    let bit_depth = fi.sequence.bit_depth;
536
537
538
    let w0 = xqd[0] as i32;
    let w1 = xqd[1] as i32;
    let w2 = (1 << SGRPROJ_PRJ_BITS) - w0 - w1;
539
540
    for yi in outstart..stripe_h as usize {
      let u = i32::cast_from(cdeffed.p(xi, yi)) << SGRPROJ_RST_BITS;
Monty's avatar
Monty committed
541
      let v = w0*f_r2[yi] as i32 + w1*u + w2*f_r1[yi] as i32;
542
      let s = (v + (1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) >> 1)) >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS);
543
      out[yi][xi] = T::cast_from(clamp(s, 0, (1 << bit_depth) - 1));
Monty's avatar
Monty committed
544
545
546
547
    }
  }
}

548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
// Frame inputs below aren't all equal, and will change as work
// continues.  There's no deblocked reconstruction available at this
// point of RDO, so we use the non-deblocked reconstruction, cdef and
// input.  The input can be a full-sized frame. Cdef input is a partial
// frame constructed specifically for RDO.

// For simplicity, this ignores stripe segmentation (it's possible the
// extra complexity isn't worth it and we'll ignore stripes
// permanently during RDO, but that's not been tested yet). Data
// access inside the cdef frame is monolithic and clipped to the cdef
// borders.

// Input params follow the same rules as sgrproj_stripe_filter.
// Inputs are relative to the colocated slice views.
pub fn sgrproj_solve<T: Pixel>(set: u8, fi: &FrameInvariants<T>,
                               input: &PlaneSlice<T>,
                               cdeffed: &PlaneSlice<T>,
                               cdef_w: usize, cdef_h: usize) -> (i8, i8) {

  assert!(cdef_h <= 64);
  let bdm8 = fi.sequence.bit_depth - 8;
Monty's avatar
Monty committed
569
570
571
572
573
574
  let mut a_r2: [[u32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r2: [[u32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r2: [u32; 64] = [0; 64];
  let mut a_r1: [[u32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r1: [[u32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r1: [u32; 64] = [0; 64];
575

Monty's avatar
Monty committed
576
577
  let s_r2: u32 = SGRPROJ_PARAMS_S[set as usize][0];
  let s_r1: u32 = SGRPROJ_PARAMS_S[set as usize][1];
578
579
580
581
582
583
584
585
586

  let mut h:[[f64; 2]; 2] = [[0.,0.],[0.,0.]];
  let mut c:[f64; 2] = [0., 0.];

  /* prime the intermediate arrays */
  if s_r2 > 0 {
    sgrproj_box_ab_r2(&mut a_r2[0], &mut b_r2[0],
                      -1, 0, cdef_h,
                      s_r2, bdm8,
Monty's avatar
Monty committed
587
588
                      &cdeffed, &cdeffed,
                      cdef_w, cdef_h);
589
590
591
    sgrproj_box_ab_r2(&mut a_r2[1], &mut b_r2[1],
                      0, 0, cdef_h,
                      s_r2, bdm8,
Monty's avatar
Monty committed
592
593
                      &cdeffed, &cdeffed,
                      cdef_w, cdef_h);
594
595
596
597
598
  }
  if s_r1 > 0 {
    sgrproj_box_ab_r1(&mut a_r1[0], &mut b_r1[0],
                      -1, 0, cdef_h,
                      s_r1, bdm8,
Monty's avatar
Monty committed
599
600
                      &cdeffed, &cdeffed,
                      cdef_w, cdef_h);
601
602
603
    sgrproj_box_ab_r1(&mut a_r1[1], &mut b_r1[1],
                      0, 0, cdef_h,
                      s_r1, bdm8,
Monty's avatar
Monty committed
604
605
                      &cdeffed, &cdeffed,
                      cdef_w, cdef_h);
606
  }
607

608
609
610
611
612
613
614
  /* iterate by column */
  for xi in 0..cdef_w {
    /* build intermediate array columns */
    if s_r2 > 0 {
      sgrproj_box_ab_r2(&mut a_r2[(xi+2)%3], &mut b_r2[(xi+2)%3],
                        xi as isize + 1, 0, cdef_h,
                        s_r2, bdm8,
Monty's avatar
Monty committed
615
616
                        &cdeffed, &cdeffed,
                        cdef_w, cdef_h);
Monty's avatar
Monty committed
617
618
      let ap0: [&[u32; 64+2]; 3] = [&a_r2[xi%3], &a_r2[(xi+1)%3], &a_r2[(xi+2)%3]];
      let bp0: [&[u32; 64+2]; 3] = [&b_r2[xi%3], &b_r2[(xi+1)%3], &b_r2[(xi+2)%3]];
619
620
621
622
623
624
625
626
      sgrproj_box_f_r2(&ap0, &bp0, &mut f_r2, xi, 0, cdef_h as usize, &cdeffed);
    } else {
      sgrproj_box_f_r0(&mut f_r2, xi, 0, cdef_h as usize, &cdeffed);
    }
    if s_r1 > 0 {
      sgrproj_box_ab_r1(&mut a_r1[(xi+2)%3], &mut b_r1[(xi+2)%3],
                        xi as isize + 1, 0, cdef_h,
                        s_r1, bdm8,
Monty's avatar
Monty committed
627
628
                        &cdeffed, &cdeffed,
                        cdef_w, cdef_h);
Monty's avatar
Monty committed
629
630
      let ap1: [&[u32; 64+2]; 3] = [&a_r1[xi%3], &a_r1[(xi+1)%3], &a_r1[(xi+2)%3]];
      let bp1: [&[u32; 64+2]; 3] = [&b_r1[xi%3], &b_r1[(xi+1)%3], &b_r1[(xi+2)%3]];
631
632
633
634
635
636
637
638
639

      sgrproj_box_f_r1(&ap1, &bp1, &mut f_r1, xi, 0, cdef_h as usize, &cdeffed);
    } else {
      sgrproj_box_f_r0(&mut f_r1, xi, 0, cdef_h as usize, &cdeffed);
    }

    for yi in 0..cdef_h {
      let u = i32::cast_from(cdeffed.p(yi,xi)) << SGRPROJ_RST_BITS;
      let s = i32::cast_from(input.p(yi,xi)) << SGRPROJ_RST_BITS;
Monty's avatar
Monty committed
640
641
      let f2 = f_r2[yi] as i32 - u;
      let f1 = f_r1[yi] as i32 - u;
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
      h[0][0] += f2 as f64 * f2 as f64;
      h[1][1] += f1 as f64 * f1 as f64;
      h[0][1] += f1 as f64 * f2 as f64;
      c[0] += f2 as f64 * s as f64;
      c[1] += f1 as f64 * s as f64;
    }
  }

  // this is lifted almost in-tact from libaom
  let n = cdef_w as f64 * cdef_h as f64;
  h[0][0] /= n;
  h[0][1] /= n;
  h[1][1] /= n;
  h[1][0] = h[0][1];
  c[0] /= n;
  c[1] /= n;
  let (xq0, xq1) = if s_r2 == 0 {
    // H matrix is now only the scalar h[1][1]
    // C vector is now only the scalar c[1]
    if h[1][1] == 0. {
      (0, 0)
    } else {
      (0, (c[1] / h[1][1]).round() as i32)
    }
  } else if s_r1 == 0 {
    // H matrix is now only the scalar h[0][0]
    // C vector is now only the scalar c[0]
    if h[0][0] == 0. {
      (0, 0)
    } else {
      ((c[0] / h[0][0]).round() as i32, 0)
    }
  } else {
    let det = h[0][0] * h[1][1] - h[0][1] * h[1][0];
    if det == 0. {
      (0, 0)
    } else {
      // If scaling up dividend would overflow, instead scale down the divisor
      let div1 = (h[1][1] * c[0] - h[0][1] * c[1]) * (1 << SGRPROJ_PRJ_BITS) as f64;
      let div2 = (h[0][0] * c[1] - h[1][0] * c[0]) * (1 << SGRPROJ_PRJ_BITS) as f64;

      ((div1 / det).round() as i32, (div2 / det).round() as i32)
    }
  };
  (clamp(xq0, SGRPROJ_XQD_MIN[0] as i32, SGRPROJ_XQD_MAX[0] as i32) as i8,
   clamp(xq1, SGRPROJ_XQD_MIN[1] as i32, SGRPROJ_XQD_MAX[1] as i32) as i8)
}

690
fn wiener_stripe_filter<T: Pixel>(coeffs: [[i8; 3]; 2], fi: &FrameInvariants<T>,
691
692
693
694
                                  crop_w: usize, crop_h: usize,
                                  stripe_w: usize, stripe_h: usize,
                                  stripe_x: usize, stripe_y: isize,
                                  cdeffed: &Plane<T>, deblocked: &Plane<T>, out: &mut Plane<T>) {
695
  let bit_depth = fi.sequence.bit_depth;
Monty's avatar
Monty committed
696
697
  let round_h = if bit_depth == 12 {5} else {3};
  let round_v = if bit_depth == 12 {9} else {11};
698
699
  let offset = 1 << (bit_depth + WIENER_BITS - round_h - 1);
  let limit = (1 << (bit_depth + 1 + WIENER_BITS - round_h)) - 1;
Raphaël Zumer's avatar
Raphaël Zumer committed
700

Monty's avatar
Monty committed
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
  let mut work: [i32; MAX_SB_SIZE+7] = [0; MAX_SB_SIZE+7];
  let vfilter: [i32; 7] = [ coeffs[0][0] as i32,
                            coeffs[0][1] as i32,
                            coeffs[0][2] as i32,
                            128 - 2 * (coeffs[0][0] as i32 +
                                       coeffs[0][1] as i32 +
                                       coeffs[0][2] as i32 ),
                            coeffs[0][2] as i32,
                            coeffs[0][1] as i32,
                            coeffs[0][0] as i32];
  let hfilter: [i32; 7] = [ coeffs[1][0] as i32,
                            coeffs[1][1] as i32,
                            coeffs[1][2] as i32,
                            128 - 2 * (coeffs[1][0] as i32 +
                                       coeffs[1][1] as i32 +
                                       coeffs[1][2] as i32),
                            coeffs[1][2] as i32,
                            coeffs[1][1] as i32,
                            coeffs[1][0] as i32];

  // unlike x, our y can be negative to start as the first stripe
  // starts off the top of the frame by 8 pixels, and can also run off the end of the frame
Monty's avatar
Monty committed
723
724
  let start_wi = if stripe_y < 0 {-stripe_y} else {0} as usize;
  let start_yi = if stripe_y < 0 {0} else {stripe_y} as usize;
725
  let end_i = cmp::max(0, if stripe_h as isize + stripe_y > crop_h as isize {
Monty's avatar
Monty committed
726
    crop_h as isize - stripe_y - start_wi as isize
Monty's avatar
Monty committed
727
  } else {
728
    stripe_h as isize - start_wi as isize
Monty's avatar
Monty committed
729
  }) as usize;
Raphaël Zumer's avatar
Raphaël Zumer committed
730

Romain Vimont's avatar
Romain Vimont committed
731
  let mut out_slice = out.mut_slice(PlaneOffset{x: 0, y: start_yi as isize});
Monty's avatar
Monty committed
732

Monty's avatar
Monty committed
733
734
  for xi in stripe_x..stripe_x+stripe_w {
    let n = cmp::min(7, crop_w as isize + 3 - xi as isize);
735
    for yi in stripe_y - 3..stripe_y + stripe_h as isize + 4 {
736
      let src_plane: &Plane<T>;
Monty's avatar
Monty committed
737
738
      let mut acc = 0;
      let ly;
Monty's avatar
Monty committed
739
740
      if yi < stripe_y {
        ly = cmp::max(clamp(yi, 0, crop_h as isize - 1), stripe_y - 2) as usize;
Monty's avatar
Monty committed
741
        src_plane = deblocked;
742
      } else if yi < stripe_y+stripe_h as isize {
Monty's avatar
Monty committed
743
        ly = clamp(yi, 0, crop_h as isize - 1) as usize;
Monty's avatar
Monty committed
744
745
        src_plane = cdeffed;
      } else {
746
        ly = cmp::min(clamp(yi, 0, crop_h as isize - 1), stripe_y + stripe_h as isize + 1) as usize;
Monty's avatar
Monty committed
747
748
        src_plane = deblocked;
      }
Raphaël Zumer's avatar
Raphaël Zumer committed
749

Monty's avatar
Monty committed
750
      for i in 0..3 - xi as isize {
751
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p(0, ly));
Monty's avatar
Monty committed
752
753
      }
      for i in cmp::max(0,3 - (xi as isize))..n {
754
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p((xi as isize + i - 3) as usize, ly));
Monty's avatar
Monty committed
755
756
      }
      for i in n..7 {
757
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p(crop_w - 1, ly));
Monty's avatar
Monty committed
758
      }
Raphaël Zumer's avatar
Raphaël Zumer committed
759

760
      acc = (acc + (1 << round_h >> 1)) >> round_h;
Monty's avatar
Monty committed
761
      work[(yi-stripe_y+3) as usize] = clamp(acc, -offset, limit-offset);
Monty's avatar
Monty committed
762
763
    }

764
    for (wi, dst) in (start_wi..start_wi+end_i).zip(out_slice.rows_iter_mut().map(|row| &mut row[xi]).take(end_i)) {
Monty's avatar
Monty committed
765
766
767
768
      let mut acc = 0;
      for (i,src) in (0..7).zip(work[wi..wi+7].iter_mut()) {
        acc += vfilter[i] * *src;
      }
769
      *dst = T::cast_from(clamp((acc + (1 << round_v >> 1)) >> round_v, 0, (1 << bit_depth) - 1));
Monty's avatar
Monty committed
770
    }
771
772
773
  }
}

774
#[derive(Copy, Clone, Debug)]
775
776
777
778
779
780
781
782
783
784
785
786
pub struct RestorationUnit {
  pub filter: RestorationFilter,
}

impl RestorationUnit {
  pub fn default() -> RestorationUnit {
    RestorationUnit {
      filter: RestorationFilter::default(),
    }
  }
}

787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
// Grid of restoration units for one plane, stored row after row in a
// single boxed slice.
#[derive(Clone, Debug)]
pub struct FrameRestorationUnits {
  units: Box<[RestorationUnit]>,
  // units per row
  pub cols: usize,
  // number of rows
  pub rows: usize,
}

impl FrameRestorationUnits {
  // Allocates cols * rows units, every one set to
  // RestorationUnit::default().
  pub fn new(cols: usize, rows: usize) -> Self {
    let units = vec![RestorationUnit::default(); cols * rows].into_boxed_slice();
    Self { units, cols, rows }
  }
}

// Indexing by row number yields that row of units as a slice, so a
// single unit is reached with units[row][col].
impl Index<usize> for FrameRestorationUnits {
  type Output = [RestorationUnit];
  #[inline(always)]
  fn index(&self, index: usize) -> &Self::Output {
    let row_len = self.cols;
    let first = index * row_len;
    let past_last = (index + 1) * row_len;
    &self.units[first..past_last]
  }
}

// Mutable counterpart of row indexing: yields the row of units at the
// given row number as a mutable slice.
impl IndexMut<usize> for FrameRestorationUnits {
  #[inline(always)]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let row_len = self.cols;
    let first = index * row_len;
    let past_last = (index + 1) * row_len;
    &mut self.units[first..past_last]
  }
}

819
// Fixed loop restoration parameters for one plane.
#[derive(Clone, Debug)]
pub struct RestorationPlaneConfig {
  // restoration type for the plane (RestorationState::new passes
  // RESTORE_SWITCHABLE)
  pub lrf_type: u8,
  // width and height of a restoration unit, in pixels of this plane
  pub unit_size: usize,
  // (1 << sb_shift) gives the number of superblocks both horizontally and
  // vertically in a restoration unit, not accounting for RU stretching
  pub sb_shift: usize,
  // stripe height is 64 in all cases except 4:2:0 chroma planes where
  // it is 32.  This is independent of all other setup parameters
  pub stripe_height: usize,
  // number of restoration units per row
  pub cols: usize,
  // number of restoration unit rows
  pub rows: usize,
}

#[derive(Clone, Debug)]
pub struct RestorationPlane {
  pub cfg: RestorationPlaneConfig,
836
  pub units: FrameRestorationUnits,
837
838
839
840
841
842
843
844
845
}

// Position of a restoration unit within a plane's unit grid.
// Derives Debug like the other public restoration types in this file.
#[derive(Clone, Debug, Default)]
pub struct RestorationPlaneOffset {
  // row index
  pub row: usize,
  // column index
  pub col: usize,
}

impl RestorationPlane {
Monty's avatar
Monty committed
846
  pub fn new(lrf_type: u8, unit_size: usize, sb_shift: usize, stripe_decimate: usize,
847
             cols: usize, rows: usize) -> RestorationPlane {
Monty's avatar
Monty committed
848
    let stripe_height = if stripe_decimate != 0 {32} else {64};
849
    RestorationPlane {
850
851
852
853
854
855
856
857
      cfg: RestorationPlaneConfig {
        lrf_type,
        unit_size,
        sb_shift,
        stripe_height,
        cols,
        rows,
      },
858
      units: FrameRestorationUnits::new(cols, rows),
859
860
861
    }
  }

862
863
864
  // Stripes are always 64 pixels high in a non-subsampled
  // frame, and decimated from 64 pixels in chroma.  When
  // filtering, they are not co-located on Y with superblocks.
Monty's avatar
Monty committed
865
  fn restoration_unit_index_by_stripe(&self, stripenum: usize, rux: usize) -> (usize, usize) {
866
    (
867
868
      cmp::min(rux, self.cfg.cols - 1),
      cmp::min(stripenum * self.cfg.stripe_height / self.cfg.unit_size, self.cfg.rows - 1),
869
870
871
    )
  }

Monty's avatar
Monty committed
872
873
  pub fn restoration_unit_by_stripe(&self, stripenum: usize, rux: usize) -> &RestorationUnit {
    let (x, y) = self.restoration_unit_index_by_stripe(stripenum, rux);
874
    &self.units[y][x]
875
  }
876
877
}

878
#[derive(Clone, Debug)]
879
pub struct RestorationState {
Romain Vimont's avatar
Romain Vimont committed
880
  pub planes: [RestorationPlane; PLANES]
881
882
883
}

impl RestorationState {
884
  pub fn new<T: Pixel>(fi: &FrameInvariants<T>, input: &Frame<T>) -> Self {
885
    let PlaneConfig { xdec, ydec, .. } = input.planes[1].cfg;
Monty's avatar
Monty committed
886
    let stripe_uv_decimate = if xdec>0 && ydec>0 {1} else {0};
887
888
889
890
891
892
    // Currrently opt for smallest possible restoration unit size (1
    // superblock) This is *temporary*.  Counting on it will break
    // very shortly; the 1-superblock hardwiring is only until the
    // upper level encoder is capable of dealing with the delayed
    // writes that RU size > SB size will require.
    let lrf_y_shift = if fi.sequence.use_128x128_superblock {1} else {2};
Monty's avatar
Monty committed
893
    let lrf_uv_shift = lrf_y_shift + stripe_uv_decimate;
894
895
896
897
898
899
900

    // derive the rest
    let y_unit_log2 = RESTORATION_TILESIZE_MAX_LOG2 - lrf_y_shift;
    let uv_unit_log2 = RESTORATION_TILESIZE_MAX_LOG2 - lrf_uv_shift;
    let y_unit_size = 1 << y_unit_log2;
    let uv_unit_size = 1 << uv_unit_log2;
    let y_sb_log2 = if fi.sequence.use_128x128_superblock {7} else {6};
Monty's avatar
Monty committed
901
    let uv_sb_log2 = y_sb_log2 - stripe_uv_decimate;
902
903
    let cols = ((fi.width + (y_unit_size >> 1)) / y_unit_size).max(1);
    let rows = ((fi.height + (y_unit_size >> 1)) / y_unit_size).max(1);
904

905
    RestorationState {
Romain Vimont's avatar
Romain Vimont committed
906
907
908
909
910
911
912
913
      planes: [
        RestorationPlane::new(RESTORE_SWITCHABLE, y_unit_size, y_unit_log2 - y_sb_log2,
                              0, cols, rows),
        RestorationPlane::new(RESTORE_SWITCHABLE, uv_unit_size, uv_unit_log2 - uv_sb_log2,
                              stripe_uv_decimate, cols, rows),
        RestorationPlane::new(RESTORE_SWITCHABLE, uv_unit_size, uv_unit_log2 - uv_sb_log2,
                              stripe_uv_decimate, cols, rows)
      ],
914
915
    }
  }
916

917
918
  pub fn lrf_filter_frame<T: Pixel>(&mut self, out: &mut Frame<T>, pre_cdef: &Frame<T>,
                                    fi: &FrameInvariants<T>) {
919
    let cdeffed = out.clone();
Raphaël Zumer's avatar
Raphaël Zumer committed
920

921
922
923
924
925
    // unlike the other loop filters that operate over the padded
    // frame dimensions, restoration filtering and source pixel
    // accesses are clipped to the original frame dimensions
    // that's why we use fi.width and fi.height instead of PlaneConfig fields

Monty's avatar
Monty committed
926
    // number of stripes (counted according to colocated Y luma position)
927
    let stripe_n = (fi.height + 7) / 64 + 1;
Raphaël Zumer's avatar
Raphaël Zumer committed
928

Monty's avatar
Monty committed
929
    for pli in 0..PLANES {
Romain Vimont's avatar
Romain Vimont committed
930
      let rp = &self.planes[pli];
Monty's avatar
Monty committed
931
      let xdec = out.planes[pli].cfg.xdec;
932
      let ydec = out.planes[pli].cfg.ydec;
933
934
      let crop_w = (fi.width + (1 << xdec >> 1)) >> xdec;
      let crop_h = (fi.height + (1 << ydec >> 1)) >> ydec;
Raphaël Zumer's avatar
Raphaël Zumer committed
935

Monty's avatar
Monty committed
936
937
      for si in 0..stripe_n {
        // stripe y pixel locations must be able to overspan the frame
938
        let stripe_start_y = (si as isize * 64 - 8) >> ydec;
Monty's avatar
Monty committed
939
        let stripe_size = 64 >> ydec; // one past, unlike spec
Raphaël Zumer's avatar
Raphaël Zumer committed
940

Monty's avatar
Monty committed
941
        // horizontally, go rdu-by-rdu
942
        for rux in 0..rp.cfg.cols {
Monty's avatar
Monty committed
943
          // stripe x pixel locations must be clipped to frame, last may need to stretch
944
945
          let x = rux * rp.cfg.unit_size;
          let size = if rux == rp.cfg.cols - 1 {
946
            crop_w - x
Monty's avatar
Monty committed
947
          } else {
948
            rp.cfg.unit_size
Monty's avatar
Monty committed
949
          };
Monty's avatar
Monty committed
950
          let ru = rp.restoration_unit_by_stripe(si, rux);
Monty's avatar
Monty committed
951
          match ru.filter {
952
            RestorationFilter::Wiener{coeffs} => {
953
954
              wiener_stripe_filter(coeffs, fi,
                                   crop_w, crop_h,
955
956
                                   size, stripe_size,
                                   x, stripe_start_y,
Monty's avatar