lrf.rs 39.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#![allow(safe_extern_statics)]

Luca Barbato's avatar
Luca Barbato committed
12
use crate::frame::Frame;
Raphaël Zumer's avatar
Raphaël Zumer committed
13
14
15
16
use crate::encoder::FrameInvariants;
use crate::context::PLANES;
use crate::context::MAX_SB_SIZE;
use crate::plane::Plane;
Monty's avatar
Monty committed
17
use crate::plane::PlaneSlice;
18
use crate::plane::PlaneMutSlice;
Raphaël Zumer's avatar
Raphaël Zumer committed
19
20
use crate::plane::PlaneOffset;
use crate::plane::PlaneConfig;
21
use std::cmp;
Raphaël Zumer's avatar
Raphaël Zumer committed
22
use crate::util::clamp;
23
24
use crate::util::CastFromPrimitive;
use crate::util::Pixel;
25

26
27
use std::ops::{Index, IndexMut};

28
pub const RESTORATION_TILESIZE_MAX_LOG2: usize = 8;
29

30
31
32
33
34
// Restoration type identifiers.
// NOTE(review): presumably the loop-restoration type codes used when
// signalling the filter choice; their users are outside this part of the
// file -- confirm there.
pub const RESTORE_NONE: u8 = 0;
pub const RESTORE_SWITCHABLE: u8 = 1;
pub const RESTORE_WIENER: u8 = 2;
pub const RESTORE_SGRPROJ: u8 = 3;

35
36
37
// Per-tap limits for the three Wiener filter taps; element i of each array
// belongs to tap i, and MIN[i] <= MID[i] <= MAX[i] holds for every tap.
// NOTE(review): MID is presumably the default/reference value used when
// coding the taps -- confirm against the coefficient coder.
pub const WIENER_TAPS_MIN: [i8; 3] = [ -5, -23, -17 ];
pub const WIENER_TAPS_MID: [i8; 3] = [ 3, -7, 15 ];
pub const WIENER_TAPS_MAX: [i8; 3] = [ 10, 8, 46 ];
38
pub const WIENER_TAPS_K:   [i8; 3] = [ 1, 2, 3 ];
Monty's avatar
Monty committed
39
pub const WIENER_BITS: usize = 7;
40
41
42
43
44
45
46

// Lower bound of the two self-guided projection weights (xqd[0], xqd[1]);
// sgrproj_solve clamps its result to [XQD_MIN, XQD_MAX].
pub const SGRPROJ_XQD_MIN: [i8; 2] = [ -96, -32 ];
// NOTE(review): presumably the default/reference weights used when coding
// xqd -- not referenced in this part of the file, confirm at the use site.
pub const SGRPROJ_XQD_MID: [i8; 2] = [ -32, 31 ];
// Upper bound of the two self-guided projection weights.
pub const SGRPROJ_XQD_MAX: [i8; 2] = [ 31, 95 ];
// NOTE(review): looks like the sub-exponential coding parameter for the
// projection weights -- not referenced in this part of the file, confirm.
pub const SGRPROJ_PRJ_SUBEXP_K: u8 = 4;
// Fixed-point precision of the projection weights: the three weights applied
// in sgrproj_stripe_filter sum to 1 << SGRPROJ_PRJ_BITS.
pub const SGRPROJ_PRJ_BITS: u8 = 7;
// log2 of the number of entries in SGRPROJ_PARAMS_S.
pub const SGRPROJ_PARAMS_BITS: u8 = 4;
Monty's avatar
Monty committed
47
48
49
50
// Right shift (with rounding) applied to `p * s` in sgrproj_sum_finish.
pub const SGRPROJ_MTABLE_BITS: u8 = 20;
// Fixed-point precision of the `a` intermediate: a lies in 1..=(1 << SGR_BITS).
pub const SGRPROJ_SGR_BITS: u8 = 8;
// Right shift (with rounding) that removes the scale of the `one_over_n`
// reciprocal when the `b` intermediate is computed.
pub const SGRPROJ_RECIP_BITS: u8 = 12;
// Extra fractional bits carried by the per-radius filter outputs (f arrays);
// removed together with SGRPROJ_PRJ_BITS when the final pixel is produced.
pub const SGRPROJ_RST_BITS: u8 = 4;
51
52
53
54
55
// Strength table of the self-guided filter, indexed by the `set` parameter.
// Column 0 is the strength of the radius-2 pass and column 1 that of the
// radius-1 pass (see sgrproj_stripe_filter); a strength of 0 means that pass
// is skipped and the unfiltered input is used in its place.
pub const SGRPROJ_PARAMS_S: [[i32; 2]; 1 << SGRPROJ_PARAMS_BITS] = [
  [140, 3236], [112, 2158], [ 93, 1618], [ 80, 1438],
  [ 70, 1295], [ 58, 1177], [ 47, 1079], [ 37,  996],
  [ 30,  925], [ 25,  863], [  0, 2589], [  0, 1618],
  [  0, 1177], [  0,  925], [ 56,    0], [ 22,    0]
56
57
];

Romain Vimont's avatar
Romain Vimont committed
58
/// Loop-restoration filter choice together with the parameters that each
/// filter kind carries.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum RestorationFilter {
  /// No restoration filtering.
  None,
  /// Wiener filter with two sets of three taps (one set per filter
  /// direction).
  Wiener  { coeffs: [[i8; 3]; 2] },
  /// Self-guided projection filter: `set` selects a row of the strength
  /// table and `xqd` holds the two projection weights.
  Sgrproj { set: u8,
            xqd: [i8; 2] },
}

// Implemented as the standard trait rather than an inherent method so the
// type also works wherever `T: Default` is required; existing
// `RestorationFilter::default()` calls keep resolving through the prelude.
impl Default for RestorationFilter {
  /// The default is to apply no restoration filter.
  fn default() -> RestorationFilter {
    RestorationFilter::None
  }
}

72
#[inline(always)]
Monty's avatar
Monty committed
73
fn sgrproj_sum_finish(ssq: i32, sum: i32, n: i32, one_over_n: i32, s: i32, bdm8: usize) -> (i32, i32) {
74
75
  let scaled_ssq = (ssq + (1 << (2 * bdm8) >> 1)) >> (2 * bdm8);
  let scaled_sum = (sum + (1 << bdm8 >> 1)) >> bdm8;
76
  let p = cmp::max(0, scaled_ssq*(n as i32) - scaled_sum*scaled_sum) as u32;
77
  let z = (p * s as u32 + (1 << SGRPROJ_MTABLE_BITS >> 1)) >> SGRPROJ_MTABLE_BITS;
78
  let a:i32 = if z >= 255 {
79
80
81
82
    256
  } else if z == 0 {
    1
  } else {
83
    (((z << SGRPROJ_SGR_BITS) + z/2) / (z+1)) as i32
84
  };
85
86
  let b = ((1 << SGRPROJ_SGR_BITS) - a) * sum * one_over_n;
  (a, (b + (1 << SGRPROJ_RECIP_BITS >> 1)) >> SGRPROJ_RECIP_BITS)
87
}
Monty's avatar
Monty committed
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133

// The addressing below is a bit confusing, made worse by LRF's odd
// clipping requirements, and our reusing code for partial frames.  So
// I'm documenting the LRF conventions here in detail.

// 'Relative to plane storage' means that a coordinate or bound is
// being applied as if to the full Plane backing the PlaneSlice.  For
// example, a PlaneSlice may represent a subset of a middle of a
// plane, but when we say the top/left bounds are clipped 'relative to
// plane storage', that means relative to 0,0 of the plane, not 0,0 of
// the plane slice.

// 'Relative to the slice view' means that a coordinate or bound is
// counted from the 0,0 of the PlaneSlice, not the Plane from which it
// was sliced.

// Passed in plane slices may be the same size or different sizes;
// filter access will be clipped to 0,0..w,h of the underlying plane
// storage for both planes, depending which is accessed.  Note that
// the passed in w/h that specifies the storage clipping is actually
// relative to the slice view, not the plane storage (it
// simplifies the math internally).  E.g., if a PlaneSlice has a y
// offset of -2 (meaning its origin is two rows above the top row of
// the backing plane), and we pass in a height of 4, the rows
// 0,1,2,3,4 of the slice address -2, -1, 0, 1, 2 of the backing plane
// with access clipped to 0, 0, 0, 1, 1.

// Active area cropping is done by specifying a w,h smaller
// than the actual underlying plane storage.

// stripe_y is the beginning of the current stripe (used for source
// buffer choice/clipping) relative to the passed in plane view.  It
// may (and regularly will) be negative.

// stripe_h is the height of the current stripe (again used for source
// buffer choice/clipping).  It may specify a stripe boundary less
// than, equal to, or larger than the buffers we're accessing.

// x and y specify the center pixel of the current filter kernel
// application.  They are relative to the passed in slice views.

fn sgrproj_box_sum_slow<T: Pixel>(a: &mut i32, b: &mut i32,
                                  stripe_y: isize, stripe_h: usize,
                                  x: isize, y: isize,
                                  r: usize, n: i32, one_over_n: i32, s: i32, bdm8: usize,
                                  backing: &PlaneSlice<T>, backing_w: usize, backing_h: usize,
134
                                  cdeffed: &PlaneSlice<T>, cdeffed_w: usize, cdeffed_h: usize) {
135
136
137
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;

Monty's avatar
Monty committed
138
139
  for yi in y-r as isize..=y+r as isize {
    // decide if we're vertically inside or outside the stripe
Monty's avatar
Monty committed
140
    let (src_plane, src_w, src_h) = if yi >= stripe_y && yi < stripe_y + stripe_h as isize {
141
      (cdeffed,
Monty's avatar
Monty committed
142
143
       (cdeffed_w as isize - x + r as isize) as usize,
       cdeffed_h as isize)
Monty's avatar
Monty committed
144
    } else {
Monty's avatar
Monty committed
145
146
147
148
      (backing,
       (backing_w as isize - x + r as isize) as usize,
       backing_h as isize)
    };
149
    // clamp vertically to storage at top and passed-in height at bottom
Monty's avatar
Monty committed
150
151
152
153
    let cropped_y = clamp(yi, -src_plane.y, src_h - 1);
    // clamp vertically to stripe limits
    let ly = clamp(cropped_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
    // Reslice to avoid a negative X index.
154
    let p = &src_plane.reslice(x - r as isize,ly)[0];
Monty's avatar
Monty committed
155
156
157
158
159
160
161
162
    // left-hand addressing limit
    let left = cmp::max(0, r as isize - x - src_plane.x) as usize;
    // right-hand addressing limit
    let right = cmp::min(2*r+1, src_w);

    // run accumulation to left of frame storage (if any)
    for _xi in 0..left {
      let c = i32::cast_from(p[(r as isize - x) as usize]);
163
164
165
      ssq += c*c;
      sum += c;
    }
Monty's avatar
Monty committed
166
167
168
    // run accumulation in-frame
    for xi in left..right {
      let c = i32::cast_from(p[xi]);
169
170
171
      ssq += c*c;
      sum += c;
    }
Monty's avatar
Monty committed
172
    // run accumulation to right of frame (if any)
Monty's avatar
Monty committed
173
    for _xi in right..=2*r {
Monty's avatar
Monty committed
174
      let c = i32::cast_from(p[src_w - 1]);
175
176
177
178
      ssq += c*c;
      sum += c;
    }
  }
Monty's avatar
Monty committed
179
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, n, one_over_n, s, bdm8);
180
181
182
  *a = reta;
  *b = retb;
}
Monty's avatar
Monty committed
183

Monty's avatar
Monty committed
184
185
186
// unrolled computation to be used when all bounds-checking has been satisfied.
fn sgrproj_box_sum_fastxy_r1<T: Pixel>(a: &mut i32, b: &mut i32, x: isize, y: isize,
                                       s: i32, bdm8: usize, p: &PlaneSlice<T>) {
187
188
189
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;
  for yi in -1..=1 {
190
    let x = &p.reslice(x - 1, y + yi)[0];
191
192
193
194
195
    ssq += i32::cast_from(x[0]) * i32::cast_from(x[0]) +
      i32::cast_from(x[1]) * i32::cast_from(x[1]) +
      i32::cast_from(x[2]) * i32::cast_from(x[2]);
    sum += i32::cast_from(x[0]) + i32::cast_from(x[1]) + i32::cast_from(x[2]);
  }
Monty's avatar
Monty committed
196
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 9, 455, s, bdm8);
197
198
199
200
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
201
202
fn sgrproj_box_sum_fastxy_r2<T: Pixel>(a: &mut i32, b: &mut i32, x: isize, y: isize,
                                       s: i32, bdm8: usize, p: &PlaneSlice<T>) {
203
204
205
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;
  for yi in -2..=2 {
206
    let x = &p.reslice(x - 2, y + yi)[0];
207
208
209
210
211
212
213
214
    ssq += i32::cast_from(x[0]) * i32::cast_from(x[0]) +
      i32::cast_from(x[1]) * i32::cast_from(x[1]) +
      i32::cast_from(x[2]) * i32::cast_from(x[2]) +
      i32::cast_from(x[3]) * i32::cast_from(x[3]) +
      i32::cast_from(x[4]) * i32::cast_from(x[4]);
    sum += i32::cast_from(x[0]) + i32::cast_from(x[1]) + i32::cast_from(x[2]) +
      i32::cast_from(x[3]) + i32::cast_from(x[4]);
  }
Monty's avatar
Monty committed
215
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 25, 164, s, bdm8);
216
217
218
219
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
220
221
222
223
224
225
// unrolled computation to be used when only X bounds-checking has been satisfied.
fn sgrproj_box_sum_fastx_r1<T: Pixel>(a: &mut i32, b: &mut i32,
                                      stripe_y: isize, stripe_h: usize,
                                      x: isize, y: isize,
                                      s: i32, bdm8: usize,
                                      backing: &PlaneSlice<T>, backing_h: usize,
226
                                      cdeffed: &PlaneSlice<T>, cdeffed_h: usize) {
227
228
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;
Monty's avatar
Monty committed
229
230
  for yi in y-1..=y+1 {
    // decide if we're vertically inside or outside the stripe
Monty's avatar
Monty committed
231
232
233
    let (src_plane, src_h) = if yi >= stripe_y && yi < stripe_y + stripe_h as isize {
      (cdeffed,
       cdeffed_h as isize)
Monty's avatar
Monty committed
234
    } else {
Monty's avatar
Monty committed
235
236
237
      (backing,
       backing_h as isize)
    };
Monty's avatar
Monty committed
238
239
240
241
    // clamp vertically to storage addressing limit
    let cropped_y = clamp(yi, -src_plane.y, src_h - 1);
    // clamp vertically to stripe limits
    let ly = clamp(cropped_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
242
    let x = &src_plane.reslice(x - 1, ly)[0];
243
244
245
246
247
    ssq += i32::cast_from(x[0]) * i32::cast_from(x[0]) +
      i32::cast_from(x[1]) * i32::cast_from(x[1]) +
      i32::cast_from(x[2]) * i32::cast_from(x[2]);
    sum += i32::cast_from(x[0]) + i32::cast_from(x[1]) + i32::cast_from(x[2]);
  }
Monty's avatar
Monty committed
248
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 9, 455, s, bdm8);
249
250
251
252
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
253
254
255
256
257
fn sgrproj_box_sum_fastx_r2<T: Pixel>(a: &mut i32, b: &mut i32,
                                      stripe_y: isize, stripe_h: usize,
                                      x: isize, y: isize,
                                      s: i32, bdm8: usize,
                                      backing: &PlaneSlice<T>, backing_h: usize,
258
                                      cdeffed: &PlaneSlice<T>, cdeffed_h: usize) {
259
260
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;
Monty's avatar
Monty committed
261
262
  for yi in y - 2..=y + 2 {
    // decide if we're vertically inside or outside the stripe
Monty's avatar
Monty committed
263
264
265
    let (src_plane, src_h) = if yi >= stripe_y && yi < stripe_y + stripe_h as isize {
      (cdeffed,
       cdeffed_h as isize)
Monty's avatar
Monty committed
266
    } else {
Monty's avatar
Monty committed
267
268
269
      (backing,
       backing_h as isize)
    };
Monty's avatar
Monty committed
270
271
272
273
    // clamp vertically to storage addressing limit
    let cropped_y = clamp(yi, -src_plane.y, src_h as isize - 1);
    // clamp vertically to stripe limits
    let ly = clamp(cropped_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
274
    let x = &src_plane.reslice(x - 2, ly)[0];
275
276
277
278
279
280
281
282
    ssq += i32::cast_from(x[0]) * i32::cast_from(x[0]) +
      i32::cast_from(x[1]) * i32::cast_from(x[1]) +
      i32::cast_from(x[2]) * i32::cast_from(x[2]) +
      i32::cast_from(x[3]) * i32::cast_from(x[3]) +
      i32::cast_from(x[4]) * i32::cast_from(x[4]);
    sum += i32::cast_from(x[0]) + i32::cast_from(x[1]) + i32::cast_from(x[2]) +
      i32::cast_from(x[3]) + i32::cast_from(x[4]);
  }
Monty's avatar
Monty committed
283
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 25, 164, s, bdm8);
284
285
286
287
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
288
289
// computes an intermediate (ab) column for rows stripe_y through
// stripe_y+stripe_h (not inclusive) at column stripe_x.
290
291
292
// r=1 case computes every row as every row is used (see r2 version below)
fn sgrproj_box_ab_r1<T: Pixel>(af: &mut[i32; 64+2],
                               bf: &mut[i32; 64+2],
Monty's avatar
Monty committed
293
294
295
296
297
298
299
                               stripe_x: isize, stripe_y: isize, stripe_h: usize,
                               s: i32, bdm8: usize,
                               backing: &PlaneSlice<T>, backing_w: usize, backing_h: usize,
                               cdeffed: &PlaneSlice<T>, cdeffed_w: usize, cdeffed_h: usize) {
  // we will fill the af and bf arrays from 0..stripe_h+1 (ni),
  // representing stripe_y-1 to stripe_y+stripe_h+1 inclusive
  let boundary0 = 0;
300
  let boundary3 = stripe_h + 2;
Monty's avatar
Monty committed
301
302
303
304
305
  if backing.x + stripe_x > 0 && stripe_x < backing_w as isize - 1 &&
    cdeffed.x + stripe_x > 0 && stripe_x < cdeffed_w as isize - 1 {
    // Addressing is away from left and right edges of cdeffed storage;
    // no X clipping to worry about, but the top/bottom few rows still
    // need to worry about storage and stripe limits
306

Monty's avatar
Monty committed
307
    // boundary1 is the point where we're guaranteed all our y
308
    // addressing will be both in the stripe and in cdeffed storage
Monty's avatar
Monty committed
309
310
311
312
313
314
    let boundary1 = cmp::max(2, 2 - cdeffed.y - stripe_y) as usize;
    // boundary 2 is when we have to bounds check along the bottom of
    // the stripe or bottom of storage
    let boundary2 = cmp::min(cdeffed_h as isize - stripe_y - 1, stripe_h as isize - 1) as usize;

    // top rows (if any), away from left and right columns
315
    for i in boundary0..boundary1 {
Monty's avatar
Monty committed
316
      sgrproj_box_sum_fastx_r1(&mut af[i], &mut bf[i],
317
                               stripe_y, stripe_h,
Monty's avatar
Monty committed
318
319
320
321
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
                               backing, backing_h,
                               cdeffed, cdeffed_h);
322
323
324
325
    }
    // middle rows, away from left and right columns
    for i in boundary1..boundary2 {
      sgrproj_box_sum_fastxy_r1(&mut af[i], &mut bf[i],
Monty's avatar
Monty committed
326
                                stripe_x, stripe_y + i as isize - 1, s, bdm8, cdeffed);
327
    }
Monty's avatar
Monty committed
328
    // bottom rows (if any), away from left and right columns
329
    for i in boundary2..boundary3 {
Monty's avatar
Monty committed
330
      sgrproj_box_sum_fastx_r1(&mut af[i], &mut bf[i],
331
                               stripe_y, stripe_h,
Monty's avatar
Monty committed
332
333
334
335
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
                               backing, backing_h,
                               cdeffed, cdeffed_h);
336
337
338
339
    }
  } else {
    // top/bottom rows and left/right columns, where we need to worry about frame and stripe clipping
    for i in boundary0..boundary3 {
340
      sgrproj_box_sum_slow(&mut af[i], &mut bf[i],
Monty's avatar
Monty committed
341
342
343
344
345
                           stripe_y, stripe_h,
                           stripe_x, stripe_y + i as isize - 1,
                           1, 9, 455, s, bdm8,
                           backing, backing_w, backing_h,
                           cdeffed, cdeffed_w, cdeffed_h);
Monty's avatar
Monty committed
346
347
    }
  }
Monty's avatar
Monty committed
348
349
}

350
351
352
353
354
355
356
357
358
// One oddness about the radius=2 intermediate array computations that
// the spec doesn't make clear: Although the spec defines computation
// of every row (of a, b and f), only half of the rows (every-other
// row) are actually used.  We use the full-size array here but only
// compute the even rows.  This is not so much optimization as trying
// to illustrate what this convoluted filter is actually doing
// (ie not as much as it may appear).
fn sgrproj_box_ab_r2<T: Pixel>(af: &mut[i32; 64+2],
                               bf: &mut[i32; 64+2],
Monty's avatar
Monty committed
359
360
361
                               stripe_x: isize, stripe_y: isize, stripe_h: usize,
                               s: i32, bdm8: usize,
                               backing: &PlaneSlice<T>, backing_w: usize, backing_h: usize,
362
                               cdeffed: &PlaneSlice<T>, cdeffed_w: usize, cdeffed_h: usize) {
Monty's avatar
Monty committed
363
364
365
  // we will fill the af and bf arrays from 0..stripe_h+1 (ni),
  // representing stripe_y-1 to stripe_y+stripe_h+1 inclusive
  let boundary0 = 0; // even
366
  let boundary3 = stripe_h + 2; // don't care if odd
Monty's avatar
Monty committed
367
368
369
370
371
  if backing.x + stripe_x > 1 && stripe_x < backing_w as isize - 2 &&
    cdeffed.x + stripe_x > 1 && stripe_x < cdeffed_w as isize - 2 {
    // Addressing is away from left and right edges of cdeffed storage;
    // no X clipping to worry about, but the top/bottom few rows still
    // need to worry about storage and stripe limits
372

Monty's avatar
Monty committed
373
374
    // boundary1 is the point where we're guaranteed all our y
    // addressing will be both in the stripe and in cdeffed storage
375
    // make even and round up
376
    let boundary1 = ((cmp::max(3, 3 - cdeffed.y - stripe_y) + 1) >> 1 << 1) as usize;
Monty's avatar
Monty committed
377
378
    // boundary 2 is when we have to bounds check along the bottom of
    // the stripe or bottom of storage
379
    // must be even, rounding of +1 cancels fencepost of -1
Monty's avatar
Monty committed
380
    let boundary2 = (cmp::min(cdeffed_h as isize - stripe_y, stripe_h as isize) >> 1 << 1) as usize;
381
382
383

    // top rows, away from left and right columns
    for i in (boundary0..boundary1).step_by(2) {
Monty's avatar
Monty committed
384
      sgrproj_box_sum_fastx_r2(&mut af[i], &mut bf[i],
385
                               stripe_y, stripe_h,
Monty's avatar
Monty committed
386
387
388
389
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
                               backing, backing_h,
                               cdeffed, cdeffed_h);
390
391
392
393
    }
    // middle rows, away from left and right columns
    for i in (boundary1..boundary2).step_by(2) {
      sgrproj_box_sum_fastxy_r2(&mut af[i], &mut bf[i],
Monty's avatar
Monty committed
394
395
                                stripe_x, stripe_y + i as isize - 1,
                                s, bdm8, cdeffed);
396
397
398
    }
    // bottom rows, away from left and right columns
    for i in (boundary2..boundary3).step_by(2) {
Monty's avatar
Monty committed
399
      sgrproj_box_sum_fastx_r2(&mut af[i], &mut bf[i],
400
                               stripe_y, stripe_h,
Monty's avatar
Monty committed
401
402
403
404
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
                               backing, backing_h,
                               cdeffed, cdeffed_h);
Monty's avatar
Monty committed
405
    }
406
407
408
  } else {
    // top/bottom rows and left/right columns, where we need to worry about frame and stripe clipping
    for i in (boundary0..boundary3).step_by(2) {
Monty's avatar
Monty committed
409
410
411
412
413
414
      sgrproj_box_sum_slow(&mut af[i], &mut bf[i],
                           stripe_y, stripe_h,
                           stripe_x, stripe_y + i as isize - 1,
                           2, 25, 164, s, bdm8,
                           backing, backing_w, backing_h,
                           cdeffed, cdeffed_w, cdeffed_h);
415
416
417
418
    }
  }
}

Monty's avatar
Monty committed
419
/// Pass-through "filter" for a disabled radius: writes column `x` of
/// `cdeffed`, scaled up by `SGRPROJ_RST_BITS`, into `f`.
///
/// Entry `i` of `f` corresponds to slice row `y + i`; entries whose row would
/// be negative are skipped, and entries up to (not including) `h` are written.
fn sgrproj_box_f_r0<T: Pixel>(f: &mut[i32; 64], x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
  let first = cmp::max(0, -y) as usize;
  for i in first..h {
    let row = (y + i as isize) as usize;
    let px = i32::cast_from(cdeffed.p(x, row));
    f[i] = px << SGRPROJ_RST_BITS;
  }
}

/// Radius-1 filter output for column `x`.
///
/// `af`/`bf` hold the intermediate columns for x-1, x and x+1.  For every
/// output row the 3x3 neighbourhood of intermediates is combined with weight
/// 4 on the centre and edge-adjacent entries and weight 3 on the corners;
/// the result is `a * pixel + b`, rounded and shifted down.
fn sgrproj_box_f_r1<T: Pixel>(af: &[&[i32; 64+2]; 3], bf: &[&[i32; 64+2]; 3], f: &mut[i32; 64],
                              x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
  // Weighted 3x3 sum over rows i..=i+2 of the three columns.
  let weigh = |t: &[&[i32; 64+2]; 3], i: usize| -> i32 {
    3 * (t[0][i] + t[2][i]   + t[0][i+2] + t[2][i+2]) +
    4 * (t[1][i] + t[0][i+1] + t[1][i+1] + t[2][i+1] + t[1][i+2])
  };

  let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  let rounding = 1i32 << shift >> 1;
  let first = cmp::max(0, -y) as usize;

  for i in first..h {
    let a = weigh(af, i);
    let b = weigh(bf, i);
    let px = i32::cast_from(cdeffed.p(x, (y + i as isize) as usize));
    let v = a * px + b;
    f[i] = (v + rounding) >> shift;
  }
}

/// Radius-2 filter output for column `x`, two output rows per iteration.
///
/// `af`/`bf` hold the intermediate columns for x-1, x and x+1, of which only
/// every other row is populated.  Output row `i` combines the intermediate
/// rows `i` and `i + 2`; output row `i + 1` uses row `i + 2` alone with a
/// one-bit smaller shift.
fn sgrproj_box_f_r2<T: Pixel>(af: &[&[i32; 64+2]; 3], bf: &[&[i32; 64+2]; 3], f: &mut[i32; 64],
                              x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
  // Weighted sum across the three columns of one intermediate row.
  let weigh = |t: &[&[i32; 64+2]; 3], i: usize| -> i32 {
    5 * (t[0][i] + t[2][i]) +
    6 * (t[1][i])
  };

  let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  let shifto = 4 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  let rounding = 1i32 << shift >> 1;
  let roundingo = 1i32 << shifto >> 1;
  let first = cmp::max(0, -y) as usize;

  for i in (first..h).step_by(2) {
    let a = weigh(af, i);
    let b = weigh(bf, i);
    let ao = weigh(af, i + 2);
    let bo = weigh(bf, i + 2);
    let row = (y + i as isize) as usize;

    // first row of the pair: both intermediate rows contribute
    let v = (a + ao) * i32::cast_from(cdeffed.p(x, row)) + b + bo;
    f[i] = (v + rounding) >> shift;

    // second row of the pair: only the lower intermediate row contributes
    let vo = ao * i32::cast_from(cdeffed.p(x, row + 1)) + bo;
    f[i + 1] = (vo + roundingo) >> shifto;
  }
}

464
465
466
467
468
469
pub fn sgrproj_stripe_filter<T: Pixel>(set: u8, xqd: [i8; 2], fi: &FrameInvariants<T>,
                                       crop_w: usize, crop_h: usize,
                                       stripe_w: usize, stripe_h: usize,
                                       cdeffed: &PlaneSlice<T>,
                                       deblocked: &PlaneSlice<T>,
                                       out: &mut PlaneMutSlice<T>) {
Monty's avatar
Monty committed
470
  assert!(stripe_h <= 64);
Monty's avatar
Monty committed
471
  let bdm8 = fi.sequence.bit_depth - 8;
472
473
474
475
476
477
478
479
480
  let mut a_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r2: [i32; 64] = [0; 64];
  let mut a_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r1: [i32; 64] = [0; 64];

  let s_r2: i32 = SGRPROJ_PARAMS_S[set as usize][0];
  let s_r1: i32 = SGRPROJ_PARAMS_S[set as usize][1];
Monty's avatar
Monty committed
481

482
  let outstart = cmp::max(0, cmp::max(-cdeffed.y, -out.y)) as usize;
483

Monty's avatar
Monty committed
484
  /* prime the intermediate arrays */
485
  if s_r2 > 0 {
Monty's avatar
Monty committed
486
487
488
    sgrproj_box_ab_r2(&mut a_r2[0], &mut b_r2[0],
                      -1, 0, stripe_h,
                      s_r2, bdm8,
489
490
                      &deblocked, crop_w, crop_h,
                      &cdeffed, crop_w, crop_h);
Monty's avatar
Monty committed
491
492
493
    sgrproj_box_ab_r2(&mut a_r2[1], &mut b_r2[1],
                      0, 0, stripe_h,
                      s_r2, bdm8,
494
495
                      &deblocked, crop_w, crop_h,
                      &cdeffed, crop_w, crop_h);
496
  }
497
  if s_r1 > 0 {
Monty's avatar
Monty committed
498
499
500
    sgrproj_box_ab_r1(&mut a_r1[0], &mut b_r1[0],
                      -1, 0, stripe_h,
                      s_r1, bdm8,
501
502
                      &deblocked, crop_w, crop_h,
                      &cdeffed, crop_w, crop_h);
Monty's avatar
Monty committed
503
504
505
    sgrproj_box_ab_r1(&mut a_r1[1], &mut b_r1[1],
                      0, 0, stripe_h,
                      s_r1, bdm8,
506
507
                      &deblocked, crop_w, crop_h,
                      &cdeffed, crop_w, crop_h);
508
  }
Raphaël Zumer's avatar
Raphaël Zumer committed
509

Monty's avatar
Monty committed
510
  /* iterate by column */
Monty's avatar
Monty committed
511
  for xi in 0..stripe_w {
512
    /* build intermediate array columns */
513
    if s_r2 > 0 {
Monty's avatar
Monty committed
514
515
516
      sgrproj_box_ab_r2(&mut a_r2[(xi+2)%3], &mut b_r2[(xi+2)%3],
                        xi as isize + 1, 0, stripe_h,
                        s_r2, bdm8,
517
518
                        &deblocked, crop_w, crop_h,
                        &cdeffed, crop_w, crop_h);
519
520
      let ap0: [&[i32; 64+2]; 3] = [&a_r2[xi%3], &a_r2[(xi+1)%3], &a_r2[(xi+2)%3]];
      let bp0: [&[i32; 64+2]; 3] = [&b_r2[xi%3], &b_r2[(xi+1)%3], &b_r2[(xi+2)%3]];
521
      sgrproj_box_f_r2(&ap0, &bp0, &mut f_r2, xi, 0, stripe_h as usize, &cdeffed);
522
    } else {
523
      sgrproj_box_f_r0(&mut f_r2, xi, 0, stripe_h as usize, &cdeffed);
524
    }
525
    if s_r1 > 0 {
Monty's avatar
Monty committed
526
527
528
      sgrproj_box_ab_r1(&mut a_r1[(xi+2)%3], &mut b_r1[(xi+2)%3],
                        xi as isize + 1, 0, stripe_h,
                        s_r1, bdm8,
529
530
                        &deblocked, crop_w, crop_h,
                        &cdeffed, crop_w, crop_h);
531
532
533
      let ap1: [&[i32; 64+2]; 3] = [&a_r1[xi%3], &a_r1[(xi+1)%3], &a_r1[(xi+2)%3]];
      let bp1: [&[i32; 64+2]; 3] = [&b_r1[xi%3], &b_r1[(xi+1)%3], &b_r1[(xi+2)%3]];

534
      sgrproj_box_f_r1(&ap1, &bp1, &mut f_r1, xi, 0, stripe_h as usize, &cdeffed);
535
    } else {
536
      sgrproj_box_f_r0(&mut f_r1, xi, 0, stripe_h as usize, &cdeffed);
Monty's avatar
Monty committed
537
    }
538
539

    /* apply filter */
540
    let bit_depth = fi.sequence.bit_depth;
541
542
543
    let w0 = xqd[0] as i32;
    let w1 = xqd[1] as i32;
    let w2 = (1 << SGRPROJ_PRJ_BITS) - w0 - w1;
544
545
    for yi in outstart..stripe_h as usize {
      let u = i32::cast_from(cdeffed.p(xi, yi)) << SGRPROJ_RST_BITS;
546
      let v = w0*f_r2[yi] + w1*u + w2*f_r1[yi];
547
      let s = (v + (1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) >> 1)) >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS);
548
      out[yi][xi] = T::cast_from(clamp(s, 0, (1 << bit_depth) - 1));
Monty's avatar
Monty committed
549
550
551
552
    }
  }
}

553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
// Frame inputs below aren't all equal, and will change as work
// continues.  There's no deblocked reconstruction available at this
// point of RDO, so we use the non-deblocked reconstruction, cdef and
// input.  The input can be a full-sized frame. Cdef input is a partial
// frame constructed specifically for RDO.

// For simplicity, this ignores stripe segmentation (it's possible the
// extra complexity isn't worth it and we'll ignore stripes
// permanently during RDO, but that's not been tested yet). Data
// access inside the cdef frame is monolithic and clipped to the cdef
// borders.

// Input params follow the same rules as sgrproj_stripe_filter.
// Inputs are relative to the colocated slice views.
pub fn sgrproj_solve<T: Pixel>(set: u8, fi: &FrameInvariants<T>,
                               input: &PlaneSlice<T>,
                               cdeffed: &PlaneSlice<T>,
                               cdef_w: usize, cdef_h: usize) -> (i8, i8) {

  assert!(cdef_h <= 64);
  let bdm8 = fi.sequence.bit_depth - 8;
  let mut a_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r2: [i32; 64] = [0; 64];
  let mut a_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r1: [i32; 64] = [0; 64];

  let s_r2: i32 = SGRPROJ_PARAMS_S[set as usize][0];
  let s_r1: i32 = SGRPROJ_PARAMS_S[set as usize][1];

  let mut h:[[f64; 2]; 2] = [[0.,0.],[0.,0.]];
  let mut c:[f64; 2] = [0., 0.];

  /* prime the intermediate arrays */
  if s_r2 > 0 {
    sgrproj_box_ab_r2(&mut a_r2[0], &mut b_r2[0],
                      -1, 0, cdef_h,
                      s_r2, bdm8,
                      &cdeffed, cdef_w, cdef_h,
                      &cdeffed, cdef_w, cdef_h);
    sgrproj_box_ab_r2(&mut a_r2[1], &mut b_r2[1],
                      0, 0, cdef_h,
                      s_r2, bdm8,
                      &cdeffed, cdef_w, cdef_h,
                      &cdeffed, cdef_w, cdef_h);
  }
  if s_r1 > 0 {
    sgrproj_box_ab_r1(&mut a_r1[0], &mut b_r1[0],
                      -1, 0, cdef_h,
                      s_r1, bdm8,
                      &cdeffed, cdef_w, cdef_h,
                      &cdeffed, cdef_w, cdef_h);
    sgrproj_box_ab_r1(&mut a_r1[1], &mut b_r1[1],
                      0, 0, cdef_h,
                      s_r1, bdm8,
                      &cdeffed, cdef_w, cdef_h,
                      &cdeffed, cdef_w, cdef_h);
  }
612

613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
  /* iterate by column */
  for xi in 0..cdef_w {
    /* build intermediate array columns */
    if s_r2 > 0 {
      sgrproj_box_ab_r2(&mut a_r2[(xi+2)%3], &mut b_r2[(xi+2)%3],
                        xi as isize + 1, 0, cdef_h,
                        s_r2, bdm8,
                        &cdeffed, cdef_w, cdef_h,
                        &cdeffed, cdef_w, cdef_h);
      let ap0: [&[i32; 64+2]; 3] = [&a_r2[xi%3], &a_r2[(xi+1)%3], &a_r2[(xi+2)%3]];
      let bp0: [&[i32; 64+2]; 3] = [&b_r2[xi%3], &b_r2[(xi+1)%3], &b_r2[(xi+2)%3]];
      sgrproj_box_f_r2(&ap0, &bp0, &mut f_r2, xi, 0, cdef_h as usize, &cdeffed);
    } else {
      sgrproj_box_f_r0(&mut f_r2, xi, 0, cdef_h as usize, &cdeffed);
    }
    if s_r1 > 0 {
      sgrproj_box_ab_r1(&mut a_r1[(xi+2)%3], &mut b_r1[(xi+2)%3],
                        xi as isize + 1, 0, cdef_h,
                        s_r1, bdm8,
                        &cdeffed, cdef_w, cdef_h,
                        &cdeffed, cdef_w, cdef_h);
      let ap1: [&[i32; 64+2]; 3] = [&a_r1[xi%3], &a_r1[(xi+1)%3], &a_r1[(xi+2)%3]];
      let bp1: [&[i32; 64+2]; 3] = [&b_r1[xi%3], &b_r1[(xi+1)%3], &b_r1[(xi+2)%3]];

      sgrproj_box_f_r1(&ap1, &bp1, &mut f_r1, xi, 0, cdef_h as usize, &cdeffed);
    } else {
      sgrproj_box_f_r0(&mut f_r1, xi, 0, cdef_h as usize, &cdeffed);
    }

    for yi in 0..cdef_h {
      let u = i32::cast_from(cdeffed.p(yi,xi)) << SGRPROJ_RST_BITS;
      let s = i32::cast_from(input.p(yi,xi)) << SGRPROJ_RST_BITS;
      let f2 = f_r2[yi] - u;
      let f1 = f_r1[yi] - u;
      h[0][0] += f2 as f64 * f2 as f64;
      h[1][1] += f1 as f64 * f1 as f64;
      h[0][1] += f1 as f64 * f2 as f64;
      c[0] += f2 as f64 * s as f64;
      c[1] += f1 as f64 * s as f64;
    }
  }

  // this is lifted almost in-tact from libaom
  let n = cdef_w as f64 * cdef_h as f64;
  h[0][0] /= n;
  h[0][1] /= n;
  h[1][1] /= n;
  h[1][0] = h[0][1];
  c[0] /= n;
  c[1] /= n;
  let (xq0, xq1) = if s_r2 == 0 {
    // H matrix is now only the scalar h[1][1]
    // C vector is now only the scalar c[1]
    if h[1][1] == 0. {
      (0, 0)
    } else {
      (0, (c[1] / h[1][1]).round() as i32)
    }
  } else if s_r1 == 0 {
    // H matrix is now only the scalar h[0][0]
    // C vector is now only the scalar c[0]
    if h[0][0] == 0. {
      (0, 0)
    } else {
      ((c[0] / h[0][0]).round() as i32, 0)
    }
  } else {
    let det = h[0][0] * h[1][1] - h[0][1] * h[1][0];
    if det == 0. {
      (0, 0)
    } else {
      // If scaling up dividend would overflow, instead scale down the divisor
      let div1 = (h[1][1] * c[0] - h[0][1] * c[1]) * (1 << SGRPROJ_PRJ_BITS) as f64;
      let div2 = (h[0][0] * c[1] - h[1][0] * c[0]) * (1 << SGRPROJ_PRJ_BITS) as f64;

      ((div1 / det).round() as i32, (div2 / det).round() as i32)
    }
  };
  (clamp(xq0, SGRPROJ_XQD_MIN[0] as i32, SGRPROJ_XQD_MAX[0] as i32) as i8,
   clamp(xq1, SGRPROJ_XQD_MIN[1] as i32, SGRPROJ_XQD_MAX[1] as i32) as i8)
}

695
fn wiener_stripe_filter<T: Pixel>(coeffs: [[i8; 3]; 2], fi: &FrameInvariants<T>,
696
697
698
699
                                  crop_w: usize, crop_h: usize,
                                  stripe_w: usize, stripe_h: usize,
                                  stripe_x: usize, stripe_y: isize,
                                  cdeffed: &Plane<T>, deblocked: &Plane<T>, out: &mut Plane<T>) {
700
  let bit_depth = fi.sequence.bit_depth;
Monty's avatar
Monty committed
701
702
  let round_h = if bit_depth == 12 {5} else {3};
  let round_v = if bit_depth == 12 {9} else {11};
703
704
  let offset = 1 << (bit_depth + WIENER_BITS - round_h - 1);
  let limit = (1 << (bit_depth + 1 + WIENER_BITS - round_h)) - 1;
Raphaël Zumer's avatar
Raphaël Zumer committed
705

Monty's avatar
Monty committed
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
  let mut work: [i32; MAX_SB_SIZE+7] = [0; MAX_SB_SIZE+7];
  let vfilter: [i32; 7] = [ coeffs[0][0] as i32,
                            coeffs[0][1] as i32,
                            coeffs[0][2] as i32,
                            128 - 2 * (coeffs[0][0] as i32 +
                                       coeffs[0][1] as i32 +
                                       coeffs[0][2] as i32 ),
                            coeffs[0][2] as i32,
                            coeffs[0][1] as i32,
                            coeffs[0][0] as i32];
  let hfilter: [i32; 7] = [ coeffs[1][0] as i32,
                            coeffs[1][1] as i32,
                            coeffs[1][2] as i32,
                            128 - 2 * (coeffs[1][0] as i32 +
                                       coeffs[1][1] as i32 +
                                       coeffs[1][2] as i32),
                            coeffs[1][2] as i32,
                            coeffs[1][1] as i32,
                            coeffs[1][0] as i32];

  // unlike x, our y can be negative to start as the first stripe
  // starts off the top of the frame by 8 pixels, and can also run off the end of the frame
Monty's avatar
Monty committed
728
729
  let start_wi = if stripe_y < 0 {-stripe_y} else {0} as usize;
  let start_yi = if stripe_y < 0 {0} else {stripe_y} as usize;
730
  let end_i = cmp::max(0, if stripe_h as isize + stripe_y > crop_h as isize {
Monty's avatar
Monty committed
731
    crop_h as isize - stripe_y - start_wi as isize
Monty's avatar
Monty committed
732
  } else {
733
    stripe_h as isize - start_wi as isize
Monty's avatar
Monty committed
734
  }) as usize;
Raphaël Zumer's avatar
Raphaël Zumer committed
735

Romain Vimont's avatar
Romain Vimont committed
736
  let mut out_slice = out.mut_slice(PlaneOffset{x: 0, y: start_yi as isize});
Monty's avatar
Monty committed
737

Monty's avatar
Monty committed
738
739
  for xi in stripe_x..stripe_x+stripe_w {
    let n = cmp::min(7, crop_w as isize + 3 - xi as isize);
740
    for yi in stripe_y - 3..stripe_y + stripe_h as isize + 4 {
741
      let src_plane: &Plane<T>;
Monty's avatar
Monty committed
742
743
      let mut acc = 0;
      let ly;
Monty's avatar
Monty committed
744
745
      if yi < stripe_y {
        ly = cmp::max(clamp(yi, 0, crop_h as isize - 1), stripe_y - 2) as usize;
Monty's avatar
Monty committed
746
        src_plane = deblocked;
747
      } else if yi < stripe_y+stripe_h as isize {
Monty's avatar
Monty committed
748
        ly = clamp(yi, 0, crop_h as isize - 1) as usize;
Monty's avatar
Monty committed
749
750
        src_plane = cdeffed;
      } else {
751
        ly = cmp::min(clamp(yi, 0, crop_h as isize - 1), stripe_y + stripe_h as isize + 1) as usize;
Monty's avatar
Monty committed
752
753
        src_plane = deblocked;
      }
Raphaël Zumer's avatar
Raphaël Zumer committed
754

Monty's avatar
Monty committed
755
      for i in 0..3 - xi as isize {
756
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p(0, ly));
Monty's avatar
Monty committed
757
758
      }
      for i in cmp::max(0,3 - (xi as isize))..n {
759
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p((xi as isize + i - 3) as usize, ly));
Monty's avatar
Monty committed
760
761
      }
      for i in n..7 {
762
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p(crop_w - 1, ly));
Monty's avatar
Monty committed
763
      }
Raphaël Zumer's avatar
Raphaël Zumer committed
764

765
      acc = (acc + (1 << round_h >> 1)) >> round_h;
Monty's avatar
Monty committed
766
      work[(yi-stripe_y+3) as usize] = clamp(acc, -offset, limit-offset);
Monty's avatar
Monty committed
767
768
    }

769
    for (wi, dst) in (start_wi..start_wi+end_i).zip(out_slice.rows_iter_mut().map(|row| &mut row[xi]).take(end_i)) {
Monty's avatar
Monty committed
770
771
772
773
      let mut acc = 0;
      for (i,src) in (0..7).zip(work[wi..wi+7].iter_mut()) {
        acc += vfilter[i] * *src;
      }
774
      *dst = T::cast_from(clamp((acc + (1 << round_v >> 1)) >> round_v, 0, (1 << bit_depth) - 1));
Monty's avatar
Monty committed
775
    }
776
777
778
  }
}

779
#[derive(Copy, Clone, Debug)]
780
781
782
783
784
785
786
787
788
789
790
791
pub struct RestorationUnit {
  pub filter: RestorationFilter,
}

impl RestorationUnit {
  pub fn default() -> RestorationUnit {
    RestorationUnit {
      filter: RestorationFilter::default(),
    }
  }
}

792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
/// Row-major grid of the restoration units of one plane.
///
/// Indexing with a row number yields that row as a slice of `cols` units.
#[derive(Clone, Debug)]
pub struct FrameRestorationUnits {
  // `cols * rows` units, stored row after row
  units: Box<[RestorationUnit]>,
  /// Number of units in each row.
  pub cols: usize,
  /// Number of rows.
  pub rows: usize,
}

impl FrameRestorationUnits {
  /// Allocates a `cols` x `rows` grid filled with `RestorationUnit::default()`.
  pub fn new(cols: usize, rows: usize) -> Self {
    let units = vec![RestorationUnit::default(); cols * rows].into_boxed_slice();
    Self { units, cols, rows }
  }
}

impl Index<usize> for FrameRestorationUnits {
  type Output = [RestorationUnit];

  /// Returns row `index` of the grid as a slice of `cols` units.
  #[inline(always)]
  fn index(&self, index: usize) -> &Self::Output {
    let first = index * self.cols;
    &self.units[first..first + self.cols]
  }
}

impl IndexMut<usize> for FrameRestorationUnits {
  /// Returns row `index` of the grid as a mutable slice of `cols` units.
  #[inline(always)]
  fn index_mut(&mut self, index: usize) -> &mut Self::Output {
    let first = index * self.cols;
    &mut self.units[first..first + self.cols]
  }
}

824
/// Loop restoration parameters of a single plane.
#[derive(Clone, Debug)]
pub struct RestorationPlaneConfig {
  /// Restoration type of the plane (a `RESTORE_*` value).
  pub lrf_type: u8,
  /// Size of a restoration unit of this plane, in pixels.
  pub unit_size: usize,
  /// `1 << sb_shift` gives the number of superblocks both horizontally and
  /// vertically in a restoration unit, not accounting for RU stretching.
  pub sb_shift: usize,
  /// Stripe height is 64 in all cases except 4:2:0 chroma planes where
  /// it is 32.  This is independent of all other setup parameters.
  pub stripe_height: usize,
  /// Number of restoration unit columns.
  pub cols: usize,
  /// Number of restoration unit rows.
  pub rows: usize,
}

#[derive(Clone, Debug)]
pub struct RestorationPlane {
  pub cfg: RestorationPlaneConfig,
841
  pub units: FrameRestorationUnits,
842
843
844
845
846
847
848
849
850
}

/// Position of a restoration unit inside a plane's unit grid.
///
/// Derives `Debug` like the other public restoration types in this file, and
/// `PartialEq`/`Eq` so offsets can be compared directly.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct RestorationPlaneOffset {
  /// Unit row.
  pub row: usize,
  /// Unit column.
  pub col: usize,
}

impl RestorationPlane {
Monty's avatar
Monty committed
851
  pub fn new(lrf_type: u8, unit_size: usize, sb_shift: usize, stripe_decimate: usize,
852
             cols: usize, rows: usize) -> RestorationPlane {
Monty's avatar
Monty committed
853
    let stripe_height = if stripe_decimate != 0 {32} else {64};
854
    RestorationPlane {
855
856
857
858
859
860
861
862
      cfg: RestorationPlaneConfig {
        lrf_type,
        unit_size,
        sb_shift,
        stripe_height,
        cols,
        rows,
      },
863
      units: FrameRestorationUnits::new(cols, rows),
864
865
866
    }
  }

867
868
869
  // Stripes are always 64 pixels high in a non-subsampled
  // frame, and decimated from 64 pixels in chroma.  When
  // filtering, they are not co-located on Y with superblocks.
Monty's avatar
Monty committed
870
  fn restoration_unit_index_by_stripe(&self, stripenum: usize, rux: usize) -> (usize, usize) {
871
    (
872
873
      cmp::min(rux, self.cfg.cols - 1),
      cmp::min(stripenum * self.cfg.stripe_height / self.cfg.unit_size, self.cfg.rows - 1),
874
875
876
    )
  }

Monty's avatar
Monty committed
877
878
  pub fn restoration_unit_by_stripe(&self, stripenum: usize, rux: usize) -> &RestorationUnit {
    let (x, y) = self.restoration_unit_index_by_stripe(stripenum, rux);
879
    &self.units[y][x]
880
  }
881
882
}

883
#[derive(Clone, Debug)]
884
pub struct RestorationState {
Romain Vimont's avatar
Romain Vimont committed
885
  pub planes: [RestorationPlane; PLANES]
886
887
888
}

impl RestorationState {
889
  pub fn new<T: Pixel>(fi: &FrameInvariants<T>, input: &Frame<T>) -> Self {
890
    let PlaneConfig { xdec, ydec, .. } = input.planes[1].cfg;
Monty's avatar
Monty committed
891
    let stripe_uv_decimate = if xdec>0 && ydec>0 {1} else {0};
892
893
894
895
896
897
    // Currrently opt for smallest possible restoration unit size (1
    // superblock) This is *temporary*.  Counting on it will break
    // very shortly; the 1-superblock hardwiring is only until the
    // upper level encoder is capable of dealing with the delayed
    // writes that RU size > SB size will require.
    let lrf_y_shift = if fi.sequence.use_128x128_superblock {1} else {2};
Monty's avatar
Monty committed
898
    let lrf_uv_shift = lrf_y_shift + stripe_uv_decimate;
899
900
901
902
903
904
905

    // derive the rest
    let y_unit_log2 = RESTORATION_TILESIZE_MAX_LOG2 - lrf_y_shift;
    let uv_unit_log2 = RESTORATION_TILESIZE_MAX_LOG2 - lrf_uv_shift;
    let y_unit_size = 1 << y_unit_log2;
    let uv_unit_size = 1 << uv_unit_log2;
    let y_sb_log2 = if fi.sequence.use_128x128_superblock {7} else {6};
Monty's avatar
Monty committed
906
    let uv_sb_log2 = y_sb_log2 - stripe_uv_decimate;
907
908
    let cols = ((fi.width + (y_unit_size >> 1)) / y_unit_size).max(1);
    let rows = ((fi.height + (y_unit_size >> 1)) / y_unit_size).max(1);
909

910
    RestorationState {
Romain Vimont's avatar
Romain Vimont committed
911
912
913
914
915
916
917
918
      planes: [
        RestorationPlane::new(RESTORE_SWITCHABLE, y_unit_size, y_unit_log2 - y_sb_log2,
                              0, cols, rows),
        RestorationPlane::new(RESTORE_SWITCHABLE, uv_unit_size, uv_unit_log2 - uv_sb_log2,
                              stripe_uv_decimate, cols, rows),
        RestorationPlane::new(RESTORE_SWITCHABLE, uv_unit_size, uv_unit_log2 - uv_sb_log2,
                              stripe_uv_decimate, cols, rows)
      ],
919
920
    }
  }
921

922
923
  pub fn lrf_filter_frame<T: Pixel>(&mut self, out: &mut Frame<T>, pre_cdef: &Frame<T>,
                                    fi: &FrameInvariants<T>) {
924
    let cdeffed = out.clone();
Raphaël Zumer's avatar
Raphaël Zumer committed
925

926
927
928
929
930
    // unlike the other loop filters that operate over the padded
    // frame dimensions, restoration filtering and source pixel
    // accesses are clipped to the original frame dimensions
    // that's why we use fi.width and fi.height instead of PlaneConfig fields

Monty's avatar
Monty committed
931
    // number of stripes (counted according to colocated Y luma position)
932
    let stripe_n = (fi.height + 7) / 64 + 1;
Raphaël Zumer's avatar
Raphaël Zumer committed
933

Monty's avatar
Monty committed
934
    for pli in 0..PLANES {
Romain Vimont's avatar
Romain Vimont committed
935
      let rp = &self.planes[pli];
Monty's avatar
Monty committed
936
      let xdec = out.planes[pli].cfg.xdec;
937
      let ydec = out.planes[pli].cfg.ydec;
938
939
      let crop_w = (fi.width + (1 << xdec >> 1)) >> xdec;
      let crop_h = (fi.height + (1 << ydec >> 1)) >> ydec;
Raphaël Zumer's avatar
Raphaël Zumer committed
940

Monty's avatar
Monty committed
941
942
      for si in 0..stripe_n {
        // stripe y pixel locations must be able to overspan the frame
943
        let stripe_start_y = (si as isize * 64 - 8) >> ydec;
Monty's avatar
Monty committed
944
        let stripe_size = 64 >> ydec; // one past, unlike spec
Raphaël Zumer's avatar
Raphaël Zumer committed
945

Monty's avatar
Monty committed
946
        // horizontally, go rdu-by-rdu
947
        for rux in 0..rp.cfg.cols {
Monty's avatar
Monty committed
948
          // stripe x pixel locations must be clipped to frame, last may need to stretch
949
950
          let x = rux * rp.cfg.unit_size;
          let size = if rux == rp.cfg.cols - 1 {
951
            crop_w - x
Monty's avatar
Monty committed
952
          } else {
953
            rp.cfg.unit_size
Monty's avatar
Monty committed
954
          };
Monty's avatar
Monty committed
955
          let ru = rp.restoration_unit_by_stripe(si, rux);
Monty's avatar
Monty committed
956
          match ru.filter {
957
            RestorationFilter::Wiener{coeffs} => {
958
959
              wiener_stripe_filter(coeffs, fi,
                                   crop_w, crop_h,
960
961
                                   size, stripe_size,
                                   x, stripe_start_y,
962
963
                                   &cdeffed.planes[pli], &pre_cdef.planes[pli],
                                   &mut out.planes[pli]);
Monty's avatar
Monty committed
964
965
            },
            RestorationFilter::Sgrproj{set, xqd} => {
966
              sgrproj_stripe_filter(set, xqd, fi,
967
968
969
                                    crop_w - x,
                                    (crop_h as isize - stripe_start_y) as usize,
                                    size, stripe_size,
Romain Vimont's avatar
Romain Vimont committed
970
                                    &cdeffed.planes[pli].slice(PlaneOffset{x: x as isize,
971
                                                                           y: stripe_start_y}),
Romain Vimont's avatar
Romain Vimont committed
972
                                    &pre_cdef.planes[pli].slice(PlaneOffset{x: x as isize,
973
                                                                            y: stripe_start_y}),
Romain Vimont's avatar
Romain Vimont committed
974
                                    &mut out.planes[pli].mut_slice(PlaneOffset{x: x as isize,
975
                                                                               y: stripe_start_y}));
Monty's avatar
Monty committed
976
977
978
979
980
981
982
            },
            RestorationFilter::None => {
              // do nothing
            }
          }
        }
      }
Raphaël Zumer's avatar
Raphaël Zumer committed
983
    }
984
985
  }
}