lrf.rs 40 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
// Copyright (c) 2017-2018, The rav1e contributors. All rights reserved
//
// This source code is subject to the terms of the BSD 2 Clause License and
// the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
// was not distributed with this source code in the LICENSE file, you can
// obtain it at www.aomedia.org/license/software. If the Alliance for Open
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

#![allow(safe_extern_statics)]

Raphaël Zumer's avatar
Raphaël Zumer committed
12
13
14
15
16
17
use crate::encoder::Frame;
use crate::encoder::FrameInvariants;
use crate::context::SuperBlockOffset;
use crate::context::PLANES;
use crate::context::MAX_SB_SIZE;
use crate::plane::Plane;
Monty's avatar
Monty committed
18
use crate::plane::PlaneSlice;
19
use crate::plane::PlaneMutSlice;
Raphaël Zumer's avatar
Raphaël Zumer committed
20
21
use crate::plane::PlaneOffset;
use crate::plane::PlaneConfig;
22
use std::cmp;
Raphaël Zumer's avatar
Raphaël Zumer committed
23
use crate::util::clamp;
24
25
use crate::util::CastFromPrimitive;
use crate::util::Pixel;
26

27
// NOTE(review): the name suggests log2 of the largest restoration tile
// size; nothing in this part of the file uses it -- confirm at the use site.
pub const RESTORATION_TILESIZE_MAX_LOG2: usize = 8;

// Restoration type codes.
pub const RESTORE_NONE: u8 = 0;
pub const RESTORE_SWITCHABLE: u8 = 1;
pub const RESTORE_WIENER: u8 = 2;
pub const RESTORE_SGRPROJ: u8 = 3;

// Per-tap Wiener coefficient tables.
// NOTE(review): presumably minimum / midpoint / maximum tap values and the
// sub-exponential coding parameter for each tap -- confirm against the
// bitstream writer.
pub const WIENER_TAPS_MIN: [i8; 3] = [ -5, -23, -17 ];
pub const WIENER_TAPS_MID: [i8; 3] = [ 3, -7, 15 ];
pub const WIENER_TAPS_MAX: [i8; 3] = [ 10, 8, 46 ];
pub const WIENER_TAPS_K:   [i8; 3] = [ 1, 2, 3 ];
pub const WIENER_BITS: usize = 7;

// Self-guided projection ("sgrproj") weight tables.
// NOTE(review): presumably the legal range and midpoint of the two xqd
// weights -- confirm against the bitstream writer.
pub const SGRPROJ_XQD_MIN: [i8; 2] = [ -96, -32 ];
pub const SGRPROJ_XQD_MID: [i8; 2] = [ -32, 31 ];
pub const SGRPROJ_XQD_MAX: [i8; 2] = [ 31, 95 ];
pub const SGRPROJ_PRJ_SUBEXP_K: u8 = 4;
// The filter weights in sgrproj_stripe_filter sum to 1 << SGRPROJ_PRJ_BITS.
pub const SGRPROJ_PRJ_BITS: u8 = 7;
// log2 of the number of rows in SGRPROJ_PARAMS_S.
pub const SGRPROJ_PARAMS_BITS: u8 = 4;
// Rounding shift applied to p*s in sgrproj_sum_finish.
pub const SGRPROJ_MTABLE_BITS: u8 = 20;
// Fixed-point precision of the `a` term produced by sgrproj_sum_finish.
pub const SGRPROJ_SGR_BITS: u8 = 8;
// Rounding shift applied to the `b` term in sgrproj_sum_finish.
pub const SGRPROJ_RECIP_BITS: u8 = 12;
// Extra fractional bits carried by the intermediate filter outputs.
pub const SGRPROJ_RST_BITS: u8 = 4;

// Strength pairs selected by the `set` index: entry [set][0] is the strength
// for the radius-2 pass and entry [set][1] the strength for the radius-1
// pass.  A strength of 0 disables that pass.
pub const SGRPROJ_PARAMS_S: [[i32; 2]; 1 << SGRPROJ_PARAMS_BITS] = [
  [140, 3236], [112, 2158], [ 93, 1618], [ 80, 1438],
  [ 70, 1295], [ 58, 1177], [ 47, 1079], [ 37,  996],
  [ 30,  925], [ 25,  863], [  0, 2589], [  0, 1618],
  [  0, 1177], [  0,  925], [ 56,    0], [ 22,    0]
];

57
/// Restoration filter choice together with the parameters that filter needs.
#[derive(Copy, Clone, Debug)]
pub enum RestorationFilter {
  /// No restoration filtering.
  None,
  /// Wiener filter carrying two sets of three `i8` coefficients.
  Wiener  { coeffs: [[i8; 3]; 2] },
  /// Self-guided projection filter.
  // NOTE(review): `set` and `xqd` presumably are the values handed to
  // sgrproj_stripe_filter (parameter-set index and the two projection
  // weights) -- confirm at the call site.
  Sgrproj { set: u8,
            xqd: [i8; 2] },
}

impl Default for RestorationFilter {
  /// The default is `RestorationFilter::None`.
  ///
  /// Implemented through the `Default` trait rather than an inherent
  /// `default()` so the type also works with `Default::default()`,
  /// `#[derive(Default)]` on containing structs and `unwrap_or_default()`;
  /// `RestorationFilter::default()` keeps working for existing callers.
  fn default() -> RestorationFilter {
    RestorationFilter::None
  }
}

Monty's avatar
Monty committed
71
72
73
// Turns the raw box sums of one filter window into the (a, b) pair used by
// the self-guided filter.
//
// ssq / sum:   sum of squared samples / sum of samples over the window
// n:           number of samples in the window
// one_over_n:  fixed-point reciprocal of n (SGRPROJ_RECIP_BITS fractional bits)
// s:           filter strength
// bdm8:        bit depth minus 8; the sums are rounded down to 8-bit scale
//              before the variance-like term is formed
//
// The products p*s and (256 - a)*sum*one_over_n can exceed i32::MAX for
// valid inputs (for example an 8-bit 3x3 window that is half 0 and half 255
// with s = 3236, or any bright 12-bit window), so both are evaluated in i64.
// The values returned fit in i32 again after the rounding shifts.
fn sgrproj_sum_finish(ssq: i32, sum: i32, n: i32, one_over_n: i32, s: i32, bdm8: usize) -> (i32, i32) {
  // round the sums down to 8-bit scale
  let scaled_ssq = (ssq + (1i32 << (2 * bdm8) >> 1)) >> (2 * bdm8);
  let scaled_sum = (sum + (1i32 << bdm8 >> 1)) >> bdm8;

  // n^2 times the window variance, floored at zero
  let p = cmp::max(0, scaled_ssq * n - scaled_sum * scaled_sum);
  let z = (i64::from(p) * i64::from(s) + (1i64 << SGRPROJ_MTABLE_BITS >> 1))
    >> SGRPROJ_MTABLE_BITS;

  let a: i32 = if z >= 255 {
    256
  } else if z == 0 {
    1
  } else {
    // 0 < z < 255 here, so the quotient is below 256
    (((z << SGRPROJ_SGR_BITS) + z / 2) / (z + 1)) as i32
  };

  let b = i64::from((1i32 << SGRPROJ_SGR_BITS) - a) * i64::from(sum) * i64::from(one_over_n);
  let b = (b + (1i64 << SGRPROJ_RECIP_BITS >> 1)) >> SGRPROJ_RECIP_BITS;
  (a, b as i32)
}
Monty's avatar
Monty committed
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132

// The addressing below is a bit confusing, made worse by LRF's odd
// clipping requirements, and our reusing code for partial frames.  So
// I'm documenting the LRF conventions here in detail.

// 'Relative to plane storage' means that a coordinate or bound is
// being applied as if to the full Plane backing the PlaneSlice.  For
// example, a PlaneSlice may represent a subset of a middle of a
// plane, but when we say the top/left bounds are clipped 'relative to
// plane storage', that means relative to 0,0 of the plane, not 0,0 of
// the plane slice.

// 'Relative to the slice view' means that a coordinate or bound is
// counted from the 0,0 of the PlaneSlice, not the Plane from which it
// was sliced.

// Passed in plane slices may be the same size or different sizes;
// filter access will be clipped to 0,0..w,h of the underlying plane
// storage for both planes, depending which is accessed.  Note that
// the passed in w/h that specifies the storage clipping is actually
// relative to the slice view, not the plane storage (it
// simplifies the math internally).  Eg, if a PlaneSlice has a y
// offset of -2 (meaning its origin is two rows above the top row of
// the backing plane), and we pass in a height of 4, the rows
// 0,1,2,3,4 of the slice address -2, -1, 0, 1, 2 of the backing plane
// with access clipped to 0, 0, 0, 1, 1.

// Active area cropping is done by specifying a w,h smaller
// than the actual underlying plane storage.

// stripe_y is the beginning of the current stripe (used for source
// buffer choice/clipping) relative to the passed in plane view.  It
// may (and regularly will) be negative.

// stripe_h is the height of the current stripe (again used for source
// buffer choice/clipping).  It may specify a stripe boundary less
// than, equal to, or larger than the buffers we're accessing.

// x and y specify the center pixel of the current filter kernel
// application.  They are relative to the passed in slice views.

fn sgrproj_box_sum_slow<T: Pixel>(a: &mut i32, b: &mut i32,
                                  stripe_y: isize, stripe_h: usize,
                                  x: isize, y: isize,
                                  r: usize, n: i32, one_over_n: i32, s: i32, bdm8: usize,
                                  backing: &PlaneSlice<T>, backing_w: usize, backing_h: usize,
                                  cdeffed: &PlaneSlice<T>, cdeffed_w: usize, cdeffed_h: usize) {  
133
134
135
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;

Monty's avatar
Monty committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
  for yi in y-r as isize..=y+r as isize {
    let src_plane;
    let src_w;
    let src_h;
    
    // decide if we're vertically inside or outside the stripe
    if yi >= stripe_y && yi < stripe_y + stripe_h as isize {
      src_plane = cdeffed;
      src_w = (cdeffed_w as isize - x + r as isize) as usize;
      src_h = cdeffed_h as isize;
    } else {
      src_plane = backing;
      src_w = (backing_w as isize - x + r as isize) as usize;
      src_h = backing_h as isize;
    }
    // clamp vertically to storage at top and passed-in height at bottom 
    let cropped_y = clamp(yi, -src_plane.y, src_h - 1);
    // clamp vertically to stripe limits
    let ly = clamp(cropped_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
    // Reslice to avoid a negative X index.
    let p = src_plane.reslice(x - r as isize,ly).as_slice();
    // left-hand addressing limit
    let left = cmp::max(0, r as isize - x - src_plane.x) as usize;
    // right-hand addressing limit
    let right = cmp::min(2*r+1, src_w);

    // run accumulation to left of frame storage (if any)
    for _xi in 0..left {
      let c = i32::cast_from(p[(r as isize - x) as usize]);
165
166
167
      ssq += c*c;
      sum += c;
    }
Monty's avatar
Monty committed
168
169
170
    // run accumulation in-frame
    for xi in left..right {
      let c = i32::cast_from(p[xi]);
171
172
173
      ssq += c*c;
      sum += c;
    }
Monty's avatar
Monty committed
174
175
176
    // run accumulation to right of frame (if any)
    for _xi in right..2*r+1 {
      let c = i32::cast_from(p[src_w - 1]);
177
178
179
180
      ssq += c*c;
      sum += c;
    }
  }
Monty's avatar
Monty committed
181
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, n, one_over_n, s, bdm8);
182
183
184
  *a = reta;
  *b = retb;
}
Monty's avatar
Monty committed
185

Monty's avatar
Monty committed
186
187
188
// unrolled computation to be used when all bounds-checking has been satisfied.
fn sgrproj_box_sum_fastxy_r1<T: Pixel>(a: &mut i32, b: &mut i32, x: isize, y: isize,
                                       s: i32, bdm8: usize, p: &PlaneSlice<T>) {
189
190
191
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;
  for yi in -1..=1 {
Monty's avatar
Monty committed
192
    let x = p.reslice(x - 1, y + yi).as_slice();
193
194
195
196
197
    ssq += i32::cast_from(x[0]) * i32::cast_from(x[0]) +
      i32::cast_from(x[1]) * i32::cast_from(x[1]) +
      i32::cast_from(x[2]) * i32::cast_from(x[2]);
    sum += i32::cast_from(x[0]) + i32::cast_from(x[1]) + i32::cast_from(x[2]);
  }
Monty's avatar
Monty committed
198
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 9, 455, s, bdm8);
199
200
201
202
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
203
204
fn sgrproj_box_sum_fastxy_r2<T: Pixel>(a: &mut i32, b: &mut i32, x: isize, y: isize,
                                       s: i32, bdm8: usize, p: &PlaneSlice<T>) {
205
206
207
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;
  for yi in -2..=2 {
Monty's avatar
Monty committed
208
    let x = p.reslice(x - 2, y + yi).as_slice();
209
210
211
212
213
214
215
216
    ssq += i32::cast_from(x[0]) * i32::cast_from(x[0]) +
      i32::cast_from(x[1]) * i32::cast_from(x[1]) +
      i32::cast_from(x[2]) * i32::cast_from(x[2]) +
      i32::cast_from(x[3]) * i32::cast_from(x[3]) +
      i32::cast_from(x[4]) * i32::cast_from(x[4]);
    sum += i32::cast_from(x[0]) + i32::cast_from(x[1]) + i32::cast_from(x[2]) +
      i32::cast_from(x[3]) + i32::cast_from(x[4]);
  }
Monty's avatar
Monty committed
217
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 25, 164, s, bdm8);
218
219
220
221
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
222
223
224
225
226
227
228
// unrolled computation to be used when only X bounds-checking has been satisfied.
fn sgrproj_box_sum_fastx_r1<T: Pixel>(a: &mut i32, b: &mut i32,
                                      stripe_y: isize, stripe_h: usize,
                                      x: isize, y: isize,
                                      s: i32, bdm8: usize,
                                      backing: &PlaneSlice<T>, backing_h: usize,
                                      cdeffed: &PlaneSlice<T>, cdeffed_h: usize) {  
229
230
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;
Monty's avatar
Monty committed
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
  for yi in y-1..=y+1 {
    let src_plane;
    let src_h;
    
    // decide if we're vertically inside or outside the stripe
    if yi >= stripe_y && yi < stripe_y + stripe_h as isize {
      src_plane = cdeffed;
      src_h = cdeffed_h as isize;
    } else {
      src_plane = backing;
      src_h = backing_h as isize;
    }
    // clamp vertically to storage addressing limit
    let cropped_y = clamp(yi, -src_plane.y, src_h - 1);
    // clamp vertically to stripe limits
    let ly = clamp(cropped_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
    let x = src_plane.reslice(x - 1, ly).as_slice();
248
249
250
251
252
    ssq += i32::cast_from(x[0]) * i32::cast_from(x[0]) +
      i32::cast_from(x[1]) * i32::cast_from(x[1]) +
      i32::cast_from(x[2]) * i32::cast_from(x[2]);
    sum += i32::cast_from(x[0]) + i32::cast_from(x[1]) + i32::cast_from(x[2]);
  }
Monty's avatar
Monty committed
253
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 9, 455, s, bdm8);
254
255
256
257
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
258
259
260
261
262
263
fn sgrproj_box_sum_fastx_r2<T: Pixel>(a: &mut i32, b: &mut i32,
                                      stripe_y: isize, stripe_h: usize,
                                      x: isize, y: isize,
                                      s: i32, bdm8: usize,
                                      backing: &PlaneSlice<T>, backing_h: usize,
                                      cdeffed: &PlaneSlice<T>, cdeffed_h: usize) {  
264
265
  let mut ssq:i32 = 0;
  let mut sum:i32 = 0;
Monty's avatar
Monty committed
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
  for yi in y - 2..=y + 2 {
    let src_plane;
    let src_h;
    
    // decide if we're vertically inside or outside the stripe
    if yi >= stripe_y && yi < stripe_y + stripe_h as isize {
      src_plane = cdeffed;
      src_h = cdeffed_h as isize;
    } else {
      src_plane = backing;
      src_h = backing_h as isize;
    }
    // clamp vertically to storage addressing limit
    let cropped_y = clamp(yi, -src_plane.y, src_h as isize - 1);
    // clamp vertically to stripe limits
    let ly = clamp(cropped_y, stripe_y - 2, stripe_y + stripe_h as isize + 1);
    let x = src_plane.reslice(x - 2, ly).as_slice();
283
284
285
286
287
288
289
290
    ssq += i32::cast_from(x[0]) * i32::cast_from(x[0]) +
      i32::cast_from(x[1]) * i32::cast_from(x[1]) +
      i32::cast_from(x[2]) * i32::cast_from(x[2]) +
      i32::cast_from(x[3]) * i32::cast_from(x[3]) +
      i32::cast_from(x[4]) * i32::cast_from(x[4]);
    sum += i32::cast_from(x[0]) + i32::cast_from(x[1]) + i32::cast_from(x[2]) +
      i32::cast_from(x[3]) + i32::cast_from(x[4]);
  }
Monty's avatar
Monty committed
291
  let (reta, retb) = sgrproj_sum_finish(ssq, sum, 25, 164, s, bdm8);
292
293
294
295
  *a = reta;
  *b = retb;
}

Monty's avatar
Monty committed
296
297
// computes an intermediate (ab) column for rows stripe_y through
// stripe_y+stripe_h (no inclusize) at column stripe_x.
298
299
300
// r=1 case computes every row as every row is used (see r2 version below)
fn sgrproj_box_ab_r1<T: Pixel>(af: &mut[i32; 64+2],
                               bf: &mut[i32; 64+2],
Monty's avatar
Monty committed
301
302
303
304
305
306
307
                               stripe_x: isize, stripe_y: isize, stripe_h: usize,
                               s: i32, bdm8: usize,
                               backing: &PlaneSlice<T>, backing_w: usize, backing_h: usize,
                               cdeffed: &PlaneSlice<T>, cdeffed_w: usize, cdeffed_h: usize) {
  // we will fill the af and bf arrays from 0..stripe_h+1 (ni),
  // representing stripe_y-1 to stripe_y+stripe_h+1 inclusive
  let boundary0 = 0;
308
  let boundary3 = stripe_h + 2;
Monty's avatar
Monty committed
309
310
311
312
313
314
315
316
317
318
319
320
321
322
  if backing.x + stripe_x > 0 && stripe_x < backing_w as isize - 1 &&
    cdeffed.x + stripe_x > 0 && stripe_x < cdeffed_w as isize - 1 {
    // Addressing is away from left and right edges of cdeffed storage;
    // no X clipping to worry about, but the top/bottom few rows still
    // need to worry about storage and stripe limits
      
    // boundary1 is the point where we're guaranteed all our y
    // addressing will be both in the stripe and in cdeffed storage  
    let boundary1 = cmp::max(2, 2 - cdeffed.y - stripe_y) as usize;
    // boundary 2 is when we have to bounds check along the bottom of
    // the stripe or bottom of storage
    let boundary2 = cmp::min(cdeffed_h as isize - stripe_y - 1, stripe_h as isize - 1) as usize;

    // top rows (if any), away from left and right columns
323
    for i in boundary0..boundary1 {
Monty's avatar
Monty committed
324
325
326
327
328
329
      sgrproj_box_sum_fastx_r1(&mut af[i], &mut bf[i],
                               stripe_y, stripe_h, 
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
                               backing, backing_h,
                               cdeffed, cdeffed_h);
330
331
332
333
    }
    // middle rows, away from left and right columns
    for i in boundary1..boundary2 {
      sgrproj_box_sum_fastxy_r1(&mut af[i], &mut bf[i],
Monty's avatar
Monty committed
334
                                stripe_x, stripe_y + i as isize - 1, s, bdm8, cdeffed);
335
    }
Monty's avatar
Monty committed
336
    // bottom rows (if any), away from left and right columns
337
    for i in boundary2..boundary3 {
Monty's avatar
Monty committed
338
339
340
341
342
343
      sgrproj_box_sum_fastx_r1(&mut af[i], &mut bf[i],
                               stripe_y, stripe_h, 
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
                               backing, backing_h,
                               cdeffed, cdeffed_h);
344
345
346
347
    }
  } else {
    // top/bottom rows and left/right columns, where we need to worry about frame and stripe clipping
    for i in boundary0..boundary3 {
Monty's avatar
Monty committed
348
349
350
351
352
353
      sgrproj_box_sum_slow(&mut af[i], &mut bf[i],                           
                           stripe_y, stripe_h,
                           stripe_x, stripe_y + i as isize - 1,
                           1, 9, 455, s, bdm8,
                           backing, backing_w, backing_h,
                           cdeffed, cdeffed_w, cdeffed_h);
Monty's avatar
Monty committed
354
355
    }
  }
Monty's avatar
Monty committed
356
357
}

358
359
360
361
362
363
364
365
366
// One oddness about the radius=2 intermediate array computations that
// the spec doesn't make clear: Although the spec defines computation
// of every row (of a, b and f), only half of the rows (every-other
// row) are actually used.  We use the full-size array here but only
// compute the even rows.  This is not so much optimization as trying
// to illustrate what this convoluted filter is actually doing
// (ie not as much as it may appear).
fn sgrproj_box_ab_r2<T: Pixel>(af: &mut[i32; 64+2],
                               bf: &mut[i32; 64+2],
Monty's avatar
Monty committed
367
368
369
370
371
372
373
                               stripe_x: isize, stripe_y: isize, stripe_h: usize,
                               s: i32, bdm8: usize,
                               backing: &PlaneSlice<T>, backing_w: usize, backing_h: usize,
                               cdeffed: &PlaneSlice<T>, cdeffed_w: usize, cdeffed_h: usize) {  
  // we will fill the af and bf arrays from 0..stripe_h+1 (ni),
  // representing stripe_y-1 to stripe_y+stripe_h+1 inclusive
  let boundary0 = 0; // even
374
  let boundary3 = stripe_h + 2; // don't care if odd
Monty's avatar
Monty committed
375
376
377
378
379
380
381
382
383
384
385
386
  if backing.x + stripe_x > 1 && stripe_x < backing_w as isize - 2 &&
    cdeffed.x + stripe_x > 1 && stripe_x < cdeffed_w as isize - 2 {
    // Addressing is away from left and right edges of cdeffed storage;
    // no X clipping to worry about, but the top/bottom few rows still
    // need to worry about storage and stripe limits
      
    // boundary1 is the point where we're guaranteed all our y
    // addressing will be both in the stripe and in cdeffed storage
    // make even and round up  
    let boundary1 = (cmp::max(3, 3 - cdeffed.y - stripe_y) + 1 >> 1 << 1) as usize;
    // boundary 2 is when we have to bounds check along the bottom of
    // the stripe or bottom of storage
387
    // must be even, rounding of +1 cancels fencepost of -1
Monty's avatar
Monty committed
388
    let boundary2 = (cmp::min(cdeffed_h as isize - stripe_y, stripe_h as isize) >> 1 << 1) as usize;
389
390
391

    // top rows, away from left and right columns
    for i in (boundary0..boundary1).step_by(2) {
Monty's avatar
Monty committed
392
393
394
395
396
397
      sgrproj_box_sum_fastx_r2(&mut af[i], &mut bf[i],
                               stripe_y, stripe_h, 
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
                               backing, backing_h,
                               cdeffed, cdeffed_h);
398
399
400
401
    }
    // middle rows, away from left and right columns
    for i in (boundary1..boundary2).step_by(2) {
      sgrproj_box_sum_fastxy_r2(&mut af[i], &mut bf[i],
Monty's avatar
Monty committed
402
403
                                stripe_x, stripe_y + i as isize - 1,
                                s, bdm8, cdeffed);
404
405
406
    }
    // bottom rows, away from left and right columns
    for i in (boundary2..boundary3).step_by(2) {
Monty's avatar
Monty committed
407
408
409
410
411
412
      sgrproj_box_sum_fastx_r2(&mut af[i], &mut bf[i],
                               stripe_y, stripe_h, 
                               stripe_x, stripe_y + i as isize - 1,
                               s, bdm8,
                               backing, backing_h,
                               cdeffed, cdeffed_h);
Monty's avatar
Monty committed
413
    }
414
415
416
  } else {
    // top/bottom rows and left/right columns, where we need to worry about frame and stripe clipping
    for i in (boundary0..boundary3).step_by(2) {
Monty's avatar
Monty committed
417
418
419
420
421
422
      sgrproj_box_sum_slow(&mut af[i], &mut bf[i],
                           stripe_y, stripe_h,
                           stripe_x, stripe_y + i as isize - 1,
                           2, 25, 164, s, bdm8,
                           backing, backing_w, backing_h,
                           cdeffed, cdeffed_w, cdeffed_h);
423
424
425
426
    }
  }
}

Monty's avatar
Monty committed
427
// Pass-through "filter" used when a pass is disabled: f[i] is simply the
// cdeffed sample at column x, row y + i, scaled up by SGRPROJ_RST_BITS.
// Entries for rows above slice row 0 (i < -y) are left untouched.
fn sgrproj_box_f_r0<T: Pixel>(f: &mut[i32; 64], x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
  let first = cmp::max(0, -y) as usize;
  for i in first..h {
    let row = (y + i as isize) as usize;
    f[i] = i32::cast_from(cdeffed.p(x, row)) << SGRPROJ_RST_BITS;
  }
}

// Radius-1 filter output for one column.  af/bf hold three neighbouring
// intermediate columns (left, center, right); entry i+1 of each lines up
// with output row i.  The 3x3 neighbourhood is weighted 3 on the corners and
// 4 on the center cross (total 32, hence the 5 in `shift`), then
// f[i] = round((a * pixel + b) >> shift).
// Entries for rows above slice row 0 (i < -y) are left untouched.
fn sgrproj_box_f_r1<T: Pixel>(af: &[&[i32; 64+2]; 3], bf: &[&[i32; 64+2]; 3], f: &mut[i32; 64],
                              x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
  let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  // weighted 3x3 sum over intermediate rows i..=i+2 of the three columns
  let weigh = |m: &[&[i32; 64+2]; 3], i: usize| -> i32 {
    3 * (m[0][i] + m[2][i] + m[0][i + 2] + m[2][i + 2]) +
    4 * (m[1][i] + m[0][i + 1] + m[1][i + 1] + m[2][i + 1] + m[1][i + 2])
  };

  let first = cmp::max(0, -y) as usize;
  for i in first..h {
    let a = weigh(af, i);
    let b = weigh(bf, i);
    let px = i32::cast_from(cdeffed.p(x, (y + i as isize) as usize));
    let v = a * px + b;
    f[i] = (v + (1 << shift >> 1)) >> shift;
  }
}

// Radius-2 filter output for one column, two output rows per iteration.
// af/bf hold three neighbouring intermediate columns (left, center, right).
// Only intermediate entries i and i+2 are read, i.e. every other entry.
// Each entry is weighted 5 on the outer columns and 6 on the center.
//
// f[i]   uses both intermediate rows (weights total 32 -> shift)
// f[i+1] uses only the lower intermediate row (weights total 16 -> shifto)
//
// Rows are visited in steps of two starting at max(0, -y).
fn sgrproj_box_f_r2<T: Pixel>(af: &[&[i32; 64+2]; 3], bf: &[&[i32; 64+2]; 3], f: &mut[i32; 64],
                              x: usize, y: isize, h: usize, cdeffed: &PlaneSlice<T>) {
  let shift = 5 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  let shifto = 4 + SGRPROJ_SGR_BITS - SGRPROJ_RST_BITS;
  // weighted sum across the three columns at one intermediate row
  let weigh = |m: &[&[i32; 64+2]; 3], row: usize| -> i32 {
    5 * (m[0][row] + m[2][row]) + 6 * m[1][row]
  };

  let first = cmp::max(0, -y) as usize;
  for i in (first..h).step_by(2) {
    let a = weigh(af, i);
    let b = weigh(bf, i);
    let ao = weigh(af, i + 2);
    let bo = weigh(bf, i + 2);
    let row = (y + i as isize) as usize;

    let v = (a + ao) * i32::cast_from(cdeffed.p(x, row)) + b + bo;
    f[i] = (v + (1 << shift >> 1)) >> shift;

    let vo = ao * i32::cast_from(cdeffed.p(x, row + 1)) + bo;
    f[i + 1] = (vo + (1 << shifto >> 1)) >> shifto;
  }
}

472
473
474
475
476
477
pub fn sgrproj_stripe_filter<T: Pixel>(set: u8, xqd: [i8; 2], fi: &FrameInvariants<T>,
                                       crop_w: usize, crop_h: usize,
                                       stripe_w: usize, stripe_h: usize,
                                       cdeffed: &PlaneSlice<T>,
                                       deblocked: &PlaneSlice<T>,
                                       out: &mut PlaneMutSlice<T>) {
Monty's avatar
Monty committed
478
  assert!(stripe_h <= 64);
Monty's avatar
Monty committed
479
  let bdm8 = fi.sequence.bit_depth - 8;
480
481
482
483
484
485
486
487
488
  let mut a_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r2: [i32; 64] = [0; 64];
  let mut a_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r1: [i32; 64] = [0; 64];

  let s_r2: i32 = SGRPROJ_PARAMS_S[set as usize][0];
  let s_r1: i32 = SGRPROJ_PARAMS_S[set as usize][1];
Monty's avatar
Monty committed
489

490
491
492
  let outstart = cmp::max(0, cmp::max(-cdeffed.y, -out.y)) as usize;
  let outstride = out.plane.cfg.stride; 
  let out_data = out.as_mut_slice();
Monty's avatar
Monty committed
493
  
Monty's avatar
Monty committed
494
  /* prime the intermediate arrays */
495
  if s_r2 > 0 {
Monty's avatar
Monty committed
496
497
498
    sgrproj_box_ab_r2(&mut a_r2[0], &mut b_r2[0],
                      -1, 0, stripe_h,
                      s_r2, bdm8,
499
500
                      &deblocked, crop_w, crop_h,
                      &cdeffed, crop_w, crop_h);
Monty's avatar
Monty committed
501
502
503
    sgrproj_box_ab_r2(&mut a_r2[1], &mut b_r2[1],
                      0, 0, stripe_h,
                      s_r2, bdm8,
504
505
                      &deblocked, crop_w, crop_h,
                      &cdeffed, crop_w, crop_h);
506
  }
507
  if s_r1 > 0 {
Monty's avatar
Monty committed
508
509
510
    sgrproj_box_ab_r1(&mut a_r1[0], &mut b_r1[0],
                      -1, 0, stripe_h,
                      s_r1, bdm8,
511
512
                      &deblocked, crop_w, crop_h,
                      &cdeffed, crop_w, crop_h);
Monty's avatar
Monty committed
513
514
515
    sgrproj_box_ab_r1(&mut a_r1[1], &mut b_r1[1],
                      0, 0, stripe_h,
                      s_r1, bdm8,
516
517
                      &deblocked, crop_w, crop_h,
                      &cdeffed, crop_w, crop_h);
518
  }
Raphaël Zumer's avatar
Raphaël Zumer committed
519

Monty's avatar
Monty committed
520
  /* iterate by column */
Monty's avatar
Monty committed
521
  for xi in 0..stripe_w {
522
    /* build intermediate array columns */
523
    if s_r2 > 0 {
Monty's avatar
Monty committed
524
525
526
      sgrproj_box_ab_r2(&mut a_r2[(xi+2)%3], &mut b_r2[(xi+2)%3],
                        xi as isize + 1, 0, stripe_h,
                        s_r2, bdm8,
527
528
                        &deblocked, crop_w, crop_h,
                        &cdeffed, crop_w, crop_h);
529
530
      let ap0: [&[i32; 64+2]; 3] = [&a_r2[xi%3], &a_r2[(xi+1)%3], &a_r2[(xi+2)%3]];
      let bp0: [&[i32; 64+2]; 3] = [&b_r2[xi%3], &b_r2[(xi+1)%3], &b_r2[(xi+2)%3]];
531
      sgrproj_box_f_r2(&ap0, &bp0, &mut f_r2, xi, 0, stripe_h as usize, &cdeffed);
532
    } else {
533
      sgrproj_box_f_r0(&mut f_r2, xi, 0, stripe_h as usize, &cdeffed);
534
    }
535
    if s_r1 > 0 {
Monty's avatar
Monty committed
536
537
538
      sgrproj_box_ab_r1(&mut a_r1[(xi+2)%3], &mut b_r1[(xi+2)%3],
                        xi as isize + 1, 0, stripe_h,
                        s_r1, bdm8,
539
540
                        &deblocked, crop_w, crop_h,
                        &cdeffed, crop_w, crop_h);
541
542
543
      let ap1: [&[i32; 64+2]; 3] = [&a_r1[xi%3], &a_r1[(xi+1)%3], &a_r1[(xi+2)%3]];
      let bp1: [&[i32; 64+2]; 3] = [&b_r1[xi%3], &b_r1[(xi+1)%3], &b_r1[(xi+2)%3]];

544
      sgrproj_box_f_r1(&ap1, &bp1, &mut f_r1, xi, 0, stripe_h as usize, &cdeffed);
545
    } else {
546
      sgrproj_box_f_r0(&mut f_r1, xi, 0, stripe_h as usize, &cdeffed);
Monty's avatar
Monty committed
547
    }
548
549

    /* apply filter */
550
    let bit_depth = fi.sequence.bit_depth;
551
552
553
    let w0 = xqd[0] as i32;
    let w1 = xqd[1] as i32;
    let w2 = (1 << SGRPROJ_PRJ_BITS) - w0 - w1;
554
555
    for yi in outstart..stripe_h as usize {
      let u = i32::cast_from(cdeffed.p(xi, yi)) << SGRPROJ_RST_BITS;
556
557
558
      let v = w0*f_r2[yi] + w1*u + w2*f_r1[yi];
      let s = v + (1 << SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS >> 1) >> SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS;
      out_data[xi + yi*outstride] = T::cast_from(clamp(s, 0, (1 << bit_depth) - 1));
Monty's avatar
Monty committed
559
560
561
562
    }
  }
}

563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
// Frame inputs below aren't all equal, and will change as work
// continues.  There's no deblocked reconstruction available at this
// point of RDO, so we use the non-deblocked reconstruction, cdef and
// input.  The input can be a full-sized frame. Cdef input is a partial
// frame constructed specifically for RDO.

// For simplicity, this ignores stripe segmentation (it's possible the
// extra complexity isn't worth it and we'll ignore stripes
// permanently during RDO, but that's not been tested yet). Data
// access inside the cdef frame is monolithic and clipped to the cdef
// borders.

// Input params follow the same rules as sgrproj_stripe_filter.
// Inputs are relative to the colocated slice views.
/// Least-squares solve for the self-guided projection weights of one
/// restoration unit.
///
/// For every pixel of the `cdef_w` x `cdef_h` area, the radius-2 and
/// radius-1 box filter outputs selected by parameter set `set` are
/// compared against the unfiltered `cdeffed` pixel `u` and the original
/// `input` pixel.  The routine accumulates the normal equations
/// `H * x = C` of the residual model
/// `input - u ~= x0 * (f_r2 - u) + x1 * (f_r1 - u)`, solves them, and
/// converts the solution to the `xqd` pair.
///
/// The returned pair follows the convention used when the filter is
/// applied: `xqd[0]` weights the r2 output, `xqd[1]` weights the
/// unfiltered pixel, and `(1 << SGRPROJ_PRJ_BITS) - xqd[0] - xqd[1]`
/// weights the r1 output.  Each value is clamped to its
/// `SGRPROJ_XQD_MIN`/`SGRPROJ_XQD_MAX` range.
///
/// `input` and `cdeffed` are colocated slice views; `cdef_w`/`cdef_h`
/// bound the accesses made into `cdeffed`.
///
/// # Panics
///
/// Panics if `cdef_h` exceeds 64, the size of the column work buffers.
pub fn sgrproj_solve<T: Pixel>(set: u8, fi: &FrameInvariants<T>,
                               input: &PlaneSlice<T>,
                               cdeffed: &PlaneSlice<T>,
                               cdef_w: usize, cdef_h: usize) -> (i8, i8) {

  assert!(cdef_h <= 64);
  let bdm8 = fi.sequence.bit_depth - 8;
  // Three-column ring buffers of box-filter intermediates, one set per radius.
  let mut a_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r2: [i32; 64] = [0; 64];
  let mut a_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut b_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
  let mut f_r1: [i32; 64] = [0; 64];

  // A zero strength means the corresponding radius is disabled for this set.
  let s_r2: i32 = SGRPROJ_PARAMS_S[set as usize][0];
  let s_r1: i32 = SGRPROJ_PARAMS_S[set as usize][1];

  let mut h:[[f64; 2]; 2] = [[0.,0.],[0.,0.]];
  let mut c:[f64; 2] = [0., 0.];

  /* prime the intermediate arrays */
  if s_r2 > 0 {
    sgrproj_box_ab_r2(&mut a_r2[0], &mut b_r2[0],
                      -1, 0, cdef_h,
                      s_r2, bdm8,
                      cdeffed, cdef_w, cdef_h,
                      cdeffed, cdef_w, cdef_h);
    sgrproj_box_ab_r2(&mut a_r2[1], &mut b_r2[1],
                      0, 0, cdef_h,
                      s_r2, bdm8,
                      cdeffed, cdef_w, cdef_h,
                      cdeffed, cdef_w, cdef_h);
  }
  if s_r1 > 0 {
    sgrproj_box_ab_r1(&mut a_r1[0], &mut b_r1[0],
                      -1, 0, cdef_h,
                      s_r1, bdm8,
                      cdeffed, cdef_w, cdef_h,
                      cdeffed, cdef_w, cdef_h);
    sgrproj_box_ab_r1(&mut a_r1[1], &mut b_r1[1],
                      0, 0, cdef_h,
                      s_r1, bdm8,
                      cdeffed, cdef_w, cdef_h,
                      cdeffed, cdef_w, cdef_h);
  }

  /* iterate by column */
  for xi in 0..cdef_w {
    /* build intermediate array columns */
    if s_r2 > 0 {
      sgrproj_box_ab_r2(&mut a_r2[(xi+2)%3], &mut b_r2[(xi+2)%3],
                        xi as isize + 1, 0, cdef_h,
                        s_r2, bdm8,
                        cdeffed, cdef_w, cdef_h,
                        cdeffed, cdef_w, cdef_h);
      let ap0: [&[i32; 64+2]; 3] = [&a_r2[xi%3], &a_r2[(xi+1)%3], &a_r2[(xi+2)%3]];
      let bp0: [&[i32; 64+2]; 3] = [&b_r2[xi%3], &b_r2[(xi+1)%3], &b_r2[(xi+2)%3]];
      sgrproj_box_f_r2(&ap0, &bp0, &mut f_r2, xi, 0, cdef_h, cdeffed);
    } else {
      sgrproj_box_f_r0(&mut f_r2, xi, 0, cdef_h, cdeffed);
    }
    if s_r1 > 0 {
      sgrproj_box_ab_r1(&mut a_r1[(xi+2)%3], &mut b_r1[(xi+2)%3],
                        xi as isize + 1, 0, cdef_h,
                        s_r1, bdm8,
                        cdeffed, cdef_w, cdef_h,
                        cdeffed, cdef_w, cdef_h);
      let ap1: [&[i32; 64+2]; 3] = [&a_r1[xi%3], &a_r1[(xi+1)%3], &a_r1[(xi+2)%3]];
      let bp1: [&[i32; 64+2]; 3] = [&b_r1[xi%3], &b_r1[(xi+1)%3], &b_r1[(xi+2)%3]];

      sgrproj_box_f_r1(&ap1, &bp1, &mut f_r1, xi, 0, cdef_h, cdeffed);
    } else {
      sgrproj_box_f_r0(&mut f_r1, xi, 0, cdef_h, cdeffed);
    }

    /* accumulate the normal equations for this column */
    for yi in 0..cdef_h {
      // f_r2[yi]/f_r1[yi] hold column xi, so the matching pixel is
      // (x = xi, y = yi); PlaneSlice::p takes x first.
      let u = i32::cast_from(cdeffed.p(xi, yi)) << SGRPROJ_RST_BITS;
      // The regression target is the residual against the unfiltered
      // pixel, matching the (f - u) regressors below.
      let s = ((i32::cast_from(input.p(xi, yi)) << SGRPROJ_RST_BITS) - u) as f64;
      let f2 = (f_r2[yi] - u) as f64;
      let f1 = (f_r1[yi] - u) as f64;
      h[0][0] += f2 * f2;
      h[1][1] += f1 * f1;
      h[0][1] += f1 * f2;
      c[0] += f2 * s;
      c[1] += f1 * s;
    }
  }

  // this is lifted almost intact from libaom
  let n = cdef_w as f64 * cdef_h as f64;
  // An empty area leaves every moment at zero; skip the 0/0 normalization
  // so the degenerate checks below see exact zeros rather than NaN.
  if n > 0. {
    h[0][0] /= n;
    h[0][1] /= n;
    h[1][1] /= n;
    c[0] /= n;
    c[1] /= n;
  }
  h[1][0] = h[0][1];

  // Projection weights are fixed point with SGRPROJ_PRJ_BITS fractional
  // bits; the same scale applies whichever filters are enabled.
  let prj_scale = (1 << SGRPROJ_PRJ_BITS) as f64;
  let (xq0, xq1) = if s_r2 == 0 {
    // H matrix is now only the scalar h[1][1]
    // C vector is now only the scalar c[1]
    if h[1][1] == 0. {
      (0, 0)
    } else {
      (0, (c[1] * prj_scale / h[1][1]).round() as i32)
    }
  } else if s_r1 == 0 {
    // H matrix is now only the scalar h[0][0]
    // C vector is now only the scalar c[0]
    if h[0][0] == 0. {
      (0, 0)
    } else {
      ((c[0] * prj_scale / h[0][0]).round() as i32, 0)
    }
  } else {
    let det = h[0][0] * h[1][1] - h[0][1] * h[1][0];
    if det == 0. {
      // ill-posed system: fall back to zero projection weights
      (0, 0)
    } else {
      let div1 = (h[1][1] * c[0] - h[0][1] * c[1]) * prj_scale;
      let div2 = (h[0][0] * c[1] - h[1][0] * c[0]) * prj_scale;

      ((div1 / det).round() as i32, (div2 / det).round() as i32)
    }
  };

  // Convert projection weights to the xqd pair: xqd[1] is the weight of
  // the unfiltered pixel, i.e. whatever unity leaves after both filter
  // weights.  Saturating arithmetic keeps an extreme xq1 from overflowing
  // before the clamp.
  let unity = 1i32 << SGRPROJ_PRJ_BITS;
  let xqd0 = clamp(xq0, SGRPROJ_XQD_MIN[0] as i32, SGRPROJ_XQD_MAX[0] as i32);
  let xqd1 = clamp(unity.saturating_sub(xqd0).saturating_sub(xq1),
                   SGRPROJ_XQD_MIN[1] as i32, SGRPROJ_XQD_MAX[1] as i32);
  (xqd0 as i8, xqd1 as i8)
}

705
fn wiener_stripe_filter<T: Pixel>(coeffs: [[i8; 3]; 2], fi: &FrameInvariants<T>,
706
707
708
709
                                  crop_w: usize, crop_h: usize,
                                  stripe_w: usize, stripe_h: usize,
                                  stripe_x: usize, stripe_y: isize,
                                  cdeffed: &Plane<T>, deblocked: &Plane<T>, out: &mut Plane<T>) {
710
  let bit_depth = fi.sequence.bit_depth;
Monty's avatar
Monty committed
711
712
713
714
  let round_h = if bit_depth == 12 {5} else {3};
  let round_v = if bit_depth == 12 {9} else {11};
  let offset = 1 << bit_depth + WIENER_BITS - round_h - 1;
  let limit = (1 << bit_depth + 1 + WIENER_BITS - round_h) - 1;
Raphaël Zumer's avatar
Raphaël Zumer committed
715

Monty's avatar
Monty committed
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
  let mut work: [i32; MAX_SB_SIZE+7] = [0; MAX_SB_SIZE+7];
  let vfilter: [i32; 7] = [ coeffs[0][0] as i32,
                            coeffs[0][1] as i32,
                            coeffs[0][2] as i32,
                            128 - 2 * (coeffs[0][0] as i32 +
                                       coeffs[0][1] as i32 +
                                       coeffs[0][2] as i32 ),
                            coeffs[0][2] as i32,
                            coeffs[0][1] as i32,
                            coeffs[0][0] as i32];
  let hfilter: [i32; 7] = [ coeffs[1][0] as i32,
                            coeffs[1][1] as i32,
                            coeffs[1][2] as i32,
                            128 - 2 * (coeffs[1][0] as i32 +
                                       coeffs[1][1] as i32 +
                                       coeffs[1][2] as i32),
                            coeffs[1][2] as i32,
                            coeffs[1][1] as i32,
                            coeffs[1][0] as i32];

  // unlike x, our y can be negative to start as the first stripe
  // starts off the top of the frame by 8 pixels, and can also run off the end of the frame
Monty's avatar
Monty committed
738
739
  let start_wi = if stripe_y < 0 {-stripe_y} else {0} as usize;
  let start_yi = if stripe_y < 0 {0} else {stripe_y} as usize;
740
  let end_i = cmp::max(0, if stripe_h as isize + stripe_y > crop_h as isize {
Monty's avatar
Monty committed
741
    crop_h as isize - stripe_y - start_wi as isize
Monty's avatar
Monty committed
742
  } else {
743
    stripe_h as isize - start_wi as isize
Monty's avatar
Monty committed
744
  }) as usize;
Raphaël Zumer's avatar
Raphaël Zumer committed
745

Monty's avatar
Monty committed
746
747
748
749
  let stride = out.cfg.stride;
  let mut out_slice = out.mut_slice(&PlaneOffset{x: 0, y: start_yi as isize});
  let out_data = out_slice.as_mut_slice();

Monty's avatar
Monty committed
750
751
  for xi in stripe_x..stripe_x+stripe_w {
    let n = cmp::min(7, crop_w as isize + 3 - xi as isize);
752
    for yi in stripe_y - 3..stripe_y + stripe_h as isize + 4 {
753
      let src_plane: &Plane<T>;
Monty's avatar
Monty committed
754
755
      let mut acc = 0;
      let ly;
Monty's avatar
Monty committed
756
757
      if yi < stripe_y {
        ly = cmp::max(clamp(yi, 0, crop_h as isize - 1), stripe_y - 2) as usize;
Monty's avatar
Monty committed
758
        src_plane = deblocked;
759
      } else if yi < stripe_y+stripe_h as isize {
Monty's avatar
Monty committed
760
        ly = clamp(yi, 0, crop_h as isize - 1) as usize;
Monty's avatar
Monty committed
761
762
        src_plane = cdeffed;
      } else {
763
        ly = cmp::min(clamp(yi, 0, crop_h as isize - 1), stripe_y + stripe_h as isize + 1) as usize;
Monty's avatar
Monty committed
764
765
        src_plane = deblocked;
      }
Raphaël Zumer's avatar
Raphaël Zumer committed
766

Monty's avatar
Monty committed
767
      for i in 0..3 - xi as isize {
768
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p(0, ly));
Monty's avatar
Monty committed
769
770
      }
      for i in cmp::max(0,3 - (xi as isize))..n {
771
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p((xi as isize + i - 3) as usize, ly));
Monty's avatar
Monty committed
772
773
      }
      for i in n..7 {
774
        acc += hfilter[i as usize] * i32::cast_from(src_plane.p(crop_w - 1, ly));
Monty's avatar
Monty committed
775
      }
Raphaël Zumer's avatar
Raphaël Zumer committed
776

Monty's avatar
Monty committed
777
      acc = acc + (1 << round_h >> 1) >> round_h;
Monty's avatar
Monty committed
778
      work[(yi-stripe_y+3) as usize] = clamp(acc, -offset, limit-offset);
Monty's avatar
Monty committed
779
780
781
782
783
784
785
    }

    for (wi, dst) in (start_wi..start_wi+end_i).zip(out_data[xi..].iter_mut().step_by(stride).take(end_i)) {
      let mut acc = 0;
      for (i,src) in (0..7).zip(work[wi..wi+7].iter_mut()) {
        acc += vfilter[i] * *src;
      }
786
      *dst = T::cast_from(clamp(acc + (1 << round_v >> 1) >> round_v, 0, (1 << bit_depth) - 1));
Monty's avatar
Monty committed
787
    }
788
789
790
  }
}

791
#[derive(Copy, Clone, Debug)]
792
793
794
795
796
797
798
799
800
801
802
803
804
805
pub struct RestorationUnit {
  pub filter: RestorationFilter,
  pub coded: bool,
}

impl RestorationUnit {
  pub fn default() -> RestorationUnit {
    RestorationUnit {
      filter: RestorationFilter::default(),
      coded: false,
    }
  }
}

806
#[derive(Clone, Debug)]
807
808
809
pub struct RestorationPlane {
  pub lrf_type: u8,
  pub unit_size: usize,
810
  // (1 << sb_shift) gives the number of superblocks having size 1 << SUPERBLOCK_TO_PLANE_SHIFT
811
  // both horizontally and vertically in a restoration unit, not accounting for RU stretching
812
  pub sb_shift: usize,
Monty's avatar
Monty committed
813
814
815
  // stripe height is 64 in all cases except 4:2:0 chroma planes where
  // it is 32.  This is independent of all other setup parameters
  pub stripe_height: usize,
816
817
818
819
  pub cols: usize,
  pub rows: usize,
  pub wiener_ref: [[i8; 3]; 2],
  pub sgrproj_ref: [i8; 2],
820
  pub units: Box<[RestorationUnit]>,
821
822
823
824
825
826
827
828
829
}

/// A (row, column) position pair; both components default to zero.
// NOTE(review): presumably addresses a restoration unit inside a
// RestorationPlane grid - nothing in this part of the file uses it, confirm
// against the callers.
#[derive(Clone, Default)]
pub struct RestorationPlaneOffset {
  pub row: usize,
  pub col: usize
}

impl RestorationPlane {
Monty's avatar
Monty committed
830
  pub fn new(lrf_type: u8, unit_size: usize, sb_shift: usize, stripe_decimate: usize,
831
             cols: usize, rows: usize) -> RestorationPlane {
Monty's avatar
Monty committed
832
    let stripe_height = if stripe_decimate != 0 {32} else {64};
833
834
835
    RestorationPlane {
      lrf_type,
      unit_size,
836
      sb_shift,
Monty's avatar
Monty committed
837
      stripe_height,
838
839
840
841
      cols,
      rows,
      wiener_ref: [WIENER_TAPS_MID; 2],
      sgrproj_ref: SGRPROJ_XQD_MID,
842
      units: vec![RestorationUnit::default(); cols * rows].into_boxed_slice(),
843
844
845
    }
  }

846
847
  fn restoration_unit_index(&self, sbo: &SuperBlockOffset) -> (usize, usize) {
    (
848
849
      (sbo.x >> self.sb_shift).min(self.cols - 1),
      (sbo.y >> self.sb_shift).min(self.rows - 1),
850
    )
851
852
  }

853
854
855
  // Stripes are always 64 pixels high in a non-subsampled
  // frame, and decimated from 64 pixels in chroma.  When
  // filtering, they are not co-located on Y with superblocks.
Monty's avatar
Monty committed
856
  fn restoration_unit_index_by_stripe(&self, stripenum: usize, rux: usize) -> (usize, usize) {
857
858
    (
      cmp::min(rux, self.cols - 1),
Monty's avatar
Monty committed
859
      cmp::min(stripenum * self.stripe_height / self.unit_size, self.rows - 1),
860
861
862
    )
  }

863
  pub fn restoration_unit(&self, sbo: &SuperBlockOffset) -> &RestorationUnit {
864
    let (x, y) = self.restoration_unit_index(sbo);
865
    &self.units[y * self.cols + x]
866
867
868
  }

  pub fn restoration_unit_as_mut(&mut self, sbo: &SuperBlockOffset) -> &mut RestorationUnit {
869
    let (x, y) = self.restoration_unit_index(sbo);
870
    &mut self.units[y * self.cols + x]
Monty's avatar
Monty committed
871
  }
872

Monty's avatar
Monty committed
873
874
  pub fn restoration_unit_by_stripe(&self, stripenum: usize, rux: usize) -> &RestorationUnit {
    let (x, y) = self.restoration_unit_index_by_stripe(stripenum, rux);
875
876
    &self.units[y * self.cols + x]
  }
877
878
}

879
#[derive(Clone, Debug)]
880
881
882
883
884
pub struct RestorationState {
  pub plane: [RestorationPlane; PLANES]
}

impl RestorationState {
885
  pub fn new<T: Pixel>(fi: &FrameInvariants<T>, input: &Frame<T>) -> Self {
886
    let PlaneConfig { xdec, ydec, .. } = input.planes[1].cfg;
Monty's avatar
Monty committed
887
    let stripe_uv_decimate = if xdec>0 && ydec>0 {1} else {0};
888
889
890
891
892
893
    // Currrently opt for smallest possible restoration unit size (1
    // superblock) This is *temporary*.  Counting on it will break
    // very shortly; the 1-superblock hardwiring is only until the
    // upper level encoder is capable of dealing with the delayed
    // writes that RU size > SB size will require.
    let lrf_y_shift = if fi.sequence.use_128x128_superblock {1} else {2};
Monty's avatar
Monty committed
894
    let lrf_uv_shift = lrf_y_shift + stripe_uv_decimate;
895
896
897
898
899
900
901

    // derive the rest
    let y_unit_log2 = RESTORATION_TILESIZE_MAX_LOG2 - lrf_y_shift;
    let uv_unit_log2 = RESTORATION_TILESIZE_MAX_LOG2 - lrf_uv_shift;
    let y_unit_size = 1 << y_unit_log2;
    let uv_unit_size = 1 << uv_unit_log2;
    let y_sb_log2 = if fi.sequence.use_128x128_superblock {7} else {6};
Monty's avatar
Monty committed
902
    let uv_sb_log2 = y_sb_log2 - stripe_uv_decimate;
903
904
    let cols = ((fi.width + (y_unit_size >> 1)) / y_unit_size).max(1);
    let rows = ((fi.height + (y_unit_size >> 1)) / y_unit_size).max(1);
905

906
    RestorationState {
Monty's avatar
Monty committed
907
908
909
910
911
912
      plane: [RestorationPlane::new(RESTORE_SWITCHABLE, y_unit_size, y_unit_log2 - y_sb_log2,
                                    0, cols, rows),
              RestorationPlane::new(RESTORE_SWITCHABLE, uv_unit_size, uv_unit_log2 - uv_sb_log2,
                                    stripe_uv_decimate, cols, rows),
              RestorationPlane::new(RESTORE_SWITCHABLE, uv_unit_size, uv_unit_log2 - uv_sb_log2,
                                    stripe_uv_decimate, cols, rows)],
913
914
    }
  }
915

916
  pub fn restoration_unit(&self, sbo: &SuperBlockOffset, pli: usize) -> &RestorationUnit {
917
    self.plane[pli].restoration_unit(sbo)
918
919
920
  }

  pub fn restoration_unit_as_mut(&mut self, sbo: &SuperBlockOffset, pli: usize) -> &mut RestorationUnit {
921
922
    self.plane[pli].restoration_unit_as_mut(sbo)
  }
923

924
925
  pub fn lrf_filter_frame<T: Pixel>(&mut self, out: &mut Frame<T>, pre_cdef: &Frame<T>,
                                    fi: &FrameInvariants<T>) {
926
    let cdeffed = out.clone();
Raphaël Zumer's avatar
Raphaël Zumer committed
927

928
929
930
931
932
    // unlike the other loop filters that operate over the padded
    // frame dimensions, restoration filtering and source pixel
    // accesses are clipped to the original frame dimensions
    // that's why we use fi.width and fi.height instead of PlaneConfig fields

Monty's avatar
Monty committed
933
    // number of stripes (counted according to colocated Y luma position)
934
    let stripe_n = (fi.height + 7) / 64 + 1;
Raphaël Zumer's avatar
Raphaël Zumer committed
935

Monty's avatar
Monty committed
936
937
    for pli in 0..PLANES {
      let rp = &self.plane[pli];
Monty's avatar
Monty committed
938
      let xdec = out.planes[pli].cfg.xdec;
939
      let ydec = out.planes[pli].cfg.ydec;
Monty's avatar
Monty committed
940
941
      let crop_w = fi.width + (1 << xdec >> 1) >> xdec;
      let crop_h = fi.height + (1 << ydec >> 1) >> ydec;
Raphaël Zumer's avatar
Raphaël Zumer committed
942

Monty's avatar
Monty committed
943
944
945
946
      for si in 0..stripe_n {
        // stripe y pixel locations must be able to overspan the frame
        let stripe_start_y = si as isize * 64 - 8 >> ydec;
        let stripe_size = 64 >> ydec; // one past, unlike spec
Raphaël Zumer's avatar
Raphaël Zumer committed
947

Monty's avatar
Monty committed
948
949
950
        // horizontally, go rdu-by-rdu
        for rux in 0..rp.cols {
          // stripe x pixel locations must be clipped to frame, last may need to stretch
951
952
953
          let x = rux * rp.unit_size;
          let size = if rux == rp.cols - 1 {
            crop_w - x
Monty's avatar
Monty committed
954
955
956
          } else {
            rp.unit_size
          };
Monty's avatar
Monty committed
957
          let ru = rp.restoration_unit_by_stripe(si, rux);
Monty's avatar
Monty committed
958
          match ru.filter {
959
            RestorationFilter::Wiener{coeffs} => {
960
961
              wiener_stripe_filter(coeffs, fi,
                                   crop_w, crop_h,
962
963
                                   size, stripe_size,
                                   x, stripe_start_y,
964
965
                                   &cdeffed.planes[pli], &pre_cdef.planes[pli],
                                   &mut out.planes[pli]);
Monty's avatar
Monty committed
966
967
            },
            RestorationFilter::Sgrproj{set, xqd} => {
968
              sgrproj_stripe_filter(set, xqd, fi,
969
970
971
972
973
974
975
976
977
                                    crop_w - x,
                                    (crop_h as isize - stripe_start_y) as usize,
                                    size, stripe_size,
                                    &cdeffed.planes[pli].slice(&PlaneOffset{x: x as isize,
                                                                           y: stripe_start_y}),
                                    &pre_cdef.planes[pli].slice(&PlaneOffset{x: x as isize,
                                                                            y: stripe_start_y}),
                                    &mut out.planes[pli].mut_slice(&PlaneOffset{x: x as isize,
                                                                               y: stripe_start_y}));
Monty's avatar
Monty committed
978
979
980
981
982
983
984
            },
            RestorationFilter::None => {
              // do nothing
            }
          }
        }
      }
Raphaël Zumer's avatar
Raphaël Zumer committed
985
    }
986
987
  }
}