Commit 237fafaa authored by Monty, committed by Monty Montgomery

Initial LRF RDO taking cdef into account

Early functional implementation; currently requires LRU size == SB size
and only considers None and Sgrproj.  Some addressing simplifications.
parent b81404d9
@@ -257,11 +257,11 @@ pub fn cdef_sb_frame<T: Pixel>(fi: &FrameInvariants<T>, f: &Frame<T>) -> Frame<T
Frame {
planes: [
Plane::new(sb_size >> f.planes[0].cfg.xdec, sb_size >> f.planes[0].cfg.ydec,
f.planes[0].cfg.xdec, f.planes[0].cfg.ydec, 0, 0),
f.planes[0].cfg.xdec, f.planes[0].cfg.ydec, 3, 3),
Plane::new(sb_size >> f.planes[1].cfg.xdec, sb_size >> f.planes[1].cfg.ydec,
f.planes[1].cfg.xdec, f.planes[1].cfg.ydec, 0, 0),
f.planes[1].cfg.xdec, f.planes[1].cfg.ydec, 3, 3),
Plane::new(sb_size >> f.planes[2].cfg.xdec, sb_size >> f.planes[2].cfg.ydec,
f.planes[2].cfg.xdec, f.planes[2].cfg.ydec, 0, 0),
f.planes[2].cfg.xdec, f.planes[2].cfg.ydec, 3, 3),
]
}
}
@@ -276,11 +276,11 @@ pub fn cdef_sb_padded_frame_copy<T: Pixel>(
let mut out = Frame {
planes: [
Plane::new((sb_size >> f.planes[0].cfg.xdec) + pad*2, (sb_size >> f.planes[0].cfg.ydec) + pad*2,
f.planes[0].cfg.xdec, f.planes[0].cfg.ydec, 0, 0),
f.planes[0].cfg.xdec, f.planes[0].cfg.ydec, 3, 3),
Plane::new((sb_size >> f.planes[1].cfg.xdec) + pad*2, (sb_size >> f.planes[1].cfg.ydec) + pad*2,
f.planes[1].cfg.xdec, f.planes[1].cfg.ydec, 0, 0),
f.planes[1].cfg.xdec, f.planes[1].cfg.ydec, 3, 3),
Plane::new((sb_size >> f.planes[2].cfg.xdec) + pad*2, (sb_size >> f.planes[2].cfg.ydec) + pad*2,
f.planes[2].cfg.xdec, f.planes[2].cfg.ydec, 0, 0),
f.planes[2].cfg.xdec, f.planes[2].cfg.ydec, 3, 3),
]
};
// Copy data into padded frame
@@ -325,6 +325,18 @@ pub fn cdef_sb_padded_frame_copy<T: Pixel>(
out
}
pub fn cdef_empty_frame<T: Pixel>(f: &Frame<T>) -> Frame<T> {
Frame {
planes: [
Plane::new(0, 0, f.planes[0].cfg.xdec, f.planes[0].cfg.ydec, 0, 0),
Plane::new(0, 0, f.planes[1].cfg.xdec, f.planes[1].cfg.ydec, 0, 0),
Plane::new(0, 0, f.planes[2].cfg.xdec, f.planes[2].cfg.ydec, 0, 0),
]
}
}
// We assume `in` is padded, and that the area we'll write out is at least as
// large as the unpadded area of `in`
// cdef_index is taken from the block context
......
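A minimal sketch of the call pattern the comment above implies, using only signatures that appear in this commit; `fi`, `fs`, `cw`, `sbo`, and the candidate `cdef_index` are assumed to come from the enclosing tile loop:

// Pad a single-superblock copy of the reconstruction, allocate a
// same-sized output frame, analyze directions once, then test-filter
// with a candidate cdef_index.
let sbo_0 = SuperBlockOffset { x: 0, y: 0 };
let mut padded = cdef_sb_padded_frame_copy(fi, sbo, &fs.rec, 2);
let mut output = cdef_sb_frame(fi, &fs.rec);
let dirs = cdef_analyze_superblock(&mut padded, &mut cw.bc, &sbo_0, &sbo, fi.sequence.bit_depth);
cdef_filter_superblock(fi, &mut padded, &mut output, &mut cw.bc, &sbo_0, &sbo, cdef_index, &dirs);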
@@ -13,6 +13,7 @@
#![allow(non_camel_case_types)]
use crate::ec::Writer;
use crate::ec::OD_BITRES;
use crate::encoder::FrameInvariants;
use crate::entropymode::*;
use crate::header::ReferenceMode;
@@ -1561,6 +1562,11 @@ impl BlockContext {
}
}
pub fn get_cdef(&mut self, sbo: &SuperBlockOffset) -> u8 {
let bo = sbo.block_offset(0, 0);
self.blocks[bo.y][bo.x].cdef_index
}
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real macroblocks.
// The prediction flags in these dummy entries are initialized to 0.
@@ -3015,6 +3021,36 @@ impl ContextWriter {
symbol_with_update!(self, w, coded_id as u32, &mut self.fc.spatial_segmentation_cdfs[cdf_index as usize]);
}
// Rather than test-writing and rolling back the cdf, we just count rate in Q(OD_BITRES) eighth-bits using the current cdf
pub fn count_lrf_switchable(&mut self, w: &dyn Writer, rs: &RestorationState,
filter: &RestorationFilter, pli: usize) -> u32 {
let nsym = self.fc.lrf_switchable_cdf.len() - 1;
match *filter {
RestorationFilter::None => {
w.symbol_bits(0, &self.fc.lrf_switchable_cdf[..nsym])
}
RestorationFilter::Wiener{coeffs: _} => {
unreachable!() // for now, not permanently
}
RestorationFilter::Sgrproj{set, xqd} => {
// Does *not* use 'RESTORE_SGRPROJ' but rather just '2'
let rp = &rs.plane[pli];
let mut bits = w.symbol_bits(2, &self.fc.lrf_switchable_cdf[..nsym]) +
((SGRPROJ_PARAMS_BITS as u32) << OD_BITRES);
for i in 0..2 {
let s = SGRPROJ_PARAMS_S[set as usize][i];
let min = SGRPROJ_XQD_MIN[i] as i32;
let max = SGRPROJ_XQD_MAX[i] as i32;
if s > 0 {
bits += w.count_signed_subexp_with_ref(xqd[i] as i32, min, max+1, SGRPROJ_PRJ_SUBEXP_K,
rp.sgrproj_ref[i] as i32);
}
}
bits
}
}
}
pub fn write_lrf<T: Pixel>(
&mut self, w: &mut dyn Writer, fi: &FrameInvariants<T>, rs: &mut RestorationState, sbo: &SuperBlockOffset
) {
@@ -3051,7 +3087,7 @@ impl ContextWriter {
}
RESTORE_SWITCHABLE => {
// Does *not* write 'RESTORE_SGRPROJ'
symbol_with_update!(self, w, 2 as u32, &mut self.fc.lrf_switchable_cdf);
symbol_with_update!(self, w, 2, &mut self.fc.lrf_switchable_cdf);
}
_ => unreachable!()
}
@@ -3070,7 +3106,6 @@
assert!(xqd[i] == 0);
rp.sgrproj_ref[0] = 0;
} else {
assert!(xqd[i] == 95);
rp.sgrproj_ref[1] = 95; // LOL at spec. The result is always 95.
}
}
......
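These counters enable a try-before-you-write pattern in LRF RDO; a minimal sketch of the intended use, assuming a `&dyn Writer` in `w` and treating `lambda`, `err_none`, `err_sgr`, `set`, and `xqd` as hypothetical stand-ins (they are not names from this commit):

// Price each candidate against the *current* CDFs without mutating
// them, then fold the rate into an RD cost. Counts are Q(OD_BITRES)
// eighth-bits, so scale back to whole bits before weighting by lambda.
let bits_none = cw.count_lrf_switchable(w, &fs.restoration, &RestorationFilter::None, pli);
let bits_sgr = cw.count_lrf_switchable(w, &fs.restoration, &RestorationFilter::Sgrproj { set, xqd }, pli);
let cost_none = err_none as f64 + lambda * bits_none as f64 / (1 << OD_BITRES) as f64;
let cost_sgr = err_sgr as f64 + lambda * bits_sgr as f64 / (1 << OD_BITRES) as f64;
let best = if cost_sgr < cost_none {
RestorationFilter::Sgrproj { set, xqd }
} else {
RestorationFilter::None
};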
@@ -31,6 +31,10 @@ type ec_window = u32;
pub trait Writer {
/// Write a symbol s, using the passed in cdf reference; leaves cdf unchanged
fn symbol(&mut self, s: u32, cdf: &[u16]);
/// Return the approximate number of fractional bits, in OD_BITRES
/// precision, needed to write a symbol s using the passed in cdf reference;
/// leaves cdf unchanged
fn symbol_bits(&self, s: u32, cdf: &[u16]) -> u32;
/// Write a symbol s, using the passed in cdf reference; updates the referenced cdf.
fn symbol_with_update(&mut self, s: u32, cdf: &mut [u16]);
/// Write a bool using passed in probability
@@ -43,14 +47,28 @@ pub trait Writer {
fn write_golomb(&mut self, level: u16);
/// Write a value v in [0, n-1] quasi-uniformly
fn write_quniform(&mut self, n: u32, v: u32);
/// Return fractional bits needed to write a value v in [0,
/// n-1] quasi-uniformly
fn count_quniform(&self, n: u32, v: u32) -> u32;
/// Write symbol v in [0, n-1] with parameter k as finite subexponential
fn write_subexp(&mut self, n: u32, k: u8, v: u32);
/// Return fractional bits needed to write symbol v in [0, n-1] with
/// parameter k as finite subexponential
fn count_subexp(&self, n: u32, k: u8, v: u32) -> u32;
/// Write symbol v in [0, n-1] with parameter k as finite
/// subexponential based on a reference ref also in [0, n-1].
fn write_unsigned_subexp_with_ref(&mut self, v: u32, mx: u32, k: u8, r: u32);
/// Return fractional bits needed to write symbol v in [0, n-1] with
/// parameter k as finite subexponential based on a reference ref
/// also in [0, n-1].
fn count_unsigned_subexp_with_ref(&self, v: u32, mx: u32, k: u8, r: u32) -> u32;
/// Write symbol v in [-(n-1), n-1] with parameter k as finite
/// subexponential based on a reference ref also in [-(n-1), n-1].
fn write_signed_subexp_with_ref(&mut self, v: i32, low: i32, high: i32, k: u8, r: i32);
/// Return fractional bits needed to write symbol v in [-(n-1), n-1]
/// with parameter k as finite subexponential based on a reference
/// ref also in [-(n-1), n-1].
fn count_signed_subexp_with_ref(&self, v: i32, low: i32, high: i32, k: u8, r: i32) -> u32;
/// Return current length of range-coded bitstream in integer bits
fn tell(&mut self) -> u32;
/// Return current length of range-coded bitstream in fractional
@@ -503,6 +521,45 @@ where
self.symbol(s, &cdf[..nsymbs]);
Self::update_cdf(cdf, s);
}
/// Returns approximate cost for a symbol given a cumulative
/// distribution function (CDF) table and current write state.
/// `s`: The index of the symbol to encode.
/// `cdf`: The CDF, stored inverted as this coder expects, such that
/// symbol s falls in the range `[cdf[s], s > 0 ? cdf[s - 1] : 32768)`.
/// The values must be monotonically non-increasing, and the last value
/// must be exactly 0 (see the debug_assert below). There should be at
/// most 16 values.
fn symbol_bits(&self, s: u32, cdf: &[u16]) -> u32 {
let mut bits = 0;
debug_assert!(cdf[cdf.len() - 1] == 0);
debug_assert!(32768 <= self.rng);
let rng = (self.rng >> 8) as u32;
let fh = cdf[s as usize] as u32 >> EC_PROB_SHIFT;
let r =
if s > 0 {
let fl = cdf[s as usize - 1] as u32 >> EC_PROB_SHIFT;
(rng * fl >> (7 - EC_PROB_SHIFT)) - (rng * fh >> (7 - EC_PROB_SHIFT)) + EC_MIN_PROB
} else {
let nms1 = cdf.len() as u32 - s - 1;
self.rng as u32 - (rng * fh >> (7 - EC_PROB_SHIFT)) - nms1 * EC_MIN_PROB
};
// The 9 here counteracts the offset of -9 baked into cnt. Don't include a termination bit.
let pre = Self::frac_compute((self.cnt + 9) as u32, self.rng as u32);
let d = 16 - r.ilog();
let mut c = self.cnt;
let mut sh = c + (d as i16);
if sh >= 0 {
c += 16;
if sh >= 8 {
bits += 8;
c -= 8;
}
bits += 8;
sh = c + (d as i16) - 24;
}
// The 9 here counteracts the offset of -9 baked into cnt. Don't include a termination bit.
Self::frac_compute((bits + sh + 9) as u32, r << d) - pre
}
/// Encode a golomb to the bitstream.
/// 'level': passed in value to encode
fn write_golomb(&mut self, level: u16) {
@@ -539,6 +596,23 @@ where
}
}
}
/// Returns Q(OD_BITRES) fractional bits needed to write a value v in [0, n-1] quasi-uniformly
/// n: size of interval
/// v: value to encode
fn count_quniform(&self, n: u32, v: u32) -> u32 {
let mut bits = 0;
if n > 1 {
let l = (msb(n as i32) + 1) as u32;
let m = (1 << l) - n;
if v < m {
bits += (l - 1) << OD_BITRES;
} else {
bits += l << OD_BITRES;
}
}
bits
}
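// Worked example (editorial): for n = 5, msb(5) = 2 so l = 3 and
// m = (1 << 3) - 5 = 3; values v < 3 cost l - 1 = 2 bits and values
// v >= 3 cost l = 3 bits, reported as 2 << OD_BITRES and 3 << OD_BITRES.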
/// Write symbol v in [0, n-1] with parameter k as finite subexponential
/// n: size of interval
/// k: 'parameter'
@@ -565,6 +639,34 @@ where
}
}
}
/// Returns Q(OD_BITRES) fractional bits needed to write symbol v in [0, n-1] with parameter k as finite subexponential
/// n: size of interval
/// k: 'parameter'
/// v: value to encode
fn count_subexp(&self, n: u32, k: u8, v: u32) -> u32 {
let mut i = 0;
let mut mk = 0;
let mut bits = 0;
loop {
let b = if i != 0 {k + i - 1} else {k};
let a = 1 << b;
if n <= mk + 3 * a {
bits += self.count_quniform(n - mk, v - mk);
break;
} else {
let t = v >= mk + a;
bits += 1 << OD_BITRES;
if t {
i += 1;
mk += a;
} else {
bits += (b as u32) << OD_BITRES;
break;
}
}
}
bits
}
/// Write symbol v in [0, n-1] with parameter k as finite
/// subexponential based on a reference ref also in [0, n-1].
/// v: value to encode
@@ -578,6 +680,19 @@ where
self.write_subexp(n, k, Self::recenter(n-1-r, n-1-v));
}
}
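// Editorial note: when the reference sits in the upper half of the
// interval, both v and r are mirrored to (n-1-v, n-1-r) before
// recentering, so values near the reference always map to small
// recentered indices and therefore to cheap subexponential codes.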
/// Returns Q(OD_BITRES) fractional bits needed to write symbol v in [0, n-1] with parameter k as finite
/// subexponential based on a reference ref also in [0, n-1].
/// v: value to encode
/// n: size of interval
/// k: 'parameter'
/// r: reference
fn count_unsigned_subexp_with_ref(&self, v: u32, n: u32, k: u8, r: u32) -> u32 {
if (r << 1) <= n {
self.count_subexp(n, k, Self::recenter(r, v))
} else {
self.count_subexp(n, k, Self::recenter(n-1-r, n-1-v))
}
}
/// Write symbol v in [-(n-1), n-1] with parameter k as finite
/// subexponential based on a reference ref also in [-(n-1), n-1].
/// v: value to encode
@@ -587,7 +702,15 @@ where
fn write_signed_subexp_with_ref(&mut self, v: i32, low: i32, high: i32, k: u8, r: i32) {
self.write_unsigned_subexp_with_ref((v - low) as u32, (high - low) as u32, k, (r - low) as u32);
}
/// Returns Q(OD_BITRES) fractional bits needed to write symbol v in [-(n-1), n-1] with parameter k as finite
/// subexponential based on a reference ref also in [-(n-1), n-1].
/// v: value to encode
/// n: size of interval
/// k: 'parameter'
/// r: reference
fn count_signed_subexp_with_ref(&self, v: i32, low: i32, high: i32, k: u8, r: i32) -> u32 {
self.count_unsigned_subexp_with_ref((v - low) as u32, (high - low) as u32, k, (r - low) as u32)
}
/// Returns the number of bits "used" by the encoded symbols so far.
/// This same number can be computed in either the encoder or the
/// decoder, and is suitable for making coding decisions. The value
......
@@ -1934,7 +1934,6 @@ fn encode_tile<T: Pixel>(fi: &FrameInvariants<T>, fs: &mut FrameState<T>) -> Vec
for sbx in 0..fi.sb_width {
let mut w_pre_cdef = WriterRecorder::new();
let mut w_post_cdef = WriterRecorder::new();
let mut cdef_index = 0;
let sbo = SuperBlockOffset { x: sbx, y: sby };
let bo = sbo.block_offset(0, 0);
cw.bc.cdef_coded = false;
@@ -2036,15 +2035,13 @@ fn encode_tile<T: Pixel>(fi: &FrameInvariants<T>, fs: &mut FrameState<T>) -> Vec
BlockSize::BLOCK_64X64, &bo, &None, &pmvs);
}
// CDEF has to be decided before loop restoration, but coded after
if cw.bc.cdef_coded {
cdef_index = rdo_cdef_decision(&sbo, fi, fs, &mut cw);
cw.bc.set_cdef(&sbo, cdef_index);
// CDEF has to be decided before loop restoration, but coded after.
// loop restoration must be decided last but coded before anything else.
if cw.bc.cdef_coded || fi.sequence.enable_restoration {
rdo_loop_decision(&sbo, fi, fs, &mut cw, &mut w);
}
// loop restoration must be decided last but coded before anything else
if fi.sequence.enable_restoration {
// optimization would occur here
cw.write_lrf(&mut w, fi, &mut fs.restoration, &sbo);
}
@@ -2053,6 +2050,7 @@ fn encode_tile<T: Pixel>(fi: &FrameInvariants<T>, fs: &mut FrameState<T>) -> Vec
if cw.bc.cdef_coded {
// CDEF index must be written in the middle, we can code it now
let cdef_index = cw.bc.get_cdef(&sbo);
cw.write_cdef(&mut w, cdef_index, fi.cdef_bits);
// ...and then finally code what comes after the CDEF index
w_post_cdef.replay(&mut w);
......
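The decide/code ordering the comments in encode_tile describe, recapped as a per-superblock sketch (names as above; this is an editorial summary, not code from the commit):

// decide: encode the blocks, then rdo_loop_decision() picks cdef_index
// (stored via set_cdef) and the LRF parameters together.
rdo_loop_decision(&sbo, fi, fs, &mut cw, &mut w);
// code: LRF first, then the recorded pre-cdef syntax, then the CDEF
// index in the middle, then the recorded post-cdef syntax.
cw.write_lrf(&mut w, fi, &mut fs.restoration, &sbo);
w_pre_cdef.replay(&mut w);
let cdef_index = cw.bc.get_cdef(&sbo);
cw.write_cdef(&mut w, cdef_index, fi.cdef_bits);
w_post_cdef.replay(&mut w);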
@@ -64,7 +64,7 @@ pub enum RestorationFilter {
impl RestorationFilter {
pub fn default() -> RestorationFilter {
RestorationFilter::Sgrproj {set:4, xqd: SGRPROJ_XQD_MID}
RestorationFilter::None
}
}
@@ -469,13 +469,12 @@ fn sgrproj_box_f_r2<T: Pixel>(af: &[&[i32; 64+2]; 3], bf: &[&[i32; 64+2]; 3], f:
}
}
fn sgrproj_stripe_filter<T: Pixel>(set: u8, xqd: [i8; 2], fi: &FrameInvariants<T>,
crop_w: usize, crop_h: usize,
stripe_w: usize, stripe_h: usize,
cdeffed: &PlaneSlice<T>,
deblocked: &PlaneSlice<T>,
out: &mut PlaneMutSlice<T>) {
pub fn sgrproj_stripe_filter<T: Pixel>(set: u8, xqd: [i8; 2], fi: &FrameInvariants<T>,
crop_w: usize, crop_h: usize,
stripe_w: usize, stripe_h: usize,
cdeffed: &PlaneSlice<T>,
deblocked: &PlaneSlice<T>,
out: &mut PlaneMutSlice<T>) {
assert!(stripe_h <= 64);
let bdm8 = fi.sequence.bit_depth - 8;
let mut a_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
@@ -561,11 +560,153 @@ fn sgrproj_stripe_filter<T: Pixel>(set: u8, xqd: [i8; 2], fi: &FrameInvariants<T
}
}
// Frame inputs below aren't all equal, and will change as work
// continues. There's no deblocked reconstruction available at this
// point of RDO, so we use the non-deblocked reconstruction, cdef and
// input. The input can be a full-sized frame. Cdef input is a partial
// frame constructed specifically for RDO.
// For simplicity, this ignores stripe segmentation (it's possible the
// extra complexity isn't worth it and we'll ignore stripes
// permanently during RDO, but that's not been tested yet). Data
// access inside the cdef frame is monolithic and clipped to the cdef
// borders.
// Input params follow the same rules as sgrproj_stripe_filter.
// Inputs are relative to the colocated slice views.
pub fn sgrproj_solve<T: Pixel>(set: u8, fi: &FrameInvariants<T>,
input: &PlaneSlice<T>,
cdeffed: &PlaneSlice<T>,
cdef_w: usize, cdef_h: usize) -> (i8, i8) {
assert!(cdef_h <= 64);
let bdm8 = fi.sequence.bit_depth - 8;
let mut a_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
let mut b_r2: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
let mut f_r2: [i32; 64] = [0; 64];
let mut a_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
let mut b_r1: [[i32; 64+2]; 3] = [[0; 64+2]; 3];
let mut f_r1: [i32; 64] = [0; 64];
let s_r2: i32 = SGRPROJ_PARAMS_S[set as usize][0];
let s_r1: i32 = SGRPROJ_PARAMS_S[set as usize][1];
let mut h:[[f64; 2]; 2] = [[0.,0.],[0.,0.]];
let mut c:[f64; 2] = [0., 0.];
/* prime the intermediate arrays */
if s_r2 > 0 {
sgrproj_box_ab_r2(&mut a_r2[0], &mut b_r2[0],
-1, 0, cdef_h,
s_r2, bdm8,
&cdeffed, cdef_w, cdef_h,
&cdeffed, cdef_w, cdef_h);
sgrproj_box_ab_r2(&mut a_r2[1], &mut b_r2[1],
0, 0, cdef_h,
s_r2, bdm8,
&cdeffed, cdef_w, cdef_h,
&cdeffed, cdef_w, cdef_h);
}
if s_r1 > 0 {
sgrproj_box_ab_r1(&mut a_r1[0], &mut b_r1[0],
-1, 0, cdef_h,
s_r1, bdm8,
&cdeffed, cdef_w, cdef_h,
&cdeffed, cdef_w, cdef_h);
sgrproj_box_ab_r1(&mut a_r1[1], &mut b_r1[1],
0, 0, cdef_h,
s_r1, bdm8,
&cdeffed, cdef_w, cdef_h,
&cdeffed, cdef_w, cdef_h);
}
/* iterate by column */
for xi in 0..cdef_w {
/* build intermediate array columns */
if s_r2 > 0 {
sgrproj_box_ab_r2(&mut a_r2[(xi+2)%3], &mut b_r2[(xi+2)%3],
xi as isize + 1, 0, cdef_h,
s_r2, bdm8,
&cdeffed, cdef_w, cdef_h,
&cdeffed, cdef_w, cdef_h);
let ap0: [&[i32; 64+2]; 3] = [&a_r2[xi%3], &a_r2[(xi+1)%3], &a_r2[(xi+2)%3]];
let bp0: [&[i32; 64+2]; 3] = [&b_r2[xi%3], &b_r2[(xi+1)%3], &b_r2[(xi+2)%3]];
sgrproj_box_f_r2(&ap0, &bp0, &mut f_r2, xi, 0, cdef_h as usize, &cdeffed);
} else {
sgrproj_box_f_r0(&mut f_r2, xi, 0, cdef_h as usize, &cdeffed);
}
if s_r1 > 0 {
sgrproj_box_ab_r1(&mut a_r1[(xi+2)%3], &mut b_r1[(xi+2)%3],
xi as isize + 1, 0, cdef_h,
s_r1, bdm8,
&cdeffed, cdef_w, cdef_h,
&cdeffed, cdef_w, cdef_h);
let ap1: [&[i32; 64+2]; 3] = [&a_r1[xi%3], &a_r1[(xi+1)%3], &a_r1[(xi+2)%3]];
let bp1: [&[i32; 64+2]; 3] = [&b_r1[xi%3], &b_r1[(xi+1)%3], &b_r1[(xi+2)%3]];
sgrproj_box_f_r1(&ap1, &bp1, &mut f_r1, xi, 0, cdef_h as usize, &cdeffed);
} else {
sgrproj_box_f_r0(&mut f_r1, xi, 0, cdef_h as usize, &cdeffed);
}
for yi in 0..cdef_h {
let u = i32::cast_from(cdeffed.p(yi,xi)) << SGRPROJ_RST_BITS;
let s = (i32::cast_from(input.p(yi,xi)) << SGRPROJ_RST_BITS) - u; // source residual relative to u, as in libaom's get_proj_subspace
let f2 = f_r2[yi] - u;
let f1 = f_r1[yi] - u;
h[0][0] += f2 as f64 * f2 as f64;
h[1][1] += f1 as f64 * f1 as f64;
h[0][1] += f1 as f64 * f2 as f64;
c[0] += f2 as f64 * s as f64;
c[1] += f1 as f64 * s as f64;
}
}
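// Editorial note: the loop above accumulates the 2x2 normal equations
// of a least-squares fit. h is the autocorrelation of the two filter
// residuals f2 and f1 (each taken relative to the cdeffed value u) and
// c is their correlation with the source residual s; normalizing by n
// and solving h * x = c yields the sgrproj projection weights.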
// this is lifted almost intact from libaom
let n = cdef_w as f64 * cdef_h as f64;
h[0][0] /= n;
h[0][1] /= n;
h[1][1] /= n;
h[1][0] = h[0][1];
c[0] /= n;
c[1] /= n;
let (xq0, xq1) = if s_r2 == 0 {
// H matrix is now only the scalar h[1][1]
// C vector is now only the scalar c[1]
if h[1][1] == 0. {
(0, 0)
} else {
(0, (c[1] / h[1][1]).round() as i32)
}
} else if s_r1 == 0 {
// H matrix is now only the scalar h[0][0]
// C vector is now only the scalar c[0]
if h[0][0] == 0. {
(0, 0)
} else {
((c[0] / h[0][0]).round() as i32, 0)
}
} else {
let det = h[0][0] * h[1][1] - h[0][1] * h[1][0];
if det == 0. {
(0, 0)
} else {
// If scaling up dividend would overflow, instead scale down the divisor
let div1 = (h[1][1] * c[0] - h[0][1] * c[1]) * (1 << SGRPROJ_PRJ_BITS) as f64;
let div2 = (h[0][0] * c[1] - h[1][0] * c[0]) * (1 << SGRPROJ_PRJ_BITS) as f64;
((div1 / det).round() as i32, (div2 / det).round() as i32)
}
};
(clamp(xq0, SGRPROJ_XQD_MIN[0] as i32, SGRPROJ_XQD_MAX[0] as i32) as i8,
clamp(xq1, SGRPROJ_XQD_MIN[1] as i32, SGRPROJ_XQD_MAX[1] as i32) as i8)
}
fn wiener_stripe_filter<T: Pixel>(coeffs: [[i8; 3]; 2], fi: &FrameInvariants<T>,
crop_w: usize, crop_h: usize,
stripe_w: usize, stripe_h: usize,
stripe_x: usize, stripe_y: isize,
cdeffed: &Plane<T>, deblocked: &Plane<T>, out: &mut Plane<T>) {
crop_w: usize, crop_h: usize,
stripe_w: usize, stripe_h: usize,
stripe_x: usize, stripe_y: isize,
cdeffed: &Plane<T>, deblocked: &Plane<T>, out: &mut Plane<T>) {
let bit_depth = fi.sequence.bit_depth;
let round_h = if bit_depth == 12 {5} else {3};
let round_v = if bit_depth == 12 {9} else {11};
......
@@ -12,6 +12,7 @@
use crate::api::*;
use crate::cdef::*;
use crate::lrf::*;
use crate::context::*;
use crate::ec::{OD_BITRES, Writer, WriterCounter};
use crate::header::ReferenceMode;
@@ -20,6 +21,7 @@ use crate::encode_block_b;
use crate::encode_block_with_modes;
use crate::FrameInvariants;
use crate::FrameState;
use crate::Frame;
use crate::luma_ac;
use crate::me::*;
use crate::motion_compensate;
@@ -32,6 +34,7 @@ use crate::write_tx_tree;
use crate::util::{CastFromPrimitive, Pixel};
use std;
use std::cmp;
use std::vec::Vec;
use crate::partition::PartitionType::*;
@@ -1120,69 +1123,200 @@ pub fn rdo_partition_decision<T: Pixel>(
}
}
pub fn rdo_cdef_decision<T: Pixel>(
sbo: &SuperBlockOffset, fi: &FrameInvariants<T>,
fs: &FrameState<T>, cw: &mut ContextWriter
) -> u8 {
// Construct a single-superblock-sized frame to test-filter into
fn rdo_loop_plane_error<T: Pixel>(sbo: &SuperBlockOffset, fi: &FrameInvariants<T>,
fs: &FrameState<T>, bc: &BlockContext,
test: &Frame<T>, pli: usize) -> u64 {
let sbo_0 = SuperBlockOffset { x: 0, y: 0 };
let bc = &mut cw.bc;
let mut cdef_output = cdef_sb_frame(fi, &fs.rec);
let mut rec_input = cdef_sb_padded_frame_copy(fi, sbo, &fs.rec, 2);
// RDO comparisons
let sb_blocks = if fi.sequence.use_128x128_superblock {16} else {8};
let mut best_index: u8 = 0;
let mut best_err: u64 = 0;
let cdef_dirs = cdef_analyze_superblock(&mut rec_input, bc, &sbo_0, &sbo, fi.sequence.bit_depth);
for cdef_index in 0..(1<<fi.cdef_bits) {
//for p in 0..3 {
// for i in 0..cdef_output.planes[p].data.len() { cdef_output.planes[p].data[i] = CDEF_VERY_LARGE; }
//}
// TODO: Don't repeat find_direction over and over; split filter_superblock to run it separately
cdef_filter_superblock(fi, &mut rec_input, &mut cdef_output,
bc, &sbo_0, &sbo, cdef_index, &cdef_dirs);
// Rate is constant, compute just distortion
// Computation is block by block, paying attention to skip flag
// Each direction block is 8x8 in y, potentially smaller if subsampled in chroma
// We're dealing only with in-frame and unpadded planes now
let mut err:u64 = 0;
for by in 0..sb_blocks {
for bx in 0..sb_blocks {
let bo = sbo.block_offset(bx<<1, by<<1);
if bo.x < bc.cols && bo.y < bc.rows {
let skip = bc.at(&bo).skip;
if !skip {
for p in 0..3 {
let in_plane = &fs.input.planes[p];
let in_po = sbo.block_offset(bx<<1, by<<1).plane_offset(&in_plane.cfg);
let in_slice = in_plane.slice(&in_po);
let out_plane = &mut cdef_output.planes[p];
let out_po = sbo_0.block_offset(bx<<1, by<<1).plane_offset(&out_plane.cfg);
let out_slice = &out_plane.slice(&out_po);