Commit 4f93ef73 authored by Yushin Cho's avatar Yushin Cho Committed by GitHub

Transform domain distortion for RDO-based mode decision (#680)

Use transform domain distortion during RDO-based mode decision.
- Turn on tx-domain distortion when speed >= 1.
- For CfL mode, use pixel-domain distortion, since CfL prediction of the chroma channels requires reconstructed luma pixels.

At the default speed (3), this is a regression: the PSNR BD-rate increases by about 0.35% on AWCY.

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM  | MS SSIM | CIEDE 2000
0.3440 |  0.4580   |  0.3700   |   0.2730     | 0.2714 |  0.2616    |     0.5526
parent d1445621
......@@ -92,7 +92,8 @@ fn write_b_bench(b: &mut Bencher, tx_size: TxSize, qindex: usize) {
false,
8,
ac,
0
0,
false
);
}
}
......
This diff is collapsed.
......@@ -13,7 +13,7 @@
use partition::TxSize;
use std::mem;
fn get_log_tx_scale(tx_size: TxSize) -> i32 {
pub fn get_log_tx_scale(tx_size: TxSize) -> i32 {
match tx_size {
TxSize::TX_64X64 => 2,
TxSize::TX_32X32 => 1,
......@@ -128,21 +128,22 @@ impl QuantizationContext {
}
#[inline]
pub fn quantize(&self, coeffs: &mut [i32]) {
coeffs[0] <<= self.log_tx_scale;
coeffs[0] += coeffs[0].signum() * self.dc_offset;
coeffs[0] = divu_pair(coeffs[0], self.dc_mul_add);
for c in coeffs[1..].iter_mut() {
*c <<= self.log_tx_scale;
*c += c.signum() * self.ac_offset;
*c = divu_pair(*c, self.ac_mul_add);
pub fn quantize(&self, coeffs: &[i32], qcoeffs: &mut [i32]) {
qcoeffs[0] = coeffs[0] << self.log_tx_scale;
qcoeffs[0] += qcoeffs[0].signum() * self.dc_offset;
qcoeffs[0] = divu_pair(qcoeffs[0], self.dc_mul_add);
for (qc, c) in qcoeffs[1..].iter_mut().zip(coeffs[1..].iter()) {
*qc = *c << self.log_tx_scale;
*qc += qc.signum() * self.ac_offset;
*qc = divu_pair(*qc, self.ac_mul_add);
}
}
}
pub fn quantize_in_place(
qindex: u8, coeffs: &mut [i32], tx_size: TxSize, bit_depth: usize
// quantization without using Multiplication Factor
pub fn quantize_wo_mf(
qindex: u8, coeffs: &[i32], qcoeffs: &mut [i32], tx_size: TxSize, bit_depth: usize
) {
let log_tx_scale = get_log_tx_scale(tx_size);
......@@ -153,14 +154,14 @@ pub fn quantize_in_place(
let dc_offset = dc_quant * 21 / 64 as i32;
let ac_offset = ac_quant * 21 / 64 as i32;
coeffs[0] <<= log_tx_scale;
coeffs[0] += coeffs[0].signum() * dc_offset;
coeffs[0] /= dc_quant;
qcoeffs[0] = coeffs[0] << log_tx_scale;
qcoeffs[0] += qcoeffs[0].signum() * dc_offset;
qcoeffs[0] /= dc_quant;
for c in coeffs[1..].iter_mut() {
*c <<= log_tx_scale;
*c += c.signum() * ac_offset;
*c /= ac_quant;
for (qc, c) in qcoeffs[1..].iter_mut().zip(coeffs[1..].iter()) {
*qc = *c << log_tx_scale;
*qc += qc.signum() * ac_offset;
*qc /= ac_quant;
}
}
......
......@@ -113,7 +113,7 @@ fn cdef_dist_wxh(
}
// Sum of Squared Error for a wxh block
fn sse_wxh(
pub fn sse_wxh(
src1: &PlaneSlice<'_>, src2: &PlaneSlice<'_>, w: usize, h: usize
) -> u64 {
assert!(w & (MI_SIZE - 1) == 0);
......@@ -210,6 +210,64 @@ fn compute_rd_cost(
(distortion as f64) + lambda * rate
}
// Rate-distortion cost of an encode when transform-domain distortion is in use.
//
// Distortion source:
//  - `skip == true`: measured in the pixel domain as the SSE between
//    `fs.input` and `fs.rec` (luma, plus both chroma planes unless
//    `luma_only` is set).
//  - `skip == false`: the caller-supplied `tx_dist` is taken as the whole
//    distortion; it must be non-negative.
//
// The returned cost is `distortion + lambda * rate`, where `rate` is
// `bit_cost` scaled down by `1 << OD_BITRES`.
//
// Panics if `fi.config.tune` is not `Tune::Psnr`, or if `tx_dist` is negative
// on the non-skip path.
fn compute_tx_rd_cost(
  fi: &FrameInvariants, fs: &FrameState, w_y: usize, h_y: usize,
  is_chroma_block: bool, bo: &BlockOffset, bit_cost: u32, tx_dist: i64,
  bit_depth: usize,
  skip: bool, luma_only: bool
) -> f64 {
  assert!(fi.config.tune == Tune::Psnr);

  let lambda = get_lambda(fi, bit_depth);

  // SSE between the source and the reconstruction of plane `p` over a
  // `w` x `h` area located at this block's offset.
  let plane_sse = |p: usize, w: usize, h: usize| -> u64 {
    let po = bo.plane_offset(&fs.input.planes[p].cfg);
    sse_wxh(
      &fs.input.planes[p].slice(&po),
      &fs.rec.planes[p].slice(&po),
      w,
      h
    )
  };

  // Base distortion: pixel-domain luma SSE for skip blocks, otherwise the
  // transform-domain distortion handed in by the caller.
  let base_distortion = if skip {
    plane_sse(0, w_y, h_y)
  } else {
    assert!(tx_dist >= 0);
    tx_dist as u64
  };

  let distortion = if skip && !luma_only {
    let chroma_cfg = &fs.input.planes[1].cfg;
    let mask = !(MI_SIZE - 1);
    let scaled_w = (w_y >> chroma_cfg.xdec) & mask;
    let scaled_h = (h_y >> chroma_cfg.ydec) & mask;

    // A chroma block whose subsampled size rounds down to zero still covers
    // one MI_SIZE x MI_SIZE unit.
    let (w_uv, h_uv) =
      if is_chroma_block && (scaled_w == 0 || scaled_h == 0) {
        (MI_SIZE, MI_SIZE)
      } else {
        (scaled_w, scaled_h)
      };

    // Add chroma distortion only when it is available
    if w_uv > 0 && h_uv > 0 {
      (1..3usize)
        .fold(base_distortion, |acc, p| acc + plane_sse(p, w_uv, h_uv))
    } else {
      base_distortion
    }
  } else {
    base_distortion
  };

  // Convert the fractional bit count into a rate.
  let rate = (bit_cost as f64) / ((1 << OD_BITRES) as f64);

  (distortion as f64) + lambda * rate
}
pub fn rdo_tx_size_type(
seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset,
......@@ -391,6 +449,7 @@ pub fn rdo_mode_decision(
if skip { tx_type = TxType::DCT_DCT; };
encode_block_a(seq, cw, wr, bsize, bo, skip);
let tx_dist =
encode_block_b(
seq,
fi,
......@@ -409,22 +468,38 @@ pub fn rdo_mode_decision(
tx_size,
tx_type,
mode_context,
mv_stack
mv_stack,
true
);
let cost = wr.tell_frac() - tell;
let rd = compute_rd_cost(
fi,
fs,
w,
h,
is_chroma_block,
bo,
cost,
seq.bit_depth,
false
);
let rd = if fi.use_tx_domain_distortion {
compute_tx_rd_cost(
fi,
fs,
w,
h,
is_chroma_block,
bo,
cost,
tx_dist,
seq.bit_depth,
skip,
false
)
} else {
compute_rd_cost(
fi,
fs,
w,
h,
is_chroma_block,
bo,
cost,
seq.bit_depth,
false
)
};
if rd < best.rd {
//if rd < best.rd || luma_mode == PredictionMode::NEW_NEWMV {
best.rd = rd;
......@@ -509,7 +584,8 @@ pub fn rdo_mode_decision(
false,
seq.bit_depth,
CFLParams::new(),
true
true,
false
);
cw.rollback(&cw_checkpoint);
if let Some(cfl) = rdo_cfl_alpha(fs, bo, bsize, seq.bit_depth) {
......@@ -535,21 +611,25 @@ pub fn rdo_mode_decision(
best.tx_size,
best.tx_type,
0,
&Vec::new()
&Vec::new(),
false // For CFL, luma should be always reconstructed.
);
let cost = wr.tell_frac() - tell;
let rd = compute_rd_cost(
fi,
fs,
w,
h,
is_chroma_block,
bo,
cost,
seq.bit_depth,
false
);
// For CFL, tx-domain distortion is not an option.
let rd =
compute_rd_cost(
fi,
fs,
w,
h,
is_chroma_block,
bo,
cost,
seq.bit_depth,
false
);
if rd < best.rd {
best.rd = rd;
......@@ -658,30 +738,45 @@ pub fn rdo_tx_type_decision(
let mut wr: &mut dyn Writer = &mut WriterCounter::new();
let tell = wr.tell_frac();
if is_inter {
let tx_dist = if is_inter {
write_tx_tree(
fi, fs, cw, wr, mode, bo, bsize, tx_size, tx_type, false, bit_depth, true
);
fi, fs, cw, wr, mode, bo, bsize, tx_size, tx_type, false, bit_depth, true, true
)
} else {
let cfl = CFLParams::new(); // Unused
write_tx_blocks(
fi, fs, cw, wr, mode, mode, bo, bsize, tx_size, tx_type, false, bit_depth, cfl, true
);
}
fi, fs, cw, wr, mode, mode, bo, bsize, tx_size, tx_type, false, bit_depth, cfl, true, true
)
};
let cost = wr.tell_frac() - tell;
let rd = compute_rd_cost(
fi,
fs,
w,
h,
is_chroma_block,
bo,
cost,
bit_depth,
true
);
let rd = if fi.use_tx_domain_distortion {
compute_tx_rd_cost(
fi,
fs,
w,
h,
is_chroma_block,
bo,
cost,
tx_dist,
bit_depth,
false,
true
)
} else {
compute_rd_cost(
fi,
fs,
w,
h,
is_chroma_block,
bo,
cost,
bit_depth,
true
)
};
if rd < best_rd {
best_rd = rd;
best_type = tx_type;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment