Commit a541334a authored by Frank Bossen, committed by fbossen

Add support for coding 64x64 blocks

The inverse 64-point transform is added. The forward 64-point transform
computes all 4096 coefficients, so that the distortion from zeroing
high-frequency coefficients can be computed easily.
parent 9b28d1c5
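Keeping the full 64x64 coefficient block means the cost of coding only the low-frequency 32x32 quadrant can be measured directly: for an (approximately) orthonormal transform, the pixel-domain distortion added by zeroing a set of coefficients is the sum of their squares. A minimal sketch of that computation, assuming a row-major 64x64 coefficient block and ignoring the transform's fixed-point scale factor (hypothetical helper, not part of this commit):

fn zeroing_distortion(coeffs: &[i32]) -> u64 {
  // Sum of squared coefficients outside the low-frequency 32x32 quadrant.
  assert_eq!(coeffs.len(), 64 * 64);
  let mut sse = 0u64;
  for row in 0..64 {
    for col in 0..64 {
      if row >= 32 || col >= 32 {
        let c = coeffs[row * 64 + col] as i64;
        sse += (c * c) as u64;
      }
    }
  }
  sse
}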
@@ -55,7 +55,7 @@ const MAX_SB_SIZE_LOG2: usize = 6;
pub const MAX_SB_SIZE: usize = (1 << MAX_SB_SIZE_LOG2);
const MAX_SB_SQUARE: usize = (MAX_SB_SIZE * MAX_SB_SIZE);
pub const MAX_TX_SIZE: usize = 32;
pub const MAX_TX_SIZE: usize = 64;
const MAX_TX_SQUARE: usize = MAX_TX_SIZE * MAX_TX_SIZE;
pub const INTRA_MODES: usize = 13;
@@ -2997,18 +2997,16 @@ impl ContextWriter {
}
pub fn av1_get_adjusted_tx_size(&mut self, tx_size: TxSize) -> TxSize {
// TODO: Enable below commented out block if TX64X64 is enabled.
/*
if tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64 {
return TX_32X32
}
if (tx_size == TX_16X64) {
return TX_16X32
}
if (tx_size == TX_64X16) {
return TX_32X16
}
*/
if tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64 {
return TX_32X32
}
if tx_size == TX_16X64 {
return TX_16X32
}
if tx_size == TX_64X16 {
return TX_32X16
}
tx_size
}
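For coefficient coding, any 64-point dimension is clamped to 32, since only the low-frequency 32x32 region of a 64-point transform carries coded coefficients. A hypothetical check of the mapping, with cw being a ContextWriter and assuming TxSize implements PartialEq and Debug:

assert_eq!(cw.av1_get_adjusted_tx_size(TX_64X64), TX_32X32);
assert_eq!(cw.av1_get_adjusted_tx_size(TX_16X64), TX_16X32);
assert_eq!(cw.av1_get_adjusted_tx_size(TX_16X16), TX_16X16); // unchanged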
@@ -3016,6 +3014,18 @@ impl ContextWriter {
self.av1_get_adjusted_tx_size(tx_size).width_log2()
}
pub fn get_txb_wide(&mut self, tx_size: TxSize) -> usize {
let adjusted_tx_size = self.av1_get_adjusted_tx_size(tx_size);
return adjusted_tx_size.width()
}
pub fn get_txb_height(&mut self, tx_size: TxSize) -> usize {
let adjusted_tx_size = self.av1_get_adjusted_tx_size(tx_size);
return adjusted_tx_size.height()
}
pub fn get_eob_pos_token(&mut self, eob: usize, extra: &mut u32) -> u32 {
let t = if eob < 33 {
eob_to_pos_small[eob] as u32
@@ -3123,9 +3133,8 @@ impl ContextWriter {
&mut self, levels: &mut [u8], scan: &[u16], eob: u16,
tx_size: TxSize, tx_class: TxClass, coeff_contexts: &mut [i8]
) {
// TODO: If TX_64X64 is enabled, use av1_get_adjusted_tx_size()
let bwl = tx_size.width_log2();
let height = tx_size.height();
let bwl = self.get_txb_bwl(tx_size);
let height = self.get_txb_height(tx_size);
for i in 0..eob {
let pos = scan[i as usize];
coeff_contexts[pos as usize] = self.get_nz_map_ctx(
@@ -3218,11 +3227,13 @@ impl ContextWriter {
let scan_order =
&av1_scan_orders[tx_size as usize][tx_type as usize];
let scan = scan_order.scan;
let mut coeffs_storage = [0 as i32; 32 * 32];
let coeffs = &mut coeffs_storage[..tx_size.area()];
let width = self.get_txb_wide(tx_size);
let height = self.get_txb_height(tx_size);
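// Adjusted dimensions are at most 32x32, so 32*32 storage still suffices for TX_64X64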
let mut coeffs_storage = [0 as i32; 32*32];
let coeffs = &mut coeffs_storage[..width*height];
let mut cul_level = 0 as u32;
for i in 0..tx_size.area() {
for i in 0..width*height {
coeffs[i] = coeffs_in[scan[i] as usize];
cul_level += coeffs[i].abs() as u32;
}
@@ -3255,8 +3266,8 @@ impl ContextWriter {
self.txb_init_levels(
coeffs_in,
tx_size.width(),
tx_size.height(),
width,
height,
&mut levels_buf
);
@@ -3267,6 +3278,8 @@ impl ContextWriter {
1
} as usize;
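// Transform sizes above 32x32 support DCT_DCT only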
assert!(tx_size <= TX_32X32 || tx_type == DCT_DCT);
// Signal tx_type for luma plane only
if plane == 0 {
self.write_tx_type(
@@ -3376,7 +3389,7 @@ impl ContextWriter {
let mut coeff_contexts = [0 as i8; MAX_TX_SQUARE];
let levels =
&mut levels_buf[TX_PAD_TOP * (tx_size.width() + TX_PAD_HOR)..];
&mut levels_buf[TX_PAD_TOP * (width + TX_PAD_HOR)..];
self.get_nz_map_contexts(
levels,
@@ -470,7 +470,8 @@ impl FrameInvariants {
let mut min_partition_size = if config.speed <= 1 { BlockSize::BLOCK_4X4 }
else if config.speed <= 2 { BlockSize::BLOCK_8X8 }
else if config.speed <= 3 { BlockSize::BLOCK_16X16 }
else { BlockSize::BLOCK_32X32 };
else if config.speed <= 4 { BlockSize::BLOCK_32X32 }
else { BlockSize::BLOCK_64X64 };
if config.tune == Tune::Psychovisual {
if min_partition_size < BlockSize::BLOCK_8X8 {
@@ -1717,7 +1718,11 @@ pub fn write_tx_blocks(fi: &FrameInvariants, fs: &mut FrameState,
}
if bw_uv > 0 && bh_uv > 0 {
let uv_tx_type = uv_intra_mode_to_tx_type_context(chroma_mode);
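// 32x32 transforms are restricted to DCT_DCT, so don't derive the type from the mode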
let uv_tx_type = if uv_tx_size == TxSize::TX_32X32 {
TxType::DCT_DCT
} else {
uv_intra_mode_to_tx_type_context(chroma_mode)
};
fs.qc.update(fi.base_q_idx, uv_tx_size, true, bit_depth);
for p in 1..3 {
@@ -1832,7 +1837,7 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram
// Always split if the current partition is too large
let must_split = bo.x + bs as usize > fi.w_in_b ||
bo.y + bs as usize > fi.h_in_b ||
bsize >= BlockSize::BLOCK_64X64;
bsize > BlockSize::BLOCK_64X64;
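// 64x64 blocks can now be coded, so only sizes above 64x64 force a split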
// must_split overrides the minimum partition size when applicable
let can_split = bsize > fi.min_partition_size || must_split;
@@ -2012,7 +2017,7 @@ fn encode_partition_topdown(seq: &Sequence, fi: &FrameInvariants, fs: &mut Frame
// Always split if the current partition is too large
let must_split = bo.x + bs as usize > fi.w_in_b ||
bo.y + bs as usize > fi.h_in_b ||
bsize >= BlockSize::BLOCK_64X64;
bsize > BlockSize::BLOCK_64X64;
let mut rdo_output = block_output.clone().unwrap_or(RDOOutput {
part_type: PartitionType::PARTITION_INVALID,
@@ -834,6 +834,8 @@ impl PredictionMode {
self.predict_intra_inner::<Block16x16>(dst, bit_depth, ac, alpha),
TxSize::TX_32X32 =>
self.predict_intra_inner::<Block32x32>(dst, bit_depth, ac, alpha),
TxSize::TX_64X64 =>
self.predict_intra_inner::<Block64x64>(dst, bit_depth, ac, alpha),
_ => unimplemented!()
}
}
@@ -75,12 +75,11 @@ static sm_weight_arrays: [u8; 2 * MAX_TX_SIZE] = [
// bs = 32
255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
// TODO: enable extra weights for TX64X64
// bs = 64
/*255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,*/
13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
];
const NEED_LEFT: u8 = 1 << 1;
@@ -142,6 +141,13 @@ impl Dim for Block32x32 {
const H: usize = 32;
}
pub struct Block64x64;
impl Dim for Block64x64 {
const W: usize = 64;
const H: usize = 64;
}
#[inline(always)]
fn get_scaled_luma_q0(alpha_q3: i16, ac_pred_q3: i16) -> i32 {
let scaled_luma_q6 = (alpha_q3 as i32) * (ac_pred_q3 as i32);
@@ -649,11 +655,13 @@ impl Intra<u8> for Block4x4 {}
impl Intra<u8> for Block8x8 {}
impl Intra<u8> for Block16x16 {}
impl Intra<u8> for Block32x32 {}
impl Intra<u8> for Block64x64 {}
impl Intra<u16> for Block4x4 {}
impl Intra<u16> for Block8x8 {}
impl Intra<u16> for Block16x16 {}
impl Intra<u16> for Block32x32 {}
impl Intra<u16> for Block64x64 {}
#[cfg(all(test, feature = "aom"))]
pub mod test {
@@ -903,7 +911,7 @@ pub mod test {
let mut edge_buf: AlignedArray<[u8; 2 * MAX_TX_SIZE + 1]> =
UninitializedAlignedArray();
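// Offset the fill so the left/above sample values match those used when MAX_TX_SIZE was 32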
for i in 0..edge_buf.array.len() {
edge_buf.array[i] = i.as_();
edge_buf.array[i] = (i + 32).saturating_sub(MAX_TX_SIZE).as_();
}
let left = &edge_buf.array[MAX_TX_SIZE - 4..MAX_TX_SIZE];
let above = &edge_buf.array[MAX_TX_SIZE + 1..MAX_TX_SIZE + 5];
@@ -133,11 +133,17 @@ impl QuantizationContext {
qcoeffs[0] += qcoeffs[0].signum() * self.dc_offset;
qcoeffs[0] = divu_pair(qcoeffs[0], self.dc_mul_add);
for (qc, c) in qcoeffs[1..].iter_mut().zip(coeffs[1..].iter()) {
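// Quantize at most 1024 AC coefficients; for TX_64X64 the remaining high-frequency coefficients are zeroed below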
for (qc, c) in qcoeffs[1..].iter_mut().zip(coeffs[1..].iter()).take(1024) {
*qc = *c << self.log_tx_scale;
*qc += qc.signum() * self.ac_offset;
*qc = divu_pair(*qc, self.ac_mul_add);
}
if qcoeffs.len() > 1024 {
for qc in qcoeffs[1024..].iter_mut() {
*qc = 0;
}
}
}
}
@@ -278,7 +278,8 @@ pub fn rdo_tx_size_type(
BlockSize::BLOCK_4X4 => TxSize::TX_4X4,
BlockSize::BLOCK_8X8 => TxSize::TX_8X8,
BlockSize::BLOCK_16X16 => TxSize::TX_16X16,
_ => TxSize::TX_32X32
BlockSize::BLOCK_32X32 => TxSize::TX_32X32,
_ => TxSize::TX_64X64
};
cw.bc.set_tx_size(bo, tx_size);
// Were we not hardcoded to TX_MODE_LARGEST, block tx size would be written here
@@ -829,7 +830,7 @@ pub fn rdo_partition_decision(
match partition {
PartitionType::PARTITION_NONE => {
if bsize > BlockSize::BLOCK_32X32 {
if bsize > BlockSize::BLOCK_64X64 {
continue;
}
@@ -1992,6 +1992,7 @@ impl FwdTxfm2D for Block4x4 {}
impl FwdTxfm2D for Block8x8 {}
impl FwdTxfm2D for Block16x16 {}
impl FwdTxfm2D for Block32x32 {}
impl FwdTxfm2D for Block64x64 {}
pub fn fht4x4(
input: &[i16], output: &mut [i32], stride: usize, tx_type: TxType,
@@ -2040,3 +2041,19 @@ pub fn fht32x32(
Block32x32::fwd_txfm2d_rs(input, output, stride, tx_type, bit_depth);
}
}
pub fn fht64x64(
input: &[i16], output: &mut [i32], stride: usize, tx_type: TxType,
bit_depth: usize
) {
assert!(tx_type == TxType::DCT_DCT);
let mut tmp = [0 as i32; 4096];
Block64x64::fwd_txfm2d(input, &mut tmp, stride, tx_type, bit_depth);
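// Repack the 64x64 result: the left 32 columns of all 64 rows come first, then
// the right 32 columns, so the low-frequency 32x32 quadrant that actually gets
// coded lands in the first 1024 entries of output with stride 32.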
for i in 0..2 {
for (row_out, row_in) in output[2048*i..].chunks_mut(32).zip(tmp[32*i..].chunks(64)).take(64) {
row_out.copy_from_slice(&row_in[..32]);
}
}
}
This diff is collapsed.
@@ -258,6 +258,7 @@ pub fn forward_transform(
TxSize::TX_8X8 => fht8x8(input, output, stride, tx_type, bit_depth),
TxSize::TX_16X16 => fht16x16(input, output, stride, tx_type, bit_depth),
TxSize::TX_32X32 => fht32x32(input, output, stride, tx_type, bit_depth),
TxSize::TX_64X64 => fht64x64(input, output, stride, tx_type, bit_depth),
_ => panic!("unimplemented tx size")
}
}
@@ -273,6 +274,8 @@ pub fn inverse_transform_add(
iht16x16_add(input, output, stride, tx_type, bit_depth),
TxSize::TX_32X32 =>
iht32x32_add(input, output, stride, tx_type, bit_depth),
TxSize::TX_64X64 =>
iht64x64_add(input, output, stride, tx_type, bit_depth),
_ => panic!("unimplemented tx size")
}
}