From 6bfa0aa88e11bfc476610b98e131a952ab0bfdf0 Mon Sep 17 00:00:00 2001 From: fbossen <frank@bossentech.com> Date: Tue, 23 Oct 2018 17:01:19 -0400 Subject: [PATCH] Add support for compound mode (#663) * Enable coding of compound mode * Modify inter prediction function to support compound mode * Call motion estimation function early and store results for use by compound mode * Add compound as an RDO candidate * Increase frame coding structure pyramid size * Make ME search range dependent on frame distance --- src/api.rs | 8 +- src/context.rs | 606 +++++++++++++++++++++++++++++++++------------ src/encoder.rs | 286 +++++++++++++-------- src/entropymode.rs | 4 +- src/me.rs | 5 +- src/partition.rs | 251 +++++++++++-------- src/predict.rs | 8 + src/rdo.rs | 129 ++++++---- 8 files changed, 892 insertions(+), 405 deletions(-) diff --git a/src/api.rs b/src/api.rs index a05dca24..d75f785d 100644 --- a/src/api.rs +++ b/src/api.rs @@ -179,7 +179,7 @@ impl Context { let reorder = false; let multiref = reorder || self.fi.config.speed <= 2; - let pyramid_depth = if reorder { 1 } else { 0 }; + let pyramid_depth = if reorder { 2 } else { 0 }; let group_src_len = 1 << pyramid_depth; let group_len = group_src_len + if reorder { pyramid_depth } else { 0 }; let segment_len = 1 + (key_frame_interval - 1 + group_src_len - 1) / group_src_len * group_len; @@ -264,7 +264,13 @@ impl Context { } as u8) & 7; } + self.fi.reference_mode = if multiref && reorder && idx_in_group != 0 { + ReferenceMode::SELECT + } else { + ReferenceMode::SINGLE + }; self.fi.number = segment_idx * key_frame_interval + self.fi.order_hint as u64; + self.fi.me_range_scale = (group_src_len >> lvl) as u8; } true diff --git a/src/context.rs b/src/context.rs index ec1739a7..6cbc5792 100644 --- a/src/context.rs +++ b/src/context.rs @@ -30,6 +30,7 @@ use entropymode::*; use token_cdfs::*; use encoder::FrameInvariants; use scan_order::*; +use encoder::ReferenceMode; use self::REF_CONTEXTS; use self::SINGLE_REFS; @@ -692,8 +693,13 @@ pub struct CDFContext { intra_inter_cdfs: [[u16; 3]; INTRA_INTER_CONTEXTS], angle_delta_cdf: [[u16; 2 * MAX_ANGLE_DELTA + 1 + 1]; DIRECTIONAL_MODES], filter_intra_cdfs: [[u16; 3]; BlockSize::BLOCK_SIZES_ALL], + comp_mode_cdf: [[u16; 3]; COMP_INTER_CONTEXTS], + comp_ref_type_cdf: [[u16; 3]; COMP_REF_TYPE_CONTEXTS], + comp_ref_cdf: [[[u16; 3]; FWD_REFS - 1]; REF_CONTEXTS], + comp_bwd_ref_cdf: [[[u16; 3]; BWD_REFS - 1]; REF_CONTEXTS], single_ref_cdfs: [[[u16; 2 + 1]; SINGLE_REFS - 1]; REF_CONTEXTS], drl_cdfs: [[u16; 2 + 1]; DRL_MODE_CONTEXTS], + compound_mode_cdf: [[u16; INTER_COMPOUND_MODES + 1]; INTER_MODE_CONTEXTS], nmv_context: NMVContext, deblock_delta_multi_cdf: [[u16; DELTA_LF_PROBS + 1 + 1]; FRAME_LF_COUNT], deblock_delta_cdf: [u16; DELTA_LF_PROBS + 1 + 1], @@ -744,8 +750,13 @@ impl CDFContext { intra_inter_cdfs: default_intra_inter_cdf, angle_delta_cdf: default_angle_delta_cdf, filter_intra_cdfs: default_filter_intra_cdfs, + comp_mode_cdf: default_comp_mode_cdf, + comp_ref_type_cdf: default_comp_ref_type_cdf, + comp_ref_cdf: default_comp_ref_cdf, + comp_bwd_ref_cdf: default_comp_bwdref_cdf, single_ref_cdfs: default_single_ref_cdf, drl_cdfs: default_drl_cdf, + compound_mode_cdf: default_compound_mode_cdf, nmv_context: default_nmv_context, deblock_delta_multi_cdf: default_delta_lf_multi_cdf, deblock_delta_cdf: default_delta_lf_cdf, @@ -814,8 +825,13 @@ impl CDFContext { reset_2d!(self.intra_inter_cdfs); reset_2d!(self.angle_delta_cdf); reset_2d!(self.filter_intra_cdfs); + reset_2d!(self.comp_mode_cdf); + reset_2d!(self.comp_ref_type_cdf); + reset_3d!(self.comp_ref_cdf); + reset_3d!(self.comp_bwd_ref_cdf); reset_3d!(self.single_ref_cdfs); reset_2d!(self.drl_cdfs); + reset_2d!(self.compound_mode_cdf); reset_2d!(self.deblock_delta_multi_cdf); reset_1d!(self.deblock_delta_cdf); @@ -891,6 +907,22 @@ impl CDFContext { self.filter_intra_cdfs.first().unwrap().as_ptr() as usize; let filter_intra_cdfs_end = filter_intra_cdfs_start + size_of_val(&self.filter_intra_cdfs); + let comp_mode_cdf_start = + self.comp_mode_cdf.first().unwrap().as_ptr() as usize; + let comp_mode_cdf_end = + comp_mode_cdf_start + size_of_val(&self.comp_mode_cdf); + let comp_ref_type_cdf_start = + self.comp_ref_type_cdf.first().unwrap().as_ptr() as usize; + let comp_ref_type_cdf_end = + comp_ref_type_cdf_start + size_of_val(&self.comp_ref_type_cdf); + let comp_ref_cdf_start = + self.comp_ref_cdf.first().unwrap().as_ptr() as usize; + let comp_ref_cdf_end = + comp_ref_cdf_start + size_of_val(&self.comp_ref_cdf); + let comp_bwd_ref_cdf_start = + self.comp_bwd_ref_cdf.first().unwrap().as_ptr() as usize; + let comp_bwd_ref_cdf_end = + comp_bwd_ref_cdf_start + size_of_val(&self.comp_bwd_ref_cdf); let deblock_delta_multi_cdf_start = self.deblock_delta_multi_cdf.first().unwrap().as_ptr() as usize; let deblock_delta_multi_cdf_end = @@ -965,6 +997,10 @@ impl CDFContext { ("intra_inter_cdfs", intra_inter_cdfs_start, intra_inter_cdfs_end), ("angle_delta_cdf", angle_delta_cdf_start, angle_delta_cdf_end), ("filter_intra_cdfs", filter_intra_cdfs_start, filter_intra_cdfs_end), + ("comp_mode_cdf", comp_mode_cdf_start, comp_mode_cdf_end), + ("comp_ref_type_cdf", comp_ref_type_cdf_start, comp_ref_type_cdf_end), + ("comp_ref_cdf", comp_ref_cdf_start, comp_ref_cdf_end), + ("comp_bwd_ref_cdf", comp_bwd_ref_cdf_start, comp_bwd_ref_cdf_end), ("deblock_delta_multi_cdf", deblock_delta_multi_cdf_start, deblock_delta_multi_cdf_end), ("deblock_delta_cdf", deblock_delta_cdf_start, deblock_delta_cdf_end), ("txb_skip_cdf", txb_skip_cdf_start, txb_skip_cdf_end), @@ -1158,7 +1194,7 @@ impl Block { self.mode >= PredictionMode::NEARESTMV } pub fn has_second_ref(&self) -> bool { - self.ref_frames[1] > INTRA_FRAME + self.ref_frames[1] > INTRA_FRAME && self.ref_frames[1] != NONE_FRAME } } @@ -1426,24 +1462,24 @@ impl BlockContext { self.for_each(bo, bsize, |block| block.skip = skip); } - pub fn set_ref_frame(&mut self, bo: &BlockOffset, bsize: BlockSize, r: usize) { + pub fn set_ref_frames(&mut self, bo: &BlockOffset, bsize: BlockSize, r: &[usize; 2]) { let bw = bsize.width_mi(); let bh = bsize.height_mi(); for y in 0..bh { for x in 0..bw { - self.blocks[bo.y + y as usize][bo.x + x as usize].ref_frames[0] = r; + self.blocks[bo.y + y as usize][bo.x + x as usize].ref_frames = *r; } } } - pub fn set_motion_vector(&mut self, bo: &BlockOffset, bsize: BlockSize, mv: MotionVector) { + pub fn set_motion_vectors(&mut self, bo: &BlockOffset, bsize: BlockSize, mvs: &[MotionVector; 2]) { let bw = bsize.width_mi(); let bh = bsize.height_mi(); for y in 0..bh { for x in 0..bw { - self.blocks[bo.y + y as usize][bo.x + x as usize].mv[0] = mv; + self.blocks[bo.y + y as usize][bo.x + x as usize].mv = *mvs; } } } @@ -1981,9 +2017,9 @@ impl ContextWriter { false } - fn find_matching_mv_and_update_weight(&self, blk: &Block, mv_stack: &mut Vec<CandidateMV>, weight: u32) -> bool { + fn find_matching_mv_and_update_weight(&self, mv: &MotionVector, mv_stack: &mut Vec<CandidateMV>, weight: u32) -> bool { for mut mv_cand in mv_stack { - if blk.mv[0].row == mv_cand.this_mv.row && blk.mv[0].col == mv_cand.this_mv.col { + if mv.row == mv_cand.this_mv.row && mv.col == mv_cand.this_mv.col { mv_cand.weight += weight; return true; } @@ -1991,66 +2027,140 @@ impl ContextWriter { false } - fn add_ref_mv_candidate(&self, ref_frame: usize, blk: &Block, mv_stack: &mut Vec<CandidateMV>, - weight: u32, newmv_count: &mut usize) -> bool { - if !blk.is_inter() { /* For intrabc */ - return false; + fn find_matching_comp_mv_and_update_weight(&self, mvs: &[MotionVector; 2], mv_stack: &mut Vec<CandidateMV>, weight: u32) -> bool { + for mut mv_cand in mv_stack { + if mvs[0].row == mv_cand.this_mv.row && mvs[0].col == mv_cand.this_mv.col && + mvs[1].row == mv_cand.comp_mv.row && mvs[1].col == mv_cand.comp_mv.col { + mv_cand.weight += weight; + return true; + } } + false + } - if blk.ref_frames[0] == ref_frame { - let found_match = self.find_matching_mv_and_update_weight(blk, mv_stack, weight); + fn add_ref_mv_candidate(&self, ref_frames: &[usize; 2], blk: &Block, mv_stack: &mut Vec<CandidateMV>, + weight: u32, newmv_count: &mut usize, is_compound: bool) -> bool { + if !blk.is_inter() { /* For intrabc */ + false + } else if is_compound { + if blk.ref_frames[0] == ref_frames[0] && blk.ref_frames[1] == ref_frames[1] { + let found_match = self.find_matching_comp_mv_and_update_weight(&blk.mv, mv_stack, weight); - if !found_match && mv_stack.len() < MAX_REF_MV_STACK_SIZE { - let mv_cand = CandidateMV { - this_mv: blk.mv[0], - comp_mv: blk.mv[1], - weight: weight - }; + if !found_match && mv_stack.len() < MAX_REF_MV_STACK_SIZE { + let mv_cand = CandidateMV { + this_mv: blk.mv[0], + comp_mv: blk.mv[1], + weight: weight + }; - mv_stack.push(mv_cand); - } + mv_stack.push(mv_cand); + } - if blk.mode == PredictionMode::NEWMV { - *newmv_count += 1; - } + if blk.mode == PredictionMode::NEW_NEWMV || + blk.mode == PredictionMode::NEAREST_NEWMV || + blk.mode == PredictionMode::NEW_NEARESTMV || + blk.mode == PredictionMode::NEAR_NEWMV || + blk.mode == PredictionMode::NEW_NEARMV { + *newmv_count += 1; + } - true + true + } else { + false + } } else { - false + let mut found = false; + for i in 0..2 { + if blk.ref_frames[i] == ref_frames[0] { + let found_match = self.find_matching_mv_and_update_weight(&blk.mv[i], mv_stack, weight); + + if !found_match && mv_stack.len() < MAX_REF_MV_STACK_SIZE { + let mv_cand = CandidateMV { + this_mv: blk.mv[i], + comp_mv: MotionVector { row: 0, col: 0 }, + weight: weight + }; + + mv_stack.push(mv_cand); + } + + if blk.mode == PredictionMode::NEW_NEWMV || + blk.mode == PredictionMode::NEAREST_NEWMV || + blk.mode == PredictionMode::NEW_NEARESTMV || + blk.mode == PredictionMode::NEAR_NEWMV || + blk.mode == PredictionMode::NEW_NEARMV || + blk.mode == PredictionMode::NEWMV { + *newmv_count += 1; + } + + found = true; + } + } + found } } fn add_extra_mv_candidate( &self, blk: &Block, - ref_frame: usize, + ref_frames: &[usize; 2], mv_stack: &mut Vec<CandidateMV>, - fi: &FrameInvariants + fi: &FrameInvariants, + is_compound: bool, + ref_id_count: &mut [usize; 2], + ref_id_mvs: &mut [[MotionVector; 2]; 2], + ref_diff_count: &mut [usize; 2], + ref_diff_mvs: &mut [[MotionVector; 2]; 2], ) { - for cand_list in 0..2 { - if blk.ref_frames[cand_list] > INTRA_FRAME { - let mut mv = blk.mv[0]; - if fi.ref_frame_sign_bias[blk.ref_frames[cand_list] - LAST_FRAME] != - fi.ref_frame_sign_bias[ref_frame - LAST_FRAME] { - mv.row = -mv.row; - mv.col = -mv.col; + if is_compound { + for cand_list in 0..2 { + let cand_ref = blk.ref_frames[cand_list]; + if cand_ref > INTRA_FRAME && cand_ref != NONE_FRAME { + for list in 0..2 { + let mut cand_mv = blk.mv[cand_list]; + if cand_ref == ref_frames[list] && ref_id_count[list] < 2 { + ref_id_mvs[list][ref_id_count[list]] = cand_mv; + ref_id_count[list] = ref_id_count[list] + 1; + } else if ref_diff_count[list] < 2 { + if fi.ref_frame_sign_bias[cand_ref - LAST_FRAME] != + fi.ref_frame_sign_bias[ref_frames[list] - LAST_FRAME] { + cand_mv.row = -cand_mv.row; + cand_mv.col = -cand_mv.col; + } + ref_diff_mvs[list][ref_diff_count[list]] = cand_mv; + ref_diff_count[list] = ref_diff_count[list] + 1; + } + } } + } + } else { + for cand_list in 0..2 { + let cand_ref = blk.ref_frames[cand_list]; + if cand_ref > INTRA_FRAME && cand_ref != NONE_FRAME { + let mut mv = blk.mv[cand_list]; + if fi.ref_frame_sign_bias[cand_ref - LAST_FRAME] != + fi.ref_frame_sign_bias[ref_frames[0] - LAST_FRAME] { + mv.row = -mv.row; + mv.col = -mv.col; + } - if !self.find_matching_mv(&mv, mv_stack) { - let mv_cand = CandidateMV { - this_mv: mv, - comp_mv: mv, - weight: 2 - }; - mv_stack.push(mv_cand); + if !self.find_matching_mv(&mv, mv_stack) { + let mv_cand = CandidateMV { + this_mv: mv, + comp_mv: MotionVector { row: 0, col: 0 }, + weight: 2 + }; + mv_stack.push(mv_cand); + } } } } } fn scan_row_mbmi(&mut self, bo: &BlockOffset, row_offset: isize, max_row_offs: isize, - processed_rows: &mut isize, ref_frame: usize, - mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, bsize: BlockSize) -> bool { + processed_rows: &mut isize, ref_frames: &[usize; 2], + mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, bsize: BlockSize, + is_compound: bool) -> bool { let bc = &self.bc; let target_n4_w = bsize.width_mi(); @@ -2091,7 +2201,7 @@ impl ContextWriter { *processed_rows = (inc as isize) - row_offset - 1; } - if self.add_ref_mv_candidate(ref_frame, cand, mv_stack, len as u32 * weight, newmv_count) { + if self.add_ref_mv_candidate(ref_frames, cand, mv_stack, len as u32 * weight, newmv_count, is_compound) { found_match = true; } @@ -2102,8 +2212,9 @@ impl ContextWriter { } fn scan_col_mbmi(&mut self, bo: &BlockOffset, col_offset: isize, max_col_offs: isize, - processed_cols: &mut isize, ref_frame: usize, - mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, bsize: BlockSize) -> bool { + processed_cols: &mut isize, ref_frames: &[usize; 2], + mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, bsize: BlockSize, + is_compound: bool) -> bool { let bc = &self.bc; let target_n4_h = bsize.height_mi(); @@ -2144,7 +2255,7 @@ impl ContextWriter { *processed_cols = (inc as isize) - col_offset - 1; } - if self.add_ref_mv_candidate(ref_frame, cand, mv_stack, len as u32 * weight, newmv_count) { + if self.add_ref_mv_candidate(ref_frames, cand, mv_stack, len as u32 * weight, newmv_count, is_compound) { found_match = true; } @@ -2154,15 +2265,16 @@ impl ContextWriter { found_match } - fn scan_blk_mbmi(&mut self, bo: &BlockOffset, ref_frame: usize, - mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize) -> bool { + fn scan_blk_mbmi(&mut self, bo: &BlockOffset, ref_frames: &[usize; 2], + mv_stack: &mut Vec<CandidateMV>, newmv_count: &mut usize, + is_compound: bool) -> bool { if bo.x >= self.bc.cols || bo.y >= self.bc.rows { return false; } let weight = 2 * BLOCK_8X8.width_mi() as u32; /* Always assume its within a tile, probably wrong */ - self.add_ref_mv_candidate(ref_frame, self.bc.at(bo), mv_stack, weight, newmv_count) + self.add_ref_mv_candidate(ref_frames, self.bc.at(bo), mv_stack, weight, newmv_count, is_compound) } fn add_offset(&mut self, mv_stack: &mut Vec<CandidateMV>) { @@ -2171,8 +2283,8 @@ impl ContextWriter { } } - fn setup_mvref_list(&mut self, bo: &BlockOffset, ref_frame: usize, mv_stack: &mut Vec<CandidateMV>, - bsize: BlockSize, is_sec_rect: bool, fi: &FrameInvariants) -> usize { + fn setup_mvref_list(&mut self, bo: &BlockOffset, ref_frames: &[usize; 2], mv_stack: &mut Vec<CandidateMV>, + bsize: BlockSize, is_sec_rect: bool, fi: &FrameInvariants, is_compound: bool) -> usize { let (_rf, _rf_num) = self.get_mvref_ref_frames(INTRA_FRAME); let target_n4_h = bsize.height_mi(); @@ -2218,18 +2330,18 @@ impl ContextWriter { let mut newmv_count: usize = 0; if max_row_offs.abs() >= 1 { - let found_match = self.scan_row_mbmi(bo, -1, max_row_offs, &mut processed_rows, ref_frame, mv_stack, - &mut newmv_count, bsize); + let found_match = self.scan_row_mbmi(bo, -1, max_row_offs, &mut processed_rows, ref_frames, mv_stack, + &mut newmv_count, bsize, is_compound); row_match |= found_match; } if max_col_offs.abs() >= 1 { - let found_match = self.scan_col_mbmi(bo, -1, max_col_offs, &mut processed_cols, ref_frame, mv_stack, - &mut newmv_count, bsize); + let found_match = self.scan_col_mbmi(bo, -1, max_col_offs, &mut processed_cols, ref_frames, mv_stack, + &mut newmv_count, bsize, is_compound); col_match |= found_match; } if self.has_tr(bo, bsize, is_sec_rect) { - let found_match = self.scan_blk_mbmi(&bo.with_offset(target_n4_w as isize, -1), ref_frame, mv_stack, - &mut newmv_count); + let found_match = self.scan_blk_mbmi(&bo.with_offset(target_n4_w as isize, -1), ref_frames, mv_stack, + &mut newmv_count, is_compound); row_match |= found_match; } @@ -2240,7 +2352,9 @@ impl ContextWriter { /* Scan the second outer area. */ let mut far_newmv_count: usize = 0; // won't be used - let found_match = self.scan_blk_mbmi(&bo.with_offset(-1, -1), ref_frame, mv_stack, &mut far_newmv_count); + let found_match = self.scan_blk_mbmi( + &bo.with_offset(-1, -1), ref_frames, mv_stack, &mut far_newmv_count, is_compound + ); row_match |= found_match; for idx in 2..MVREF_ROW_COLS+1 { @@ -2248,14 +2362,14 @@ impl ContextWriter { let col_offset = -2 * idx as isize + 1 + col_adj as isize; if row_offset.abs() <= max_row_offs.abs() && row_offset.abs() > processed_rows { - let found_match = self.scan_row_mbmi(bo, row_offset, max_row_offs, &mut processed_rows, ref_frame, mv_stack, - &mut far_newmv_count, bsize); + let found_match = self.scan_row_mbmi(bo, row_offset, max_row_offs, &mut processed_rows, ref_frames, mv_stack, + &mut far_newmv_count, bsize, is_compound); row_match |= found_match; } if col_offset.abs() <= max_col_offs.abs() && col_offset.abs() > processed_cols { - let found_match = self.scan_col_mbmi(bo, col_offset, max_col_offs, &mut processed_cols, ref_frame, mv_stack, - &mut far_newmv_count, bsize); + let found_match = self.scan_col_mbmi(bo, col_offset, max_col_offs, &mut processed_cols, ref_frames, mv_stack, + &mut far_newmv_count, bsize, is_compound); col_match |= found_match; } } @@ -2264,13 +2378,13 @@ impl ContextWriter { assert!(total_match >= nearest_match); + // mode_context contains both newmv_context and refmv_context, where newmv_context + // lies in the REF_MVOFFSET least significant bits let mode_context = match nearest_match { - 0 => cmp::min(total_match, 1) + (total_match << REFMV_OFFSET) , - 1 => 3 - cmp::min(newmv_count, 1) + ((2 + total_match) << REFMV_OFFSET) , - _ => 5 - cmp::min(newmv_count, 1) + (5 << REFMV_OFFSET) - }; - - // println!("{} {} {} {} {}", bo.x, bo.y, nearest_match, total_match, mode_context); + 0 => cmp::min(total_match, 1) + (total_match << REFMV_OFFSET), + 1 => 3 - cmp::min(newmv_count, 1) + ((2 + total_match) << REFMV_OFFSET), + _ => 5 - cmp::min(newmv_count, 1) + (5 << REFMV_OFFSET) + }; /* TODO: Find nearest match and assign nearest and near mvs */ @@ -2286,6 +2400,11 @@ impl ContextWriter { let passes = if up_avail { 0 } else { 1 } .. if left_avail { 2 } else { 1 }; + let mut ref_id_count = [0 as usize; 2]; + let mut ref_diff_count = [0 as usize; 2]; + let mut ref_id_mvs = [[MotionVector { row: 0, col: 0 }; 2]; 2]; + let mut ref_diff_mvs = [[MotionVector { row: 0, col: 0 }; 2]; 2]; + for pass in passes { let mut idx = 0; while idx < num4x4 && mv_stack.len() < 2 { @@ -2296,7 +2415,10 @@ impl ContextWriter { }; let blk = &self.bc.at(&rbo); - self.add_extra_mv_candidate(blk, ref_frame, mv_stack, fi); + self.add_extra_mv_candidate( + blk, ref_frames, mv_stack, fi, is_compound, + &mut ref_id_count, &mut ref_id_mvs, &mut ref_diff_count, &mut ref_diff_mvs + ); idx += if pass == 0 { blk.n4_w @@ -2305,6 +2427,55 @@ impl ContextWriter { }; } } + + if is_compound { + let mut combined_mvs = [[MotionVector { row: 0, col: 0}; 2]; 2]; + + for list in 0..2 { + let mut comp_count = 0; + for idx in 0..ref_id_count[list] { + combined_mvs[comp_count][list] = ref_id_mvs[list][idx]; + comp_count = comp_count + 1; + } + for idx in 0..ref_diff_count[list] { + if comp_count < 2 { + combined_mvs[comp_count][list] = ref_diff_mvs[list][idx]; + comp_count = comp_count + 1; + } + } + } + + if mv_stack.len() == 1 { + let mv_cand = if combined_mvs[0][0].row == mv_stack[0].this_mv.row && + combined_mvs[0][0].col == mv_stack[0].this_mv.col && + combined_mvs[0][1].row == mv_stack[0].comp_mv.row && + combined_mvs[0][1].col == mv_stack[0].comp_mv.col { + CandidateMV { + this_mv: combined_mvs[1][0], + comp_mv: combined_mvs[1][1], + weight: 2 + } + } else { + CandidateMV { + this_mv: combined_mvs[0][0], + comp_mv: combined_mvs[0][1], + weight: 2 + } + }; + mv_stack.push(mv_cand); + } else { + for idx in 0..2 { + let mv_cand = CandidateMV { + this_mv: combined_mvs[idx][0], + comp_mv: combined_mvs[idx][1], + weight: 2 + }; + mv_stack.push(mv_cand); + } + } + + assert!(mv_stack.len() == 2); + } } /* TODO: Handle single reference frame extension */ @@ -2321,33 +2492,36 @@ impl ContextWriter { let mvy_max = (self.bc.rows - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h; mv.this_mv.row = (mv.this_mv.row as isize).max(mvy_min).min(mvy_max) as i16; mv.this_mv.col = (mv.this_mv.col as isize).max(mvx_min).min(mvx_max) as i16; + mv.comp_mv.row = (mv.comp_mv.row as isize).max(mvy_min).min(mvy_max) as i16; + mv.comp_mv.col = (mv.comp_mv.col as isize).max(mvx_min).min(mvx_max) as i16; } mode_context } - pub fn find_mvrefs(&mut self, bo: &BlockOffset, ref_frame: usize, + pub fn find_mvrefs(&mut self, bo: &BlockOffset, ref_frames: &[usize; 2], mv_stack: &mut Vec<CandidateMV>, bsize: BlockSize, is_sec_rect: bool, - fi: &FrameInvariants) -> usize { - if ref_frame < REF_FRAMES { - if ref_frame != INTRA_FRAME { + fi: &FrameInvariants, is_compound: bool) -> usize { + assert!(ref_frames[0] != NONE_FRAME); + if ref_frames[0] < REF_FRAMES { + if ref_frames[0] != INTRA_FRAME { /* TODO: convert global mv to an mv here */ } else { /* TODO: set the global mv ref to invalid here */ } } - if ref_frame != INTRA_FRAME { + if ref_frames[0] != INTRA_FRAME { /* TODO: Set zeromv ref to the converted global motion vector */ } else { /* TODO: Set the zeromv ref to 0 */ } - if ref_frame <= INTRA_FRAME { + if ref_frames[0] <= INTRA_FRAME { return 0; } - let mode_context = self.setup_mvref_list(bo, ref_frame, mv_stack, bsize, is_sec_rect, fi); + let mode_context = self.setup_mvref_list(bo, ref_frames, mv_stack, bsize, is_sec_rect, fi, is_compound); mode_context } @@ -2373,6 +2547,16 @@ impl ContextWriter { self.bc.at_mut(bo).neighbors_ref_counts = ref_counts; } + fn ref_count_ctx(counts0: usize, counts1: usize) -> usize { + if counts0 < counts1 { + 0 + } else if counts0 == counts1 { + 1 + } else { + 2 + } + } + fn get_ref_frame_ctx_b0(&mut self, bo: &BlockOffset) -> usize { let ref_counts = self.bc.at(bo).neighbors_ref_counts; @@ -2382,137 +2566,255 @@ impl ContextWriter { let bwd_cnt = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME] + ref_counts[ALTREF_FRAME]; - if fwd_cnt == bwd_cnt { - return 1; - } else if fwd_cnt < bwd_cnt { - return 0; - } else { - return 2; - } + ContextWriter::ref_count_ctx(fwd_cnt, bwd_cnt) } fn get_pred_ctx_brfarf2_or_arf(&mut self, bo: &BlockOffset) -> usize { let ref_counts = self.bc.at(bo).neighbors_ref_counts; - let brfarf2_count = ref_counts[BWDREF_FRAME] + - ref_counts[ALTREF2_FRAME]; - + let brfarf2_count = ref_counts[BWDREF_FRAME] + ref_counts[ALTREF2_FRAME]; let arf_count = ref_counts[ALTREF_FRAME]; - if brfarf2_count == arf_count { - return 1; - } else if brfarf2_count < arf_count { - return 0; - } else { - return 2; - } + ContextWriter::ref_count_ctx(brfarf2_count, arf_count) } fn get_pred_ctx_ll2_or_l3gld(&mut self, bo: &BlockOffset) -> usize { let ref_counts = self.bc.at(bo).neighbors_ref_counts; - let l_l2_count = ref_counts[LAST_FRAME] + - ref_counts[LAST2_FRAME]; - - let l3_gold_count = ref_counts[LAST3_FRAME] + - ref_counts[GOLDEN_FRAME]; + let l_l2_count = ref_counts[LAST_FRAME] + ref_counts[LAST2_FRAME]; + let l3_gold_count = ref_counts[LAST3_FRAME] + ref_counts[GOLDEN_FRAME]; - if l_l2_count == l3_gold_count { - return 1; - } else if l_l2_count < l3_gold_count { - return 0; - } else { - return 2; - } + ContextWriter::ref_count_ctx(l_l2_count, l3_gold_count) } fn get_pred_ctx_last_or_last2(&mut self, bo: &BlockOffset) -> usize { let ref_counts = self.bc.at(bo).neighbors_ref_counts; let l_count = ref_counts[LAST_FRAME]; - let l2_count = ref_counts[LAST2_FRAME]; - if l_count == l2_count { - return 1; - } else if l_count < l2_count { - return 0; - } else { - return 2; - } + ContextWriter::ref_count_ctx(l_count, l2_count) } fn get_pred_ctx_last3_or_gold(&mut self, bo: &BlockOffset) -> usize { let ref_counts = self.bc.at(bo).neighbors_ref_counts; let l3_count = ref_counts[LAST3_FRAME]; - let gold_count = ref_counts[GOLDEN_FRAME]; - if l3_count == gold_count { - return 1; - } else if l3_count < gold_count { - return 0; - } else { - return 2; - } + ContextWriter::ref_count_ctx(l3_count, gold_count) } fn get_pred_ctx_brf_or_arf2(&mut self, bo: &BlockOffset) -> usize { let ref_counts = self.bc.at(bo).neighbors_ref_counts; let brf_count = ref_counts[BWDREF_FRAME]; - let arf2_count = ref_counts[ALTREF2_FRAME]; - if brf_count == arf2_count { - return 1; - } else if brf_count < arf2_count { - return 0; + ContextWriter::ref_count_ctx(brf_count, arf2_count) + } + + fn get_comp_mode_ctx(&self, bo: &BlockOffset) -> usize { + fn check_backward(ref_frame: usize) -> bool { + ref_frame >= BWDREF_FRAME && ref_frame <= ALTREF_FRAME + } + let avail_left = bo.x > 0; + let avail_up = bo.y > 0; + let bo_left = bo.with_offset(-1, 0); + let bo_up = bo.with_offset(0, -1); + let above0 = if avail_up { self.bc.at(&bo_up).ref_frames[0] } else { INTRA_FRAME }; + let above1 = if avail_up { self.bc.at(&bo_up).ref_frames[1] } else { NONE_FRAME }; + let left0 = if avail_left { self.bc.at(&bo_left).ref_frames[0] } else { INTRA_FRAME }; + let left1 = if avail_left { self.bc.at(&bo_left).ref_frames[1] } else { NONE_FRAME }; + let left_single = left1 == NONE_FRAME; + let above_single = above1 == NONE_FRAME; + let left_intra = left0 == INTRA_FRAME; + let above_intra = above0 == INTRA_FRAME; + let left_backward = check_backward(left0); + let above_backward = check_backward(above0); + + if avail_left && avail_up { + if above_single && left_single { + (above_backward ^ left_backward) as usize + } else if above_single { + 2 + (above_backward || above_intra) as usize + } else if left_single { + 2 + (left_backward || left_intra) as usize + } else { + 4 + } + } else if avail_up { + if above_single { + above_backward as usize + } else { + 3 + } + } else if avail_left { + if left_single { + left_backward as usize + } else { + 3 + } + } else { + 1 + } + } + + fn get_comp_ref_type_ctx(&self, bo: &BlockOffset) -> usize { + fn is_samedir_ref_pair(ref0: usize, ref1: usize) -> bool { + (ref0 >= BWDREF_FRAME && ref0 != NONE_FRAME) == (ref1 >= BWDREF_FRAME && ref1 != NONE_FRAME) + } + + let avail_left = bo.x > 0; + let avail_up = bo.y > 0; + let bo_left = bo.with_offset(-1, 0); + let bo_up = bo.with_offset(0, -1); + let above0 = if avail_up { self.bc.at(&bo_up).ref_frames[0] } else { INTRA_FRAME }; + let above1 = if avail_up { self.bc.at(&bo_up).ref_frames[1] } else { NONE_FRAME }; + let left0 = if avail_left { self.bc.at(&bo_left).ref_frames[0] } else { INTRA_FRAME }; + let left1 = if avail_left { self.bc.at(&bo_left).ref_frames[1] } else { NONE_FRAME }; + let left_single = left1 == NONE_FRAME; + let above_single = above1 == NONE_FRAME; + let left_intra = left0 == INTRA_FRAME; + let above_intra = above0 == INTRA_FRAME; + let above_comp_inter = avail_up && !above_intra && !above_single; + let left_comp_inter = avail_left && !left_intra && !left_single; + let above_uni_comp = above_comp_inter && is_samedir_ref_pair(above0, above1); + let left_uni_comp = left_comp_inter && is_samedir_ref_pair(left0, left1); + + if avail_up && !above_intra && avail_left && !left_intra { + let samedir = is_samedir_ref_pair(above0, left0); + + if !above_comp_inter && !left_comp_inter { + 1 + 2 * samedir as usize + } else if !above_comp_inter { + if !left_uni_comp { 1 } else { 3 + samedir as usize } + } else if !left_comp_inter { + if !above_uni_comp { 1 } else { 3 + samedir as usize } + } else { + if !above_uni_comp && !left_uni_comp { + 0 + } else if !above_uni_comp || !left_uni_comp { + 2 + } else { + 3 + ((above0 == BWDREF_FRAME) == (left0 == BWDREF_FRAME)) as usize + } + } + } else if avail_up && avail_left { + if above_comp_inter { + 1 + 2 * above_uni_comp as usize + } else if left_comp_inter { + 1 + 2 * left_uni_comp as usize + } else { + 2 + } + } else if above_comp_inter { + 4 * above_uni_comp as usize + } else if left_comp_inter { + 4 * left_uni_comp as usize } else { - return 2; + 2 } } - pub fn write_ref_frames(&mut self, w: &mut dyn Writer, bo: &BlockOffset) { + pub fn write_ref_frames(&mut self, w: &mut dyn Writer, fi: &FrameInvariants, bo: &BlockOffset) { let rf = self.bc.at(bo).ref_frames; + let sz = self.bc.at(bo).n4_w.min(self.bc.at(bo).n4_h); /* TODO: Handle multiple references */ + let comp_mode = self.bc.at(bo).has_second_ref(); - let b0_ctx = self.get_ref_frame_ctx_b0(bo); - let b0 = rf[0] <= ALTREF_FRAME && rf[0] >= BWDREF_FRAME; - - symbol_with_update!(self, w, b0 as u32, &mut self.fc.single_ref_cdfs[b0_ctx][0]); - if b0 { - let b1_ctx = self.get_pred_ctx_brfarf2_or_arf(bo); - let b1 = rf[0] == ALTREF_FRAME; + if fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 { + let ctx = self.get_comp_mode_ctx(bo); + symbol_with_update!(self, w, comp_mode as u32, &mut self.fc.comp_mode_cdf[ctx]); + } else { + assert!(!comp_mode); + } - symbol_with_update!(self, w, b1 as u32, &mut self.fc.single_ref_cdfs[b1_ctx][1]); - if !b1 { - let b5_ctx = self.get_pred_ctx_brf_or_arf2(bo); - let b5 = rf[0] == ALTREF2_FRAME; + if comp_mode { + let comp_ref_type = 1 as u32; // bidir + let ctx = self.get_comp_ref_type_ctx(bo); + symbol_with_update!(self, w, comp_ref_type, &mut self.fc.comp_ref_type_cdf[ctx]); - symbol_with_update!(self, w, b5 as u32, &mut self.fc.single_ref_cdfs[b5_ctx][5]); + if comp_ref_type == 0 { + unimplemented!(); + } else { + let compref = rf[0] == GOLDEN_FRAME || rf[0] == LAST3_FRAME; + let ctx = self.get_pred_ctx_ll2_or_l3gld(bo); + symbol_with_update!(self, w, compref as u32, &mut self.fc.comp_ref_cdf[ctx][0]); + if !compref { + let compref_p1 = rf[0] == LAST2_FRAME; + let ctx = self.get_pred_ctx_last_or_last2(bo); + symbol_with_update!(self, w, compref_p1 as u32, &mut self.fc.comp_ref_cdf[ctx][1]); + } else { + let compref_p2 = rf[0] == GOLDEN_FRAME; + let ctx = self.get_pred_ctx_last3_or_gold(bo); + symbol_with_update!(self, w, compref_p2 as u32, &mut self.fc.comp_ref_cdf[ctx][2]); + } + let comp_bwdref = rf[1] == ALTREF_FRAME; + let ctx = self.get_pred_ctx_brfarf2_or_arf(bo); + symbol_with_update!(self, w, comp_bwdref as u32, &mut self.fc.comp_bwd_ref_cdf[ctx][0]); + if !comp_bwdref { + let comp_bwdref_p1 = rf[1] == ALTREF2_FRAME; + let ctx = self.get_pred_ctx_brf_or_arf2(bo); + symbol_with_update!(self, w, comp_bwdref_p1 as u32, &mut self.fc.comp_bwd_ref_cdf[ctx][1]); + } } } else { - let b2_ctx = self.get_pred_ctx_ll2_or_l3gld(bo); - let b2 = rf[0] == LAST3_FRAME || rf[0] == GOLDEN_FRAME; + let b0_ctx = self.get_ref_frame_ctx_b0(bo); + let b0 = rf[0] <= ALTREF_FRAME && rf[0] >= BWDREF_FRAME; + + symbol_with_update!(self, w, b0 as u32, &mut self.fc.single_ref_cdfs[b0_ctx][0]); + if b0 { + let b1_ctx = self.get_pred_ctx_brfarf2_or_arf(bo); + let b1 = rf[0] == ALTREF_FRAME; - symbol_with_update!(self, w, b2 as u32, &mut self.fc.single_ref_cdfs[b2_ctx][2]); - if !b2 { - let b3_ctx = self.get_pred_ctx_last_or_last2(bo); - let b3 = rf[0] != LAST_FRAME; + symbol_with_update!(self, w, b1 as u32, &mut self.fc.single_ref_cdfs[b1_ctx][1]); + if !b1 { + let b5_ctx = self.get_pred_ctx_brf_or_arf2(bo); + let b5 = rf[0] == ALTREF2_FRAME; - symbol_with_update!(self, w, b3 as u32, &mut self.fc.single_ref_cdfs[b3_ctx][3]); + symbol_with_update!(self, w, b5 as u32, &mut self.fc.single_ref_cdfs[b5_ctx][5]); + } } else { - let b4_ctx = self.get_pred_ctx_last3_or_gold(bo); - let b4 = rf[0] != LAST3_FRAME; + let b2_ctx = self.get_pred_ctx_ll2_or_l3gld(bo); + let b2 = rf[0] == LAST3_FRAME || rf[0] == GOLDEN_FRAME; + + symbol_with_update!(self, w, b2 as u32, &mut self.fc.single_ref_cdfs[b2_ctx][2]); + if !b2 { + let b3_ctx = self.get_pred_ctx_last_or_last2(bo); + let b3 = rf[0] != LAST_FRAME; - symbol_with_update!(self, w, b4 as u32, &mut self.fc.single_ref_cdfs[b4_ctx][4]); + symbol_with_update!(self, w, b3 as u32, &mut self.fc.single_ref_cdfs[b3_ctx][3]); + } else { + let b4_ctx = self.get_pred_ctx_last3_or_gold(bo); + let b4 = rf[0] != LAST3_FRAME; + + symbol_with_update!(self, w, b4 as u32, &mut self.fc.single_ref_cdfs[b4_ctx][4]); + } } } } + pub fn write_compound_mode( + &mut self, w: &mut dyn Writer, mode: PredictionMode, ctx: usize, + ) { + let newmv_ctx = ctx & NEWMV_CTX_MASK; + let refmv_ctx = (ctx >> REFMV_OFFSET) & REFMV_CTX_MASK; + + let ctx = if refmv_ctx < 2 { + newmv_ctx.min(1) + } else if refmv_ctx < 4 { + (newmv_ctx + 1).min(4) + } else { + (newmv_ctx.max(1) + 3).min(7) + }; + + assert!(mode >= PredictionMode::NEAREST_NEARESTMV); + let val = mode as u32 - PredictionMode::NEAREST_NEARESTMV as u32; + symbol_with_update!(self, w, val, &mut self.fc.compound_mode_cdf[ctx]); + } + pub fn write_inter_mode(&mut self, w: &mut dyn Writer, mode: PredictionMode, ctx: usize) { let newmv_ctx = ctx & NEWMV_CTX_MASK; symbol_with_update!(self, w, (mode != PredictionMode::NEWMV) as u32, &mut self.fc.newmv_cdf[newmv_ctx]); diff --git a/src/encoder.rs b/src/encoder.rs index ac098bae..64b16c19 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -218,7 +218,7 @@ impl Sequence { frame_id_length: 0, delta_frame_id_length: 0, use_128x128_superblock: false, - order_hint_bits_minus_1: 0, + order_hint_bits_minus_1: 3, force_screen_content_tools: 0, force_integer_mv: 2, still_picture: false, @@ -252,6 +252,59 @@ impl Sequence { let m = 1 << self.order_hint_bits_minus_1; (diff & (m - 1)) - (diff & m) } + + pub fn get_skip_mode_allowed(&self, fi: &FrameInvariants, reference_select: bool) -> bool { + if fi.intra_only || !reference_select || !self.enable_order_hint { + false + } else { + let mut forward_idx: isize = -1; + let mut backward_idx: isize = -1; + let mut forward_hint = 0; + let mut backward_hint = 0; + for i in 0..INTER_REFS_PER_FRAME { + if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize] { + let ref_hint = rec.order_hint; + if self.get_relative_dist(ref_hint, fi.order_hint) < 0 { + if forward_idx < 0 || self.get_relative_dist(ref_hint, forward_hint) > 0 { + forward_idx = i as isize; + forward_hint = ref_hint; + } + } else if self.get_relative_dist(ref_hint, fi.order_hint) > 0 { + if backward_idx < 0 || self.get_relative_dist(ref_hint, backward_hint) > 0 { + backward_idx = i as isize; + backward_hint = ref_hint; + } + } + } + } + if forward_idx < 0 { + false + } else if backward_idx >= 0 { + // set skip_mode_frame + true + } else { + let mut second_forward_idx: isize = -1; + let mut second_forward_hint = 0; + for i in 0..INTER_REFS_PER_FRAME { + if let Some(ref rec) = fi.rec_buffer.frames[fi.ref_frames[i] as usize] { + let ref_hint = rec.order_hint; + if self.get_relative_dist(ref_hint, forward_hint) < 0 { + if second_forward_idx < 0 || self.get_relative_dist(ref_hint, second_forward_hint) > 0 { + second_forward_idx = i as isize; + second_forward_hint = ref_hint; + } + } + } + } + if second_forward_idx < 0 { + false + } else { + // set skip_mode_frame + true + } + } + } + } } use std::sync::Arc; @@ -368,6 +421,7 @@ pub struct FrameInvariants { pub ref_frame_sign_bias: [bool; INTER_REFS_PER_FRAME], pub rec_buffer: ReferenceFramesSet, pub base_q_idx: u8, + pub me_range_scale: u8, } impl FrameInvariants { @@ -436,6 +490,7 @@ impl FrameInvariants { ref_frame_sign_bias: [false; INTER_REFS_PER_FRAME], rec_buffer: ReferenceFramesSet::new(), base_q_idx: config.quantizer as u8, + me_range_scale: 1, } } @@ -939,10 +994,8 @@ impl<W: io::Write> UncompressedHeader for BitWriter<W, BigEndian> { self.write_bit(reference_select)?; } - let skip_mode_allowed = - !(fi.intra_only || !reference_select || !seq.enable_order_hint); + let skip_mode_allowed = seq.get_skip_mode_allowed(fi, reference_select); if skip_mode_allowed { - unimplemented!(); self.write_bit(false)?; // skip_mode_present } @@ -1268,7 +1321,7 @@ pub fn encode_tx_block( } pub fn motion_compensate(fi: &FrameInvariants, fs: &mut FrameState, cw: &mut ContextWriter, - luma_mode: PredictionMode, ref_frame: usize, mv: MotionVector, + luma_mode: PredictionMode, ref_frames: &[usize; 2], mvs: &[MotionVector; 2], bsize: BlockSize, bo: &BlockOffset, bit_depth: usize, luma_only: bool) { if luma_mode.is_intra() { return; } @@ -1293,27 +1346,27 @@ pub fn motion_compensate(fi: &FrameInvariants, fs: &mut FrameState, cw: &mut Con if some_use_intra { luma_mode.predict_inter(fi, p, &po, &mut rec.mut_slice(&po), plane_bsize.width(), - plane_bsize.height(), ref_frame, &mv, bit_depth); + plane_bsize.height(), ref_frames, &mvs, bit_depth); } else { assert!(xdec == 1 && ydec == 1); // TODO: these are only valid for 4:2:0 - let mv0 = &cw.bc.at(&bo.with_offset(-1,-1)).mv[0]; - let rf0 = cw.bc.at(&bo.with_offset(-1,-1)).ref_frames[0]; - let mv1 = &cw.bc.at(&bo.with_offset(0,-1)).mv[0]; - let rf1 = cw.bc.at(&bo.with_offset(0,-1)).ref_frames[0]; + let mv0 = &cw.bc.at(&bo.with_offset(-1,-1)).mv; + let rf0 = &cw.bc.at(&bo.with_offset(-1,-1)).ref_frames; + let mv1 = &cw.bc.at(&bo.with_offset(0,-1)).mv; + let rf1 = &cw.bc.at(&bo.with_offset(0,-1)).ref_frames; let po1 = PlaneOffset { x: po.x+2, y: po.y }; - let mv2 = &cw.bc.at(&bo.with_offset(-1,0)).mv[0]; - let rf2 = cw.bc.at(&bo.with_offset(-1,0)).ref_frames[0]; + let mv2 = &cw.bc.at(&bo.with_offset(-1,0)).mv; + let rf2 = &cw.bc.at(&bo.with_offset(-1,0)).ref_frames; let po2 = PlaneOffset { x: po.x, y: po.y+2 }; let po3 = PlaneOffset { x: po.x+2, y: po.y+2 }; luma_mode.predict_inter(fi, p, &po, &mut rec.mut_slice(&po), 2, 2, rf0, mv0, bit_depth); luma_mode.predict_inter(fi, p, &po1, &mut rec.mut_slice(&po1), 2, 2, rf1, mv1, bit_depth); luma_mode.predict_inter(fi, p, &po2, &mut rec.mut_slice(&po2), 2, 2, rf2, mv2, bit_depth); - luma_mode.predict_inter(fi, p, &po3, &mut rec.mut_slice(&po3), 2, 2, ref_frame, &mv, bit_depth); + luma_mode.predict_inter(fi, p, &po3, &mut rec.mut_slice(&po3), 2, 2, ref_frames, mvs, bit_depth); } } else { luma_mode.predict_inter(fi, p, &po, &mut rec.mut_slice(&po), plane_bsize.width(), - plane_bsize.height(), ref_frame, &mv, bit_depth); + plane_bsize.height(), ref_frames, &mvs, bit_depth); } } } @@ -1332,7 +1385,7 @@ pub fn encode_block_a(seq: &Sequence, pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState, cw: &mut ContextWriter, w: &mut dyn Writer, luma_mode: PredictionMode, chroma_mode: PredictionMode, - ref_frame: usize, mv: MotionVector, + ref_frames: &[usize; 2], mvs: &[MotionVector; 2], bsize: BlockSize, bo: &BlockOffset, skip: bool, bit_depth: usize, cfl: CFLParams, tx_size: TxSize, tx_type: TxType, mode_context: usize, mv_stack: &Vec<CandidateMV>) { @@ -1349,8 +1402,8 @@ pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState, } cw.bc.set_block_size(bo, bsize); cw.bc.set_mode(bo, bsize, luma_mode); - cw.bc.set_ref_frame(bo, bsize, ref_frame); - cw.bc.set_motion_vector(bo, bsize, mv); + cw.bc.set_ref_frames(bo, bsize, ref_frames); + cw.bc.set_motion_vectors(bo, bsize, mvs); //write_q_deltas(); if cw.bc.code_deltas && fs.deblock.block_deltas_enabled && (bsize < sb_size || !skip) { @@ -1362,43 +1415,57 @@ pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState, cw.write_is_inter(w, bo, is_inter); if is_inter { cw.fill_neighbours_ref_counts(bo); - cw.write_ref_frames(w, bo); + cw.write_ref_frames(w, fi, bo); - //let mode_context = if bo.x == 0 && bo.y == 0 { 0 } else if bo.x ==0 || bo.y == 0 { 51 } else { 85 }; // NOTE: Until rav1e supports other inter modes than GLOBALMV - cw.write_inter_mode(w, luma_mode, mode_context); + if luma_mode >= PredictionMode::NEAREST_NEARESTMV { + cw.write_compound_mode(w, luma_mode, mode_context); + } else { + cw.write_inter_mode(w, luma_mode, mode_context); + } + + let ref_mv_idx = 0; + let num_mv_found = mv_stack.len(); if luma_mode == PredictionMode::NEWMV || luma_mode == PredictionMode::NEW_NEWMV { - let ref_mv_idx = 0; - let num_mv_found = mv_stack.len(); + if luma_mode == PredictionMode::NEW_NEWMV { assert!(num_mv_found >= 2); } for idx in 0..2 { if num_mv_found > idx + 1 { let drl_mode = ref_mv_idx > idx; let ctx: usize = (mv_stack[idx].weight < REF_CAT_LEVEL) as usize + (mv_stack[idx + 1].weight < REF_CAT_LEVEL) as usize; - cw.write_drl_mode(w, drl_mode, ctx); if !drl_mode { break; } } } + } - let ref_mv = if num_mv_found > 0 { - mv_stack[ref_mv_idx].this_mv - } else { - MotionVector{ row: 0, col: 0 } - }; + let ref_mvs = if num_mv_found > 0 { + [mv_stack[ref_mv_idx].this_mv, mv_stack[ref_mv_idx].comp_mv] + } else { + [MotionVector{ row: 0, col: 0 }; 2] + }; - let mv_precision = if fi.force_integer_mv != 0 { - MvSubpelPrecision::MV_SUBPEL_NONE - } else if fi.allow_high_precision_mv { - MvSubpelPrecision::MV_SUBPEL_HIGH_PRECISION - } else { - MvSubpelPrecision::MV_SUBPEL_LOW_PRECISION - }; - cw.write_mv(w, &mv, &ref_mv, mv_precision); - } else if luma_mode >= PredictionMode::NEAR0MV && luma_mode <= PredictionMode::NEAR2MV { + let mv_precision = if fi.force_integer_mv != 0 { + MvSubpelPrecision::MV_SUBPEL_NONE + } else if fi.allow_high_precision_mv { + MvSubpelPrecision::MV_SUBPEL_HIGH_PRECISION + } else { + MvSubpelPrecision::MV_SUBPEL_LOW_PRECISION + }; + + if luma_mode == PredictionMode::NEWMV || + luma_mode == PredictionMode::NEW_NEWMV || + luma_mode == PredictionMode::NEW_NEARESTMV { + cw.write_mv(w, &mvs[0], &ref_mvs[0], mv_precision); + } + if luma_mode == PredictionMode::NEW_NEWMV || + luma_mode == PredictionMode::NEAREST_NEWMV { + cw.write_mv(w, &mvs[1], &ref_mvs[1], mv_precision); + } + + if luma_mode >= PredictionMode::NEAR0MV && luma_mode <= PredictionMode::NEAR2MV { let ref_mv_idx = luma_mode as usize - PredictionMode::NEAR0MV as usize + 1; - let num_mv_found = mv_stack.len(); if luma_mode != PredictionMode::NEAR0MV { assert!(num_mv_found > ref_mv_idx); } for idx in 1..3 { @@ -1412,19 +1479,19 @@ pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState, } } if mv_stack.len() > 1 { - assert!(mv_stack[ref_mv_idx].this_mv.row == mv.row); - assert!(mv_stack[ref_mv_idx].this_mv.col == mv.col); + assert!(mv_stack[ref_mv_idx].this_mv.row == mvs[0].row); + assert!(mv_stack[ref_mv_idx].this_mv.col == mvs[0].col); } else { - assert!(0 == mv.row); - assert!(0 == mv.col); + assert!(0 == mvs[0].row); + assert!(0 == mvs[0].col); } } else if luma_mode == PredictionMode::NEARESTMV { if mv_stack.len() > 0 { - assert!(mv_stack[0].this_mv.row == mv.row); - assert!(mv_stack[0].this_mv.col == mv.col); + assert!(mv_stack[0].this_mv.row == mvs[0].row); + assert!(mv_stack[0].this_mv.col == mvs[0].col); } else { - assert!(0 == mv.row); - assert!(0 == mv.col); + assert!(0 == mvs[0].row); + assert!(0 == mvs[0].col); } } } else { @@ -1454,7 +1521,7 @@ pub fn encode_block_b(seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState, } } - motion_compensate(fi, fs, cw, luma_mode, ref_frame, mv, bsize, bo, bit_depth, false); + motion_compensate(fi, fs, cw, luma_mode, ref_frames, mvs, bsize, bo, bit_depth, false); if is_inter { write_tx_tree(fi, fs, cw, w, luma_mode, bo, bsize, tx_size, tx_type, skip, bit_depth, false); @@ -1670,8 +1737,8 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram pred_mode_luma: PredictionMode::DC_PRED, pred_mode_chroma: PredictionMode::DC_PRED, pred_cfl_params: CFLParams::new(), - ref_frame: INTRA_FRAME, - mv: MotionVector { row: 0, col: 0}, + ref_frames: [INTRA_FRAME, NONE_FRAME], + mvs: [MotionVector { row: 0, col: 0}; 2], skip: false, tx_size: TxSize::TX_4X4, tx_type: TxType::DCT_DCT, @@ -1699,27 +1766,29 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram let mode_decision = rdo_mode_decision(seq, fi, fs, cw, bsize, bo, pmv).part_modes[0].clone(); let (mode_luma, mode_chroma) = (mode_decision.pred_mode_luma, mode_decision.pred_mode_chroma); let cfl = mode_decision.pred_cfl_params; - let ref_frame = mode_decision.ref_frame; - let mv = mode_decision.mv; - let skip = mode_decision.skip; - let mut cdef_coded = cw.bc.cdef_coded; - let (tx_size, tx_type) = (mode_decision.tx_size, mode_decision.tx_type); - - debug_assert!((tx_size, tx_type) == - rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frame, mv, skip)); - cw.bc.set_tx_size(bo, tx_size); - - rd_cost = mode_decision.rd_cost + cost; - - let mut mv_stack = Vec::new(); - let mode_context = cw.find_mvrefs(bo, ref_frame, &mut mv_stack, bsize, false, fi); - - cdef_coded = encode_block_a(seq, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef}, - bsize, bo, skip); - encode_block_b(seq, fi, fs, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef}, - mode_luma, mode_chroma, ref_frame, mv, bsize, bo, skip, seq.bit_depth, cfl, - tx_size, tx_type, mode_context, &mv_stack); - + { + let ref_frames = &mode_decision.ref_frames; + let mvs = &mode_decision.mvs; + let skip = mode_decision.skip; + let mut cdef_coded = cw.bc.cdef_coded; + let (tx_size, tx_type) = (mode_decision.tx_size, mode_decision.tx_type); + + debug_assert!((tx_size, tx_type) == + rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frames, mvs, skip)); + cw.bc.set_tx_size(bo, tx_size); + + rd_cost = mode_decision.rd_cost + cost; + + let mut mv_stack = Vec::new(); + let is_compound = ref_frames[1] != NONE_FRAME; + let mode_context = cw.find_mvrefs(bo, ref_frames, &mut mv_stack, bsize, false, fi, is_compound); + + cdef_coded = encode_block_a(seq, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef}, + bsize, bo, skip); + encode_block_b(seq, fi, fs, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef}, + mode_luma, mode_chroma, ref_frames, mvs, bsize, bo, skip, seq.bit_depth, cfl, + tx_size, tx_type, mode_context, &mv_stack); + } best_decision = mode_decision; } @@ -1759,7 +1828,7 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram w_post_cdef, subsize, offset, - &best_decision.mv + &best_decision.mvs[0] ) }).sum::<f64>(); @@ -1781,23 +1850,24 @@ fn encode_partition_bottomup(seq: &Sequence, fi: &FrameInvariants, fs: &mut Fram // FIXME: redundant block re-encode let (mode_luma, mode_chroma) = (best_decision.pred_mode_luma, best_decision.pred_mode_chroma); let cfl = best_decision.pred_cfl_params; - let ref_frame = best_decision.ref_frame; - let mv = best_decision.mv; + let ref_frames = &best_decision.ref_frames; + let mvs = &best_decision.mvs; let skip = best_decision.skip; let mut cdef_coded = cw.bc.cdef_coded; let (tx_size, tx_type) = (best_decision.tx_size, best_decision.tx_type); debug_assert!((tx_size, tx_type) == - rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frame, mv, skip)); + rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frames, mvs, skip)); cw.bc.set_tx_size(bo, tx_size); let mut mv_stack = Vec::new(); - let mode_context = cw.find_mvrefs(bo, ref_frame, &mut mv_stack, bsize, false, fi); + let is_compound = ref_frames[1] != NONE_FRAME; + let mode_context = cw.find_mvrefs(bo, ref_frames, &mut mv_stack, bsize, false, fi, is_compound); cdef_coded = encode_block_a(seq, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef}, bsize, bo, skip); encode_block_b(seq, fi, fs, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef}, - mode_luma, mode_chroma, ref_frame, mv, bsize, bo, skip, seq.bit_depth, cfl, + mode_luma, mode_chroma, ref_frames, mvs, bsize, bo, skip, seq.bit_depth, cfl, tx_size, tx_type, mode_context, &mv_stack); } } @@ -1874,42 +1944,64 @@ fn encode_partition_topdown(seq: &Sequence, fi: &FrameInvariants, fs: &mut Frame let cfl = part_decision.pred_cfl_params; let skip = part_decision.skip; - let ref_frame = part_decision.ref_frame; - let mv = part_decision.mv; + let ref_frames = &part_decision.ref_frames; + let mvs = &part_decision.mvs; let mut cdef_coded = cw.bc.cdef_coded; // NOTE: Cannot avoid calling rdo_tx_size_type() here again, // because, with top-down partition RDO, the neighnoring contexts // of current partition can change, i.e. neighboring partitions can split down more. let (tx_size, tx_type) = - rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frame, mv, skip); + rdo_tx_size_type(seq, fi, fs, cw, bsize, bo, mode_luma, ref_frames, mvs, skip); let mut mv_stack = Vec::new(); - let mode_context = cw.find_mvrefs(bo, ref_frame, &mut mv_stack, bsize, false, fi); - - if !mode_luma.is_intra() && mode_luma != PredictionMode::GLOBALMV { - mode_luma = PredictionMode::NEWMV; - for (c, m) in mv_stack.iter().take(4) - .zip([PredictionMode::NEARESTMV, PredictionMode::NEAR0MV, - PredictionMode::NEAR1MV, PredictionMode::NEAR2MV].iter()) { - if c.this_mv.row == mv.row && c.this_mv.col == mv.col { - mode_luma = *m; + let is_compound = ref_frames[1] != NONE_FRAME; + let mode_context = cw.find_mvrefs(bo, ref_frames, &mut mv_stack, bsize, false, fi, is_compound); + + // TODO proper remap when is_compound is true + if !mode_luma.is_intra() { + if is_compound && mode_luma != PredictionMode::GLOBAL_GLOBALMV { + let match0 = mv_stack[0].this_mv.row == mvs[0].row && mv_stack[0].this_mv.col == mvs[0].col; + let match1 = mv_stack[0].comp_mv.row == mvs[1].row && mv_stack[0].comp_mv.col == mvs[1].col; + + mode_luma = if match0 && match1 { + PredictionMode::NEAREST_NEARESTMV + } else if match0 { + PredictionMode::NEAREST_NEWMV + } else if match1 { + PredictionMode::NEW_NEARESTMV + } else { + PredictionMode::NEW_NEWMV + }; + if mode_luma != PredictionMode::NEAREST_NEARESTMV && mvs[0].row == 0 && mvs[0].col == 0 && + mvs[1].row == 0 && mvs[1].col == 0 { + mode_luma = PredictionMode::GLOBAL_GLOBALMV; + } + mode_chroma = mode_luma; + } else if !is_compound && mode_luma != PredictionMode::GLOBALMV { + mode_luma = PredictionMode::NEWMV; + for (c, m) in mv_stack.iter().take(4) + .zip([PredictionMode::NEARESTMV, PredictionMode::NEAR0MV, + PredictionMode::NEAR1MV, PredictionMode::NEAR2MV].iter()) { + if c.this_mv.row == mvs[0].row && c.this_mv.col == mvs[0].col { + mode_luma = *m; + } + } + if mode_luma == PredictionMode::NEWMV && mvs[0].row == 0 && mvs[0].col == 0 { + mode_luma = + if mv_stack.len() == 0 { PredictionMode::NEARESTMV } + else if mv_stack.len() == 1 { PredictionMode::NEAR0MV } + else { PredictionMode::GLOBALMV }; + } + mode_chroma = mode_luma; } - } - if mode_luma == PredictionMode::NEWMV && mv.row == 0 && mv.col == 0 { - mode_luma = - if mv_stack.len() == 0 { PredictionMode::NEARESTMV } - else if mv_stack.len() == 1 { PredictionMode::NEAR0MV } - else { PredictionMode::GLOBALMV }; - } - mode_chroma = mode_luma; } // FIXME: every final block that has gone through the RDO decision process is encoded twice cdef_coded = encode_block_a(seq, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef}, bsize, bo, skip); encode_block_b(seq, fi, fs, cw, if cdef_coded {w_post_cdef} else {w_pre_cdef}, - mode_luma, mode_chroma, ref_frame, mv, bsize, bo, skip, seq.bit_depth, cfl, + mode_luma, mode_chroma, ref_frames, mvs, bsize, bo, skip, seq.bit_depth, cfl, tx_size, tx_type, mode_context, &mv_stack); }, PartitionType::PARTITION_SPLIT => { diff --git a/src/entropymode.rs b/src/entropymode.rs index 020c3feb..d4698e9d 100644 --- a/src/entropymode.rs +++ b/src/entropymode.rs @@ -899,7 +899,7 @@ pub static default_refmv_cdf: [[u16; cdf_size!(2)]; REFMV_MODE_CONTEXTS] = [ pub static default_drl_cdf: [[u16; cdf_size!(2)]; DRL_MODE_CONTEXTS] = [cdf!(13104), cdf!(24560), cdf!(18945)]; -pub static default_inter_compound_mode_cdf: [[u16; +pub static default_compound_mode_cdf: [[u16; cdf_size!(INTER_COMPOUND_MODES)]; INTER_MODE_CONTEXTS] = [ cdf!(7760, 13823, 15808, 17641, 19156, 20666, 26891), @@ -1125,7 +1125,7 @@ pub static default_obmc_cdf: [[u16; cdf_size!(2)]; pub static default_intra_inter_cdf: [[u16; cdf_size!(2)]; INTRA_INTER_CONTEXTS] = [cdf!(806), cdf!(16662), cdf!(20186), cdf!(26538)]; -pub static default_comp_inter_cdf: [[u16; cdf_size!(2)]; COMP_INTER_CONTEXTS] = +pub static default_comp_mode_cdf: [[u16; cdf_size!(2)]; COMP_INTER_CONTEXTS] = [cdf!(26828), cdf!(24035), cdf!(12031), cdf!(10640), cdf!(2901)]; pub static default_comp_ref_type_cdf: [[u16; cdf_size!(2)]; diff --git a/src/me.rs b/src/me.rs index c95b01e3..f04d1c5a 100644 --- a/src/me.rs +++ b/src/me.rs @@ -45,7 +45,7 @@ pub fn motion_estimation( x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT, y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT }; - let range = 32 as isize; + let range = 32 * fi.me_range_scale as isize; let blk_w = bsize.width(); let blk_h = bsize.height(); let border_w = 128 + blk_w as isize * 8; @@ -113,7 +113,8 @@ pub fn motion_estimation( &mut tmp_plane.mut_slice(&PlaneOffset { x: 0, y: 0 }); mode.predict_inter( - fi, 0, &po, tmp_slice, blk_w, blk_h, ref_frame, &cand_mv, 8, + fi, 0, &po, tmp_slice, blk_w, blk_h, &[ref_frame, NONE_FRAME], + &[cand_mv, MotionVector{ row: 0, col: 0 }], 8, ); } diff --git a/src/partition.rs b/src/partition.rs index c4db29f4..3338c4d8 100644 --- a/src/partition.rs +++ b/src/partition.rs @@ -965,134 +965,175 @@ impl PredictionMode { pub fn predict_inter<'a>( self, fi: &FrameInvariants, p: usize, po: &PlaneOffset, dst: &'a mut PlaneMutSlice<'a>, width: usize, height: usize, - ref_frame: usize, mv: &MotionVector, bit_depth: usize + ref_frames: &[usize; 2], mvs: &[MotionVector; 2], bit_depth: usize ) { assert!(!self.is_intra()); - match fi.rec_buffer.frames[fi.ref_frames[ref_frame - LAST_FRAME] as usize] { - Some(ref rec) => { - let rec_cfg = &rec.frame.planes[p].cfg; - let shift_row = 3 + rec_cfg.ydec; - let shift_col = 3 + rec_cfg.xdec; - let row_offset = mv.row as i32 >> shift_row; - let col_offset = mv.col as i32 >> shift_col; - let row_frac = - (mv.row as i32 - (row_offset << shift_row)) << (4 - shift_row); - let col_frac = - (mv.col as i32 - (col_offset << shift_col)) << (4 - shift_col); - let ref_stride = rec_cfg.stride; - - let stride = dst.plane.cfg.stride; - let slice = dst.as_mut_slice(); - - let max_sample_val = ((1 << bit_depth) - 1) as i32; - let y_filter_idx = if height <= 4 { 4 } else { 0 }; - let x_filter_idx = if width <= 4 { 4 } else { 0 }; - let shifts = { - let shift_offset = if bit_depth == 12 { 2 } else { 0 }; - (3 + shift_offset, 11 - shift_offset) - }; - let round_shift = - |n, shift| -> i32 { (n + (1 << (shift - 1))) >> shift }; - - match (col_frac, row_frac) { - (0, 0) => { - let qo = PlaneOffset { - x: po.x + col_offset as isize, - y: po.y + row_offset as isize - }; - let ps = rec.frame.planes[p].slice(&qo); - let s = ps.as_slice_clamped(); - for r in 0..height { - for c in 0..width { - let output_index = r * stride + c; - slice[output_index] = s[r * ref_stride + c]; - } - } - } - (0, _) => { - let qo = PlaneOffset { - x: po.x + col_offset as isize, - y: po.y + row_offset as isize - 3 - }; - let ps = rec.frame.planes[p].slice(&qo); - let s = ps.as_slice_clamped(); - for r in 0..height { - for c in 0..width { - let mut sum: i32 = 0; - for k in 0..8 { - sum += s[(r + k) * ref_stride + c] as i32 - * SUBPEL_FILTERS[y_filter_idx][row_frac as usize][k]; - } - let output_index = r * stride + c; - let val = round_shift(sum, 7).max(0).min(max_sample_val); - slice[output_index] = val as u16; - } - } - } - (_, 0) => { - let qo = PlaneOffset { - x: po.x + col_offset as isize - 3, - y: po.y + row_offset as isize - }; - let ps = rec.frame.planes[p].slice(&qo); - let s = ps.as_slice_clamped(); - for r in 0..height { - for c in 0..width { - let mut sum: i32 = 0; - for k in 0..8 { - sum += s[r * ref_stride + (c + k)] as i32 - * SUBPEL_FILTERS[x_filter_idx][col_frac as usize][k]; + let is_compound = ref_frames[1] > INTRA_FRAME && ref_frames[1] != NONE_FRAME; + + let stride = dst.plane.cfg.stride; + let slice = dst.as_mut_slice(); + + for i in 0..(1 + is_compound as usize) { + match fi.rec_buffer.frames[fi.ref_frames[ref_frames[i] - LAST_FRAME] as usize] { + Some(ref rec) => { + let rec_cfg = &rec.frame.planes[p].cfg; + let shift_row = 3 + rec_cfg.ydec; + let shift_col = 3 + rec_cfg.xdec; + let row_offset = mvs[i].row as i32 >> shift_row; + let col_offset = mvs[i].col as i32 >> shift_col; + let row_frac = + (mvs[i].row as i32 - (row_offset << shift_row)) << (4 - shift_row); + let col_frac = + (mvs[i].col as i32 - (col_offset << shift_col)) << (4 - shift_col); + let ref_stride = rec_cfg.stride; + + let max_sample_val = ((1 << bit_depth) - 1) as i32; + let y_filter_idx = if height <= 4 { 4 } else { 0 }; + let x_filter_idx = if width <= 4 { 4 } else { 0 }; + let shifts = { + let shift_offset = if bit_depth == 12 { 2 } else { 0 }; + let inter_round0 = 3 + shift_offset; + let inter_round1 = if is_compound { 7 } else { 11 } - shift_offset; + (inter_round0, inter_round1, 14 - inter_round0 - inter_round1) + }; + let round_shift = + |n, shift| -> i32 { (n + (1 << (shift - 1))) >> shift }; + + match (col_frac, row_frac) { + (0, 0) => { + let qo = PlaneOffset { + x: po.x + col_offset as isize, + y: po.y + row_offset as isize + }; + let ps = rec.frame.planes[p].slice(&qo); + let s = ps.as_slice_clamped(); + for r in 0..height { + for c in 0..width { + let output_index = r * stride + c; + let mut val = s[r * ref_stride + c] as i32; + if is_compound { + val = val << shifts.2; + if i == 1 { + val = val + slice[output_index] as i32 - 32768; + val = round_shift(val, shifts.2 + 1); + val = val.max(0).min(max_sample_val); + } else { + val = val + 32768; + } + } + slice[output_index] = val as u16; } - let output_index = r * stride + c; - let val = - round_shift(round_shift(sum, shifts.0), shifts.1 - 7) - .max(0) - .min(max_sample_val); - slice[output_index] = val as u16; } } - } - (_, _) => { - let mut intermediate = [0 as i16; 8 * (128 + 7)]; - - let qo = PlaneOffset { - x: po.x + col_offset as isize - 3, - y: po.y + row_offset as isize - 3 - }; - let ps = rec.frame.planes[p].slice(&qo); - let s = ps.as_slice_clamped(); - for cg in (0..width).step_by(8) { - for r in 0..height + 7 { - for c in cg..(cg + 8).min(width) { + (0, _) => { + let qo = PlaneOffset { + x: po.x + col_offset as isize, + y: po.y + row_offset as isize - 3 + }; + let ps = rec.frame.planes[p].slice(&qo); + let s = ps.as_slice_clamped(); + for r in 0..height { + for c in 0..width { let mut sum: i32 = 0; for k in 0..8 { - sum += s[r * ref_stride + (c + k)] as i32 * SUBPEL_FILTERS - [x_filter_idx][col_frac as usize][k]; + sum += s[(r + k) * ref_stride + c] as i32 + * SUBPEL_FILTERS[y_filter_idx][row_frac as usize][k]; + } + let output_index = r * stride + c; + let mut val = round_shift(sum, shifts.0 + shifts.1 - 7); + if is_compound && i == 1 { + val = val + slice[output_index] as i32 - 32768; + val = round_shift(val, shifts.2 + 1); + val = val.max(0).min(max_sample_val); + } else if !is_compound { + val = val.max(0).min(max_sample_val); + } else { + val = val + 32768; } - let val = round_shift(sum, shifts.0); - intermediate[8 * r + (c - cg)] = val as i16; + slice[output_index] = val as u16; } } - + } + (_, 0) => { + let qo = PlaneOffset { + x: po.x + col_offset as isize - 3, + y: po.y + row_offset as isize + }; + let ps = rec.frame.planes[p].slice(&qo); + let s = ps.as_slice_clamped(); for r in 0..height { - for c in cg..(cg + 8).min(width) { + for c in 0..width { let mut sum: i32 = 0; for k in 0..8 { - sum += intermediate[8 * (r + k) + c - cg] as i32 - * SUBPEL_FILTERS[y_filter_idx][row_frac as usize][k]; + sum += s[r * ref_stride + (c + k)] as i32 + * SUBPEL_FILTERS[x_filter_idx][col_frac as usize][k]; } let output_index = r * stride + c; - let val = - round_shift(sum, shifts.1).max(0).min(max_sample_val); + let mut val = round_shift(round_shift(sum, shifts.0) << 7, shifts.1); + if is_compound && i == 1 { + val = val + slice[output_index] as i32 - 32768; + val = round_shift(val, shifts.2 + 1); + val = val.max(0).min(max_sample_val); + } else if !is_compound { + val = val.max(0).min(max_sample_val); + } else { + val = val + 32768; + } slice[output_index] = val as u16; } } } + (_, _) => { + let mut intermediate = [0 as i16; 8 * (128 + 7)]; + + let qo = PlaneOffset { + x: po.x + col_offset as isize - 3, + y: po.y + row_offset as isize - 3 + }; + let ps = rec.frame.planes[p].slice(&qo); + let s = ps.as_slice_clamped(); + for cg in (0..width).step_by(8) { + for r in 0..height + 7 { + for c in cg..(cg + 8).min(width) { + let mut sum: i32 = 0; + for k in 0..8 { + sum += s[r * ref_stride + (c + k)] as i32 * SUBPEL_FILTERS + [x_filter_idx][col_frac as usize][k]; + } + let val = round_shift(sum, shifts.0); + intermediate[8 * r + (c - cg)] = val as i16; + } + } + + for r in 0..height { + for c in cg..(cg + 8).min(width) { + let mut sum: i32 = 0; + for k in 0..8 { + sum += intermediate[8 * (r + k) + c - cg] as i32 + * SUBPEL_FILTERS[y_filter_idx][row_frac as usize][k]; + } + let output_index = r * stride + c; + let mut val = round_shift(sum, shifts.1); + if is_compound && i == 1 { + val = val + slice[output_index] as i32 - 32768; + val = round_shift(val, shifts.2 + 1); + val = val.max(0).min(max_sample_val); + } else if !is_compound { + val = val.max(0).min(max_sample_val); + } else { + val = val + 32768; + } + + slice[output_index] = val as u16; + } + } + } + } } } + None => () } - None => () } } } diff --git a/src/predict.rs b/src/predict.rs index 5c7c9d08..f9971e49 100644 --- a/src/predict.rs +++ b/src/predict.rs @@ -50,6 +50,14 @@ pub static RAV1E_INTER_MODES_MINIMAL: &'static [PredictionMode] = &[ PredictionMode::NEWMV ]; +pub static RAV1E_INTER_COMPOUND_MODES: &'static [PredictionMode] = &[ + PredictionMode::GLOBAL_GLOBALMV, + PredictionMode::NEAREST_NEARESTMV, + PredictionMode::NEW_NEWMV, + PredictionMode::NEAREST_NEWMV, + PredictionMode::NEW_NEARESTMV +]; + // Weights are quadratic from '1' to '1 / block_size', scaled by 2^sm_weight_log2_scale. const sm_weight_log2_scale: u8 = 8; diff --git a/src/rdo.rs b/src/rdo.rs index 46ea29d5..677f2d65 100644 --- a/src/rdo.rs +++ b/src/rdo.rs @@ -23,7 +23,7 @@ use motion_compensate; use partition::*; use plane::*; use cdef::*; -use predict::{RAV1E_INTRA_MODES, RAV1E_INTRA_MODES_MINIMAL, RAV1E_INTER_MODES_MINIMAL}; +use predict::{RAV1E_INTRA_MODES, RAV1E_INTRA_MODES_MINIMAL, RAV1E_INTER_MODES_MINIMAL, RAV1E_INTER_COMPOUND_MODES}; use quantize::dc_q; use std; use std::f64; @@ -37,6 +37,7 @@ use FrameState; use FrameType; use Tune; use Sequence; +use encoder::ReferenceMode; #[derive(Clone)] pub struct RDOOutput { @@ -52,8 +53,8 @@ pub struct RDOPartitionOutput { pub pred_mode_luma: PredictionMode, pub pred_mode_chroma: PredictionMode, pub pred_cfl_params: CFLParams, - pub ref_frame: usize, - pub mv: MotionVector, + pub ref_frames: [usize; 2], + pub mvs: [MotionVector; 2], pub skip: bool, pub tx_size: TxSize, pub tx_type: TxType, @@ -212,7 +213,7 @@ fn compute_rd_cost( pub fn rdo_tx_size_type( seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState, cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset, - luma_mode: PredictionMode, ref_frame: usize, mv: MotionVector, skip: bool + luma_mode: PredictionMode, ref_frames: &[usize; 2], mvs: &[MotionVector; 2], skip: bool ) -> (TxSize, TxType) { // these rules follow TX_MODE_LARGEST let tx_size = match bsize { @@ -235,8 +236,8 @@ pub fn rdo_tx_size_type( fs, cw, luma_mode, - ref_frame, - mv, + ref_frames, + mvs, bsize, bo, tx_size, @@ -256,8 +257,8 @@ struct EncodingSettings { cfl_params: CFLParams, skip: bool, rd: f64, - ref_frame: usize, - mv: MotionVector, + ref_frames: [usize; 2], + mvs: [MotionVector; 2], tx_size: TxSize, tx_type: TxType } @@ -270,8 +271,8 @@ impl Default for EncodingSettings { cfl_params: CFLParams::new(), skip: false, rd: std::f64::MAX, - ref_frame: INTRA_FRAME, - mv: MotionVector { row: 0, col: 0 }, + ref_frames: [INTRA_FRAME, NONE_FRAME], + mvs: [MotionVector { row: 0, col: 0 }; 2], tx_size: TxSize::TX_4X4, tx_type: TxType::DCT_DCT } @@ -304,50 +305,81 @@ pub fn rdo_mode_decision( RAV1E_INTRA_MODES_MINIMAL }; - let mut ref_frame_set = Vec::new(); + let mut ref_frames_set = Vec::new(); let mut ref_slot_set = Vec::new(); + let mut mvs_from_me = Vec::new(); + let mut fwdref = None; + let mut bwdref = None; if fi.frame_type == FrameType::INTER { for i in LAST_FRAME..NONE_FRAME { // Don't search LAST3 since it's used only for probs if i == LAST3_FRAME { continue; } if !ref_slot_set.contains(&fi.ref_frames[i - LAST_FRAME]) { - ref_frame_set.push(i); + if fwdref == None && i < BWDREF_FRAME { + fwdref = Some(ref_frames_set.len()); + } + if bwdref == None && i >= BWDREF_FRAME { + bwdref = Some(ref_frames_set.len()); + } + ref_frames_set.push([i, NONE_FRAME]); ref_slot_set.push(fi.ref_frames[i - LAST_FRAME]); + mvs_from_me.push([motion_estimation(fi, fs, bsize, bo, i, pmv), MotionVector { row: 0, col: 0 }]); } } - assert!(ref_frame_set.len() != 0); + assert!(ref_frames_set.len() != 0); } let mut mode_set: Vec<(PredictionMode, usize)> = Vec::new(); let mut mv_stacks = Vec::new(); let mut mode_contexts = Vec::new(); - for (i, &ref_frame) in ref_frame_set.iter().enumerate() { - let mut mvs: Vec<CandidateMV> = Vec::new(); - mode_contexts.push(cw.find_mvrefs(bo, ref_frame, &mut mvs, bsize, false, fi)); + for (i, &ref_frames) in ref_frames_set.iter().enumerate() { + let mut mv_stack: Vec<CandidateMV> = Vec::new(); + mode_contexts.push(cw.find_mvrefs(bo, &ref_frames, &mut mv_stack, bsize, false, fi, false)); if fi.frame_type == FrameType::INTER { for &x in RAV1E_INTER_MODES_MINIMAL { mode_set.push((x, i)); } if fi.config.speed <= 2 { - if mvs.len() >= 3 { + if mv_stack.len() >= 3 { mode_set.push((PredictionMode::NEAR1MV, i)); } - if mvs.len() >= 4 { + if mv_stack.len() >= 4 { mode_set.push((PredictionMode::NEAR2MV, i)); } } } - mv_stacks.push(mvs); + mv_stacks.push(mv_stack); + } + + let sz = bsize.width_mi().min(bsize.height_mi()); + + if fi.frame_type == FrameType::INTER && fi.reference_mode != ReferenceMode::SINGLE && sz >= 2 { + // Adding compound candidate + if let Some(r0) = fwdref { + if let Some(r1) = bwdref { + let ref_frames = [ref_frames_set[r0][0], ref_frames_set[r1][0]]; + ref_frames_set.push(ref_frames); + let mv0 = mvs_from_me[r0][0]; + let mv1 = mvs_from_me[r1][0]; + mvs_from_me.push([mv0, mv1]); + let mut mv_stack: Vec<CandidateMV> = Vec::new(); + mode_contexts.push(cw.find_mvrefs(bo, &ref_frames, &mut mv_stack, bsize, false, fi, true)); + for &x in RAV1E_INTER_COMPOUND_MODES { + mode_set.push((x, ref_frames_set.len() - 1)); + } + mv_stacks.push(mv_stack); + } + } } let luma_rdo = |luma_mode: PredictionMode, fs: &mut FrameState, cw: &mut ContextWriter, best: &mut EncodingSettings, - mv: MotionVector, ref_frame: usize, mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool, + mvs: &[MotionVector; 2], ref_frames: &[usize; 2], mode_set_chroma: &[PredictionMode], luma_mode_is_intra: bool, mode_context: usize, mv_stack: &Vec<CandidateMV>| { let (tx_size, mut tx_type) = rdo_tx_size_type( - seq, fi, fs, cw, bsize, bo, luma_mode, ref_frame, mv, false, + seq, fi, fs, cw, bsize, bo, luma_mode, ref_frames, mvs, false, ); // Find the best chroma prediction mode for the current luma prediction mode @@ -367,8 +399,8 @@ pub fn rdo_mode_decision( wr, luma_mode, chroma_mode, - ref_frame, - mv, + ref_frames, + mvs, bsize, bo, skip, @@ -394,11 +426,12 @@ pub fn rdo_mode_decision( ); if rd < best.rd { + //if rd < best.rd || luma_mode == PredictionMode::NEW_NEWMV { best.rd = rd; best.mode_luma = luma_mode; best.mode_chroma = chroma_mode; - best.ref_frame = ref_frame; - best.mv = mv; + best.ref_frames = *ref_frames; + best.mvs = *mvs; best.skip = skip; best.tx_size = tx_size; best.tx_type = tx_type; @@ -420,36 +453,40 @@ pub fn rdo_mode_decision( } mode_set.iter().for_each(|&(luma_mode, i)| { - let mv = match luma_mode { - PredictionMode::NEWMV => motion_estimation(fi, fs, bsize, bo, ref_frame_set[i], pmv), - PredictionMode::NEARESTMV => if mv_stacks[i].len() > 0 { - mv_stacks[i][0].this_mv + let mvs = match luma_mode { + PredictionMode::NEWMV | PredictionMode::NEW_NEWMV => mvs_from_me[i], + PredictionMode::NEARESTMV | PredictionMode::NEAREST_NEARESTMV => if mv_stacks[i].len() > 0 { + [mv_stacks[i][0].this_mv, mv_stacks[i][0].comp_mv] } else { - MotionVector { row: 0, col: 0 } + [MotionVector { row: 0, col: 0 }; 2] }, PredictionMode::NEAR0MV => if mv_stacks[i].len() > 1 { - mv_stacks[i][1].this_mv + [mv_stacks[i][1].this_mv, mv_stacks[i][1].comp_mv] } else { - MotionVector { row: 0, col: 0 } + [MotionVector { row: 0, col: 0 }; 2] }, PredictionMode::NEAR1MV | PredictionMode::NEAR2MV => - mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].this_mv, - _ => MotionVector { row: 0, col: 0 } + [mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].this_mv, + mv_stacks[i][luma_mode as usize - PredictionMode::NEAR0MV as usize + 1].comp_mv], + PredictionMode::NEAREST_NEWMV => [mv_stacks[i][0].this_mv, mvs_from_me[i][1]], + PredictionMode::NEW_NEARESTMV => [mvs_from_me[i][0], mv_stacks[i][0].comp_mv], + _ => [MotionVector { row: 0, col: 0 }; 2] }; let mode_set_chroma = vec![luma_mode]; - luma_rdo(luma_mode, fs, cw, &mut best, mv, ref_frame_set[i], &mode_set_chroma, false, + luma_rdo(luma_mode, fs, cw, &mut best, &mvs, &ref_frames_set[i], &mode_set_chroma, false, mode_contexts[i], &mv_stacks[i]); }); if !best.skip { intra_mode_set.iter().for_each(|&luma_mode| { - let mv = MotionVector { row: 0, col: 0 }; + let mvs = &[MotionVector { row: 0, col: 0 }; 2]; + let ref_frames = &[INTRA_FRAME, NONE_FRAME]; let mut mode_set_chroma = vec![luma_mode]; if is_chroma_block && luma_mode != PredictionMode::DC_PRED { mode_set_chroma.push(PredictionMode::DC_PRED); } - luma_rdo(luma_mode, fs, cw, &mut best, mv, INTRA_FRAME, &mode_set_chroma, true, + luma_rdo(luma_mode, fs, cw, &mut best, mvs, ref_frames, &mode_set_chroma, true, 0, &Vec::new()); }); } @@ -488,8 +525,8 @@ pub fn rdo_mode_decision( wr, best.mode_luma, chroma_mode, - best.ref_frame, - best.mv, + &best.ref_frames, + &best.mvs, bsize, bo, best.skip, @@ -525,8 +562,8 @@ pub fn rdo_mode_decision( } cw.bc.set_mode(bo, bsize, best.mode_luma); - cw.bc.set_ref_frame(bo, bsize, best.ref_frame); - cw.bc.set_motion_vector(bo, bsize, best.mv); + cw.bc.set_ref_frames(bo, bsize, &best.ref_frames); + cw.bc.set_motion_vectors(bo, bsize, &best.mvs); assert!(best.rd >= 0_f64); @@ -538,8 +575,8 @@ pub fn rdo_mode_decision( pred_mode_luma: best.mode_luma, pred_mode_chroma: best.mode_chroma, pred_cfl_params: best.cfl_params, - ref_frame: best.ref_frame, - mv: best.mv, + ref_frames: best.ref_frames, + mvs: best.mvs, rd_cost: best.rd, skip: best.skip, tx_size: best.tx_size, @@ -594,7 +631,7 @@ pub fn rdo_cfl_alpha( // RDO-based transform type decision pub fn rdo_tx_type_decision( fi: &FrameInvariants, fs: &mut FrameState, cw: &mut ContextWriter, - mode: PredictionMode, ref_frame: usize, mv: MotionVector, bsize: BlockSize, bo: &BlockOffset, tx_size: TxSize, + mode: PredictionMode, ref_frames: &[usize; 2], mvs: &[MotionVector; 2], bsize: BlockSize, bo: &BlockOffset, tx_size: TxSize, tx_set: TxSet, bit_depth: usize ) -> TxType { let mut best_type = TxType::DCT_DCT; @@ -617,7 +654,7 @@ pub fn rdo_tx_type_decision( continue; } - motion_compensate(fi, fs, cw, mode, ref_frame, mv, bsize, bo, bit_depth, true); + motion_compensate(fi, fs, cw, mode, ref_frames, mvs, bsize, bo, bit_depth, true); let mut wr: &mut dyn Writer = &mut WriterCounter::new(); let tell = wr.tell_frac(); @@ -702,7 +739,7 @@ pub fn rdo_partition_decision( if subsize == BlockSize::BLOCK_INVALID { continue; } - pmv = best_pred_modes[0].mv; + pmv = best_pred_modes[0].mvs[0]; assert!(best_pred_modes.len() <= 4); let bs = bsize.width_mi(); -- GitLab