Unverified Commit bc5a464b authored by fbossen's avatar fbossen Committed by GitHub

Hierarchical motion estimation (#673)

* Generate downsampled versions of input frames and store them with reference frames

* Add hierarchical ME, where initial ME is done using 4x subsampled 64x64 blocks. MVs are refined using 2x subsampled 32x32 blocks. In the refinement step, search is done around MV found for colocated 64x64 block, and also around MVs found for neighboring 64x64 blocks
parent 27b67a5b
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
// Media Patent License 1.0 was not distributed with this source code in the // Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent. // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
use context::CDFContext;
use encoder::*; use encoder::*;
use partition::*; use partition::*;
...@@ -291,13 +290,7 @@ impl Context { ...@@ -291,13 +290,7 @@ impl Context {
if self.fi.show_existing_frame { if self.fi.show_existing_frame {
self.idx = self.idx + 1; self.idx = self.idx + 1;
let mut fs = FrameState { let mut fs = FrameState::new(&self.fi);
input: Arc::new(Frame::new(self.fi.padded_w, self.fi.padded_h)), // dummy
rec: Frame::new(self.fi.padded_w, self.fi.padded_h),
qc: Default::default(),
cdfs: CDFContext::new(0),
deblock: Default::default(),
};
let data = encode_frame(&mut self.seq, &mut self.fi, &mut fs); let data = encode_frame(&mut self.seq, &mut self.fi, &mut fs);
...@@ -310,13 +303,7 @@ impl Context { ...@@ -310,13 +303,7 @@ impl Context {
self.idx = self.idx + 1; self.idx = self.idx + 1;
if let Some(frame) = f { if let Some(frame) = f {
let mut fs = FrameState { let mut fs = FrameState::new_with_frame(&self.fi, frame);
input: frame,
rec: Frame::new(self.fi.padded_w, self.fi.padded_h),
qc: Default::default(),
cdfs: CDFContext::new(0),
deblock: Default::default(),
};
let data = encode_frame(&mut self.seq, &mut self.fi, &mut fs); let data = encode_frame(&mut self.seq, &mut self.fi, &mut fs);
self.packet_data.extend(data); self.packet_data.extend(data);
......
...@@ -52,7 +52,7 @@ pub const MAX_MIB_SIZE: usize = (1 << MAX_MIB_SIZE_LOG2); ...@@ -52,7 +52,7 @@ pub const MAX_MIB_SIZE: usize = (1 << MAX_MIB_SIZE_LOG2);
pub const MAX_MIB_MASK: usize = (MAX_MIB_SIZE - 1); pub const MAX_MIB_MASK: usize = (MAX_MIB_SIZE - 1);
const MAX_SB_SIZE_LOG2: usize = 6; const MAX_SB_SIZE_LOG2: usize = 6;
const MAX_SB_SIZE: usize = (1 << MAX_SB_SIZE_LOG2); pub const MAX_SB_SIZE: usize = (1 << MAX_SB_SIZE_LOG2);
const MAX_SB_SQUARE: usize = (MAX_SB_SIZE * MAX_SB_SIZE); const MAX_SB_SQUARE: usize = (MAX_SB_SIZE * MAX_SB_SIZE);
pub const MAX_TX_SIZE: usize = 32; pub const MAX_TX_SIZE: usize = 32;
......
This diff is collapsed.
...@@ -35,6 +35,18 @@ pub fn get_sad( ...@@ -35,6 +35,18 @@ pub fn get_sad(
sum sum
} }
fn get_mv_range(fi: &FrameInvariants, bo: &BlockOffset, blk_w: usize, blk_h: usize) -> (isize, isize, isize, isize) {
let border_w = 128 + blk_w as isize * 8;
let border_h = 128 + blk_h as isize * 8;
let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
let mvx_max = (fi.w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
let mvy_max = (fi.h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
(mvx_min, mvx_max, mvy_min, mvy_max)
}
pub fn motion_estimation( pub fn motion_estimation(
fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize,
bo: &BlockOffset, ref_frame: usize, pmv: &MotionVector bo: &BlockOffset, ref_frame: usize, pmv: &MotionVector
...@@ -45,15 +57,10 @@ pub fn motion_estimation( ...@@ -45,15 +57,10 @@ pub fn motion_estimation(
x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT, x: (bo.x as isize) << BLOCK_TO_PLANE_SHIFT,
y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT y: (bo.y as isize) << BLOCK_TO_PLANE_SHIFT
}; };
let range = 32 * fi.me_range_scale as isize; let range = 16;
let blk_w = bsize.width(); let blk_w = bsize.width();
let blk_h = bsize.height(); let blk_h = bsize.height();
let border_w = 128 + blk_w as isize * 8; let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi, bo, blk_w, blk_h);
let border_h = 128 + blk_h as isize * 8;
let mvx_min = -(bo.x as isize) * (8 * MI_SIZE) as isize - border_w;
let mvx_max = (fi.w_in_b - bo.x - blk_w / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_w;
let mvy_min = -(bo.y as isize) * (8 * MI_SIZE) as isize - border_h;
let mvy_max = (fi.h_in_b - bo.y - blk_h / MI_SIZE) as isize * (8 * MI_SIZE) as isize + border_h;
let x_lo = po.x + ((-range + (pmv.col / 8) as isize).max(mvx_min / 8)); let x_lo = po.x + ((-range + (pmv.col / 8) as isize).max(mvx_min / 8));
let x_hi = po.x + ((range + (pmv.col / 8) as isize).min(mvx_max / 8)); let x_hi = po.x + ((range + (pmv.col / 8) as isize).min(mvx_max / 8));
let y_lo = po.y + ((-range + (pmv.row / 8) as isize).max(mvy_min / 8)); let y_lo = po.y + ((-range + (pmv.row / 8) as isize).max(mvy_min / 8));
...@@ -62,22 +69,10 @@ pub fn motion_estimation( ...@@ -62,22 +69,10 @@ pub fn motion_estimation(
let mut lowest_sad = 128 * 128 * 4096 as u32; let mut lowest_sad = 128 * 128 * 4096 as u32;
let mut best_mv = MotionVector { row: 0, col: 0 }; let mut best_mv = MotionVector { row: 0, col: 0 };
for y in (y_lo..y_hi).step_by(2) { full_search(
for x in (x_lo..x_hi).step_by(2) { x_lo, x_hi, y_lo, y_hi, blk_h, blk_w,
let plane_org = fs.input.planes[0].slice(&po); &fs.input.planes[0], &rec.frame.planes[0], &mut best_mv, &mut lowest_sad, &po, 2
let plane_ref = rec.frame.planes[0].slice(&PlaneOffset { x, y }); );
let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w);
if sad < lowest_sad {
lowest_sad = sad;
best_mv = MotionVector {
row: 8 * (y as i16 - po.y as i16),
col: 8 * (x as i16 - po.x as i16)
}
}
}
}
let mode = PredictionMode::NEWMV; let mode = PredictionMode::NEWMV;
let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0); let mut tmp_plane = Plane::new(blk_w, blk_h, 0, 0, 0, 0);
...@@ -138,6 +133,104 @@ pub fn motion_estimation( ...@@ -138,6 +133,104 @@ pub fn motion_estimation(
} }
} }
fn full_search(x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, blk_h: usize, blk_w: usize,
p_org: &Plane, p_ref: &Plane, best_mv: &mut MotionVector, lowest_sad: &mut u32,
po: &PlaneOffset, step: usize) {
for y in (y_lo..y_hi).step_by(step) {
for x in (x_lo..x_hi).step_by(step) {
let plane_org = p_org.slice(po);
let plane_ref = p_ref.slice(&PlaneOffset { x, y });
let sad = get_sad(&plane_org, &plane_ref, blk_h, blk_w);
if sad < *lowest_sad {
*lowest_sad = sad;
*best_mv = MotionVector {
row: 8 * (y as i16 - po.y as i16),
col: 8 * (x as i16 - po.x as i16)
}
}
}
}
}
// Adjust block offset such that entire block lies within frame boundaries
fn adjust_bo(bo: &BlockOffset, fi: &FrameInvariants, blk_w: usize, blk_h: usize) -> BlockOffset {
BlockOffset {
x: (bo.x as isize).min(fi.w_in_b as isize - blk_w as isize / 4).max(0) as usize,
y: (bo.y as isize).min(fi.h_in_b as isize - blk_h as isize / 4).max(0) as usize
}
}
pub fn estimate_motion_ss4(
fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, ref_idx: usize, bo: &BlockOffset
) -> Option<MotionVector> {
if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
let blk_w = bsize.width();
let blk_h = bsize.height();
let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
let po = PlaneOffset {
x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 2,
y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 2
};
let range = 64 * fi.me_range_scale as isize;
let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi, &bo_adj, blk_w, blk_h);
let x_lo = po.x + (((-range).max(mvx_min / 8)) >> 2);
let x_hi = po.x + (((range).min(mvx_max / 8)) >> 2);
let y_lo = po.y + (((-range).max(mvy_min / 8)) >> 2);
let y_hi = po.y + (((range).min(mvy_max / 8)) >> 2);
let mut lowest_sad = ((blk_w >> 2) * (blk_h >> 2) * 4096) as u32;
let mut best_mv = MotionVector { row: 0, col: 0 };
full_search(
x_lo, x_hi, y_lo, y_hi, blk_h >> 2, blk_w >> 2,
&fs.input_qres, &rec.input_qres, &mut best_mv, &mut lowest_sad, &po, 1
);
Some(MotionVector { row: best_mv.row * 4, col: best_mv.col * 4 })
} else {
None
}
}
pub fn estimate_motion_ss2(
fi: &FrameInvariants, fs: &FrameState, bsize: BlockSize, ref_idx: usize, bo: &BlockOffset, pmvs: &[Option<MotionVector>; 3]
) -> Option<MotionVector> {
if let Some(ref rec) = fi.rec_buffer.frames[ref_idx] {
let blk_w = bsize.width();
let blk_h = bsize.height();
let bo_adj = adjust_bo(bo, fi, blk_w, blk_h);
let po = PlaneOffset {
x: (bo_adj.x as isize) << BLOCK_TO_PLANE_SHIFT >> 1,
y: (bo_adj.y as isize) << BLOCK_TO_PLANE_SHIFT >> 1
};
let range = 16;
let (mvx_min, mvx_max, mvy_min, mvy_max) = get_mv_range(fi, &bo_adj, blk_w, blk_h);
let mut lowest_sad = ((blk_w >> 1) * (blk_h >> 1) * 4096) as u32;
let mut best_mv = MotionVector { row: 0, col: 0 };
for omv in pmvs.iter() {
if let Some(pmv) = omv {
let x_lo = po.x + (((pmv.col as isize / 8 - range).max(mvx_min / 8)) >> 1);
let x_hi = po.x + (((pmv.col as isize / 8 + range).min(mvx_max / 8)) >> 1);
let y_lo = po.y + (((pmv.row as isize / 8 - range).max(mvy_min / 8)) >> 1);
let y_hi = po.y + (((pmv.row as isize / 8 + range).min(mvy_max / 8)) >> 1);
full_search(
x_lo, x_hi, y_lo, y_hi, blk_h >> 1, blk_w >> 1,
&fs.input_hres, &rec.input_hres, &mut best_mv, &mut lowest_sad, &po, 1
);
}
}
Some(MotionVector { row: best_mv.row * 2, col: best_mv.col * 2 })
} else {
None
}
}
#[cfg(test)] #[cfg(test)]
pub mod test { pub mod test {
use super::*; use super::*;
......
...@@ -689,7 +689,9 @@ pub enum MvSubpelPrecision { ...@@ -689,7 +689,9 @@ pub enum MvSubpelPrecision {
MV_SUBPEL_HIGH_PRECISION MV_SUBPEL_HIGH_PRECISION
} }
const SUBPEL_FILTERS: [[[i32; 8]; 16]; 6] = [ pub const SUBPEL_FILTER_SIZE: usize = 8;
const SUBPEL_FILTERS: [[[i32; SUBPEL_FILTER_SIZE]; 16]; 6] = [
[ [
[0, 0, 0, 128, 0, 0, 0, 0], [0, 0, 0, 128, 0, 0, 0, 0],
[0, 2, -6, 126, 8, -2, 0, 0], [0, 2, -6, 126, 8, -2, 0, 0],
......
...@@ -218,6 +218,29 @@ impl Plane { ...@@ -218,6 +218,29 @@ impl Plane {
} }
} }
} }
pub fn downsample_from(&mut self, src: &Plane) {
let width = self.cfg.width;
let height = self.cfg.height;
assert!(width * 2 == src.cfg.width);
assert!(height * 2 == src.cfg.height);
for row in 0..height {
let mut dst_slice = self.mut_slice(&PlaneOffset{ x: 0, y: row as isize });
let mut dst = dst_slice.as_mut_slice();
for col in 0..width {
let mut sum = 0;
sum = sum + src.p(2*col, 2*row);
sum = sum + src.p(2*col+1, 2*row);
sum = sum + src.p(2*col, 2*row+1);
sum = sum + src.p(2*col+1, 2*row+1);
let avg = (sum + 2) >> 2;
dst[col] = avg;
}
}
}
} }
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
......
...@@ -340,7 +340,7 @@ impl Default for EncodingSettings { ...@@ -340,7 +340,7 @@ impl Default for EncodingSettings {
pub fn rdo_mode_decision( pub fn rdo_mode_decision(
seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState, seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset, cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset,
pmv: &MotionVector pmvs: &[Option<MotionVector>]
) -> RDOOutput { ) -> RDOOutput {
let mut best = EncodingSettings::default(); let mut best = EncodingSettings::default();
...@@ -381,8 +381,10 @@ pub fn rdo_mode_decision( ...@@ -381,8 +381,10 @@ pub fn rdo_mode_decision(
bwdref = Some(ref_frames_set.len()); bwdref = Some(ref_frames_set.len());
} }
ref_frames_set.push([i, NONE_FRAME]); ref_frames_set.push([i, NONE_FRAME]);
ref_slot_set.push(fi.ref_frames[i - LAST_FRAME]); let slot_idx = fi.ref_frames[i - LAST_FRAME];
mvs_from_me.push([motion_estimation(fi, fs, bsize, bo, i, pmv), MotionVector { row: 0, col: 0 }]); ref_slot_set.push(slot_idx);
let pmv = pmvs[slot_idx as usize].unwrap();
mvs_from_me.push([motion_estimation(fi, fs, bsize, bo, i, &pmv), MotionVector { row: 0, col: 0 }]);
} }
} }
assert!(ref_frames_set.len() != 0); assert!(ref_frames_set.len() != 0);
...@@ -794,7 +796,7 @@ pub fn rdo_tx_type_decision( ...@@ -794,7 +796,7 @@ pub fn rdo_tx_type_decision(
pub fn rdo_partition_decision( pub fn rdo_partition_decision(
seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState, seq: &Sequence, fi: &FrameInvariants, fs: &mut FrameState,
cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset, cw: &mut ContextWriter, bsize: BlockSize, bo: &BlockOffset,
cached_block: &RDOOutput cached_block: &RDOOutput, pmvs: &[[Option<MotionVector>; REF_FRAMES]; 5]
) -> RDOOutput { ) -> RDOOutput {
let max_rd = std::f64::MAX; let max_rd = std::f64::MAX;
...@@ -812,7 +814,6 @@ pub fn rdo_partition_decision( ...@@ -812,7 +814,6 @@ pub fn rdo_partition_decision(
let mut rd: f64; let mut rd: f64;
let mut child_modes = std::vec::Vec::new(); let mut child_modes = std::vec::Vec::new();
let mut pmv = MotionVector { row: 0, col: 0 };
match partition { match partition {
PartitionType::PARTITION_NONE => { PartitionType::PARTITION_NONE => {
...@@ -820,11 +821,14 @@ pub fn rdo_partition_decision( ...@@ -820,11 +821,14 @@ pub fn rdo_partition_decision(
continue; continue;
} }
let pmv_idx = ((bo.x & 32) >> 5) + ((bo.y & 32) >> 4) + 1;
let spmvs = &pmvs[pmv_idx];
let mode_decision = cached_block let mode_decision = cached_block
.part_modes .part_modes
.get(0) .get(0)
.unwrap_or( .unwrap_or(
&rdo_mode_decision(seq, fi, fs, cw, bsize, bo, &pmv).part_modes[0] &rdo_mode_decision(seq, fi, fs, cw, bsize, bo, spmvs).part_modes[0]
).clone(); ).clone();
child_modes.push(mode_decision); child_modes.push(mode_decision);
} }
...@@ -834,7 +838,7 @@ pub fn rdo_partition_decision( ...@@ -834,7 +838,7 @@ pub fn rdo_partition_decision(
if subsize == BlockSize::BLOCK_INVALID { if subsize == BlockSize::BLOCK_INVALID {
continue; continue;
} }
pmv = best_pred_modes[0].mvs[0]; //pmv = best_pred_modes[0].mvs[0];
assert!(best_pred_modes.len() <= 4); assert!(best_pred_modes.len() <= 4);
let bs = bsize.width_mi(); let bs = bsize.width_mi();
...@@ -845,11 +849,21 @@ pub fn rdo_partition_decision( ...@@ -845,11 +849,21 @@ pub fn rdo_partition_decision(
&BlockOffset{ x: bo.x, y: bo.y + hbs as usize }, &BlockOffset{ x: bo.x, y: bo.y + hbs as usize },
&BlockOffset{ x: bo.x + hbs as usize, y: bo.y + hbs as usize } &BlockOffset{ x: bo.x + hbs as usize, y: bo.y + hbs as usize }
]; ];
let pmv_idxs = partitions.iter().map(|&offset| {
if subsize > BlockSize::BLOCK_32X32 {
0
} else {
((offset.x & 32) >> 5) + ((offset.y & 32) >> 4) + 1
}
}).collect::<Vec<_>>();
child_modes.extend( child_modes.extend(
partitions partitions
.iter() .iter().zip(pmv_idxs)
.map(|&offset| { .map(|(&offset, pmv_idx)| {
rdo_mode_decision(seq, fi, fs, cw, subsize, &offset, &pmv) rdo_mode_decision(seq, fi, fs, cw, subsize, &offset,
&pmvs[pmv_idx])
.part_modes[0] .part_modes[0]
.clone() .clone()
}).collect::<Vec<_>>() }).collect::<Vec<_>>()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment