Commit 869fef70 authored by Josh Holmer, committed by Thomas Daede

Adaptive keyframe selection

Detects the level of difference between the current frame and the
previous frame and, if the difference exceeds a preset threshold,
encodes the current frame as a keyframe.

Introduces a new CLI option, --min-keyint (-i), to set the minimum
interval between two keyframes, and reuses the existing --keyint (-I)
option as the maximum interval between two keyframes. The default
minimum is 12 frames. To disable scene change detection, set the
minimum and maximum to the same value via the CLI.
parent 90f50929
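As a rough illustration of the policy described in the commit message (not code from this commit), the decision can be thought of as: force a keyframe once the maximum interval is reached, and otherwise only allow a scene-change-triggered keyframe after the minimum interval has elapsed. A minimal sketch, with hypothetical names (`frames_since_key`, `scene_change`):

```rust
// Hypothetical sketch of the keyframe policy from the commit message above;
// `frames_since_key` and `scene_change` are illustrative names, not rav1e API.
fn is_keyframe(frames_since_key: u64, scene_change: bool,
               min_keyint: u64, max_keyint: u64) -> bool {
    if frames_since_key >= max_keyint {
        // Never exceed the maximum interval (--keyint / -I).
        true
    } else if frames_since_key >= min_keyint && scene_change {
        // A scene change may only start a new keyframe once the minimum
        // interval (--min-keyint / -i, default 12) has passed; setting
        // min == max therefore disables this path entirely.
        true
    } else {
        false
    }
}
```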
......@@ -68,9 +68,16 @@ pub fn parse_cli() -> CliOptions {
.long("speed")
.takes_value(true)
.default_value("3")
).arg(
Arg::with_name("MIN_KEYFRAME_INTERVAL")
.help("Minimum interval between keyframes")
.short("i")
.long("min-keyint")
.takes_value(true)
.default_value("12")
).arg(
Arg::with_name("KEYFRAME_INTERVAL")
.help("Keyframe interval")
.help("Maximum interval between keyframes")
.short("I")
.long("keyint")
.takes_value(true)
......@@ -120,16 +127,21 @@ pub fn parse_cli() -> CliOptions {
fn parse_config(matches: &ArgMatches) -> EncoderConfig {
let speed = matches.value_of("SPEED").unwrap().parse().unwrap();
let quantizer = matches.value_of("QP").unwrap().parse().unwrap();
let min_interval = matches.value_of("MIN_KEYFRAME_INTERVAL").unwrap().parse().unwrap();
let max_interval = matches.value_of("KEYFRAME_INTERVAL").unwrap().parse().unwrap();
// Validate arguments
if quantizer == 0 {
unimplemented!("Lossless encoding not yet implemented");
} else if quantizer > 255 || speed > 10 {
panic!("argument out of range");
} else if min_interval > max_interval {
panic!("Maximum keyframe interval must be greater than or equal to minimum keyframe interval");
}
let mut cfg = EncoderConfig::with_speed_preset(speed);
cfg.key_frame_interval = matches.value_of("KEYFRAME_INTERVAL").unwrap().parse().unwrap();
cfg.min_key_frame_interval = min_interval;
cfg.max_key_frame_interval = max_interval;
cfg.low_latency = matches.value_of("LOW_LATENCY").unwrap().parse().unwrap();
cfg.tune = matches.value_of("TUNE").unwrap().parse().unwrap();
cfg.quantizer = quantizer;
......@@ -207,23 +219,19 @@ pub fn process_frame(
}
let _ = ctx.send_frame(input);
true
}
_ => {
let frames_to_be_coded = ctx.get_frame_count();
ctx.set_frames_to_be_coded(frames_to_be_coded);
ctx.flush();
false
}
}
} else {
ctx.flush();
false
};
let mut has_data = true;
let mut frame_summaries = Vec::new();
while has_data {
loop {
let pkt_wrapped = ctx.receive_packet();
match pkt_wrapped {
Ok(pkt) => {
......@@ -306,7 +314,7 @@ pub fn process_frame(
}
frame_summaries.push(pkt.into());
},
_ => { has_data = false; }
_ => { break; }
}
}
Ok(frame_summaries)
......
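For context, here is a small hypothetical sketch (not code from this commit) of how the new options land in the encoder configuration, using only names visible in this diff (`EncoderConfig::with_speed_preset`, `min_key_frame_interval`, `max_key_frame_interval`); the import path and the value 240 are assumptions:

```rust
// Hypothetical example of the fields the new CLI options feed into; the
// `rav1e::EncoderConfig` path and the 240-frame maximum are assumptions.
use rav1e::EncoderConfig;

fn example_config() -> EncoderConfig {
    let mut cfg = EncoderConfig::with_speed_preset(3);
    cfg.min_key_frame_interval = 12;  // --min-keyint / -i (default 12)
    cfg.max_key_frame_interval = 240; // --keyint / -I
    // Setting the minimum equal to the maximum disables scene change detection.
    cfg
}
```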
......@@ -79,6 +79,65 @@ impl Frame {
]
}
}
/// Returns a `PixelIter` containing the data of this frame's planes in YUV format.
/// Each point in the `PixelIter` is a triple consisting of a Y, U, and V component.
/// The `PixelIter` is laid out as contiguous rows, e.g. to get a given 0-indexed row
/// you could use `data.skip(width * row_idx).take(width)`.
///
/// This data retains any padding, e.g. it uses the width and height specified in
/// the Y-plane's `cfg` struct, and not the display width and height specified in
/// `FrameInvariants`.
pub fn iter(&self) -> PixelIter {
PixelIter::new(&self.planes)
}
}
#[derive(Debug)]
pub struct PixelIter<'a> {
planes: &'a [Plane; 3],
y: usize,
x: usize,
}
impl<'a> PixelIter<'a> {
pub fn new(planes: &'a [Plane; 3]) -> Self {
PixelIter {
planes,
y: 0,
x: 0,
}
}
fn width(&self) -> usize {
self.planes[0].cfg.width
}
fn height(&self) -> usize {
self.planes[0].cfg.height
}
}
impl<'a> Iterator for PixelIter<'a> {
type Item = (u16, u16, u16);
fn next(&mut self) -> Option<<Self as Iterator>::Item> {
if self.y == self.height() - 1 && self.x == self.width() - 1 {
return None;
}
let pixel = (
self.planes[0].p(self.x, self.y),
self.planes[1].p(self.x / 2, self.y / 2),
self.planes[2].p(self.x / 2, self.y / 2),
);
if self.x == self.width() - 1 {
self.x = 0;
self.y += 1;
} else {
self.x += 1;
}
Some(pixel)
}
}
#[derive(Debug, Clone)]
......
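A small hypothetical usage sketch of the iterator added above (not from the diff): it averages the Y component of a frame, assuming `Frame` is in scope from the encoder module.

```rust
// Hypothetical usage of Frame::iter() (not part of the commit). Averages the
// Y component over the (Y, U, V) triples yielded by PixelIter, counting pixels
// from the iterator itself rather than assuming it yields width * height items.
fn average_luma(frame: &Frame) -> u64 {
    let (sum, count) = frame
        .iter()
        .fold((0u64, 0u64), |(sum, n), (y, _u, _v)| (sum + y as u64, n + 1));
    if count == 0 { 0 } else { sum / count }
}
```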
......@@ -37,6 +37,7 @@ pub mod lrf;
pub mod encoder;
pub mod me;
pub mod scan_order;
pub mod scenechange;
mod api;
......
use encoder::Frame;
use api::FrameInfo;
use std::sync::Arc;
/// Detects fast cuts using changes in colour and intensity between frames.
/// Since the difference between frames is used, only fast cuts are detected
/// with this method. This is probably fine for the purpose of choosing keyframes.
pub struct SceneChangeDetector {
/// Minimum average difference between YUV deltas that will trigger a scene change.
threshold: u8,
/// Frame number and frame reference of the last frame analyzed
last_frame: Option<(usize, Arc<Frame>)>,
}
impl Default for SceneChangeDetector {
fn default() -> Self {
Self {
// This implementation is based on a Python implementation at
// https://pyscenedetect.readthedocs.io/en/latest/reference/detection-methods/.
// The Python implementation uses HSV values and a threshold of 30. Comparing the
// YUV values was sufficient in most cases, and avoided a more costly YUV->RGB->HSV
// conversion, but the deltas needed to be scaled down. The deltas for keyframes
// in YUV were about 1/3 to 1/2 of what they were in HSV, but non-keyframes were
// very unlikely to have a delta greater than 3 in YUV, whereas they may reach into
// the double digits in HSV. Therefore, 12 was chosen as a reasonable default threshold.
// This may be adjusted later.
threshold: 12,
last_frame: None,
}
}
}
impl SceneChangeDetector {
pub fn new(frame_info: &FrameInfo) -> Self {
let mut detector = Self::default();
detector.threshold = detector.threshold * frame_info.bit_depth as u8 / 8;
detector
}
pub fn detect_scene_change(&mut self, curr_frame: Arc<Frame>, frame_num: usize) -> bool {
let mut is_change = false;
match self.last_frame {
Some((last_num, ref last_frame)) if last_num == frame_num - 1 => {
let len = curr_frame.planes[0].cfg.width * curr_frame.planes[0].cfg.height;
let delta_yuv = last_frame.iter().zip(curr_frame.iter())
.map(|(last, cur)| (
(cur.0 as i16 - last.0 as i16).abs() as u64,
(cur.1 as i16 - last.1 as i16).abs() as u64,
(cur.2 as i16 - last.2 as i16).abs() as u64
)).fold((0, 0, 0), |(ht, st, vt), (h, s, v)| (ht + h, st + s, vt + v));
let delta_yuv = (
(delta_yuv.0 / len as u64) as u16,
(delta_yuv.1 / len as u64) as u16,
(delta_yuv.2 / len as u64) as u16
);
let delta_avg = ((delta_yuv.0 + delta_yuv.1 + delta_yuv.2) / 3) as u8;
is_change = delta_avg >= self.threshold;
}
_ => ()
}
self.last_frame = Some((frame_num, curr_frame));
is_change
}
}
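A hypothetical driver sketch for the detector (not part of the commit), marking scene-change frames in a sequence. `frames` and `frame_info` are assumed inputs, and `Frame`, `FrameInfo`, and `SceneChangeDetector` are assumed to be in scope as in the module above; the threshold scales with bit depth as shown (12 for 8-bit input, 15 for 10-bit, 18 for 12-bit).

```rust
use std::sync::Arc;

// Hypothetical usage of SceneChangeDetector (not part of the commit).
// `frames` holds consecutive frames; their indices double as frame numbers.
fn scene_changes(frames: &[Arc<Frame>], frame_info: &FrameInfo) -> Vec<usize> {
    let mut detector = SceneChangeDetector::new(frame_info);
    let mut cuts = Vec::new();
    for (i, frame) in frames.iter().enumerate() {
        // The detector compares against the previously submitted frame, so
        // frame 0 can never be reported as a change.
        if detector.detect_scene_change(Arc::clone(frame), i) {
            cuts.push(i);
        }
    }
    cuts
}
```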
......@@ -72,7 +72,8 @@ impl Drop for AomDecoder {
fn setup_encoder(
w: usize, h: usize, speed: usize, quantizer: usize, bit_depth: usize,
chroma_sampling: ChromaSampling
chroma_sampling: ChromaSampling, min_keyint: u64, max_keyint: u64,
low_latency: bool
) -> Context {
unsafe {
av1_rtcd();
......@@ -81,6 +82,9 @@ fn setup_encoder(
let mut enc = EncoderConfig::with_speed_preset(speed);
enc.quantizer = quantizer;
enc.min_key_frame_interval = min_keyint;
enc.max_key_frame_interval = max_keyint;
enc.low_latency = low_latency;
let cfg = Config {
frame_info: FrameInfo { width: w, height: h, bit_depth, chroma_sampling },
......@@ -105,7 +109,7 @@ fn speed() {
for b in DIMENSION_OFFSETS.iter() {
for s in 0..10 {
encode_decode(w + b.0, h + b.1, s, quantizer, limit, 8);
encode_decode(w + b.0, h + b.1, s, quantizer, limit, 8, 15, 15, true);
}
}
}
......@@ -135,7 +139,7 @@ fn dimensions() {
let speed = 4;
for (w, h) in DIMENSIONS.iter() {
encode_decode(*w, *h, speed, quantizer, limit, 8);
encode_decode(*w, *h, speed, quantizer, limit, 8, 15, 15, true);
}
}
......@@ -148,11 +152,35 @@ fn quantizer() {
for b in DIMENSION_OFFSETS.iter() {
for &q in [80, 100, 120].iter() {
encode_decode(w + b.0, h + b.1, speed, q, limit, 8);
encode_decode(w + b.0, h + b.1, speed, q, limit, 8, 15, 15, true);
}
}
}
#[test]
fn keyframes() {
let limit = 12;
let w = 64;
let h = 80;
let speed = 10;
let q = 100;
encode_decode(w, h, speed, q, limit, 8, 6, 6, true);
}
#[test]
fn reordering() {
let limit = 12;
let w = 64;
let h = 80;
let speed = 10;
let q = 100;
for keyint in &[4, 5, 6] {
encode_decode(w, h, speed, q, limit, 8, *keyint, *keyint, false);
}
}
#[test]
#[ignore]
fn odd_size_frame_with_full_rdo() {
......@@ -162,7 +190,7 @@ fn odd_size_frame_with_full_rdo() {
let speed = 0;
let qindex = 100;
encode_decode(w, h, speed, qindex, limit, 8);
encode_decode(w, h, speed, qindex, limit, 8, 15, 15, true);
}
#[test]
......@@ -174,10 +202,10 @@ fn high_bd() {
let h = 80;
// 10-bit
encode_decode(w, h, speed, quantizer, limit, 10);
encode_decode(w, h, speed, quantizer, limit, 10, 15, 15, true);
// 12-bit
encode_decode(w, h, speed, quantizer, limit, 12);
encode_decode(w, h, speed, quantizer, limit, 12, 15, 15, true);
}
fn compare_plane<T: Ord + std::fmt::Debug>(
......@@ -233,14 +261,15 @@ fn compare_img(img: *const aom_image_t, frame: &Frame, bit_depth: usize, width:
fn encode_decode(
w: usize, h: usize, speed: usize, quantizer: usize, limit: usize,
bit_depth: usize
bit_depth: usize, min_keyint: u64, max_keyint: u64, low_latency: bool
) {
use std::ptr;
let mut ra = ChaChaRng::from_seed([0; 32]);
let mut dec = setup_decoder(w, h);
let mut ctx =
setup_encoder(w, h, speed, quantizer, bit_depth, ChromaSampling::Cs420);
setup_encoder(w, h, speed, quantizer, bit_depth, ChromaSampling::Cs420,
min_keyint, max_keyint, low_latency);
println!("Encoding {}x{} speed {} quantizer {}", w, h, speed, quantizer);
......
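A hypothetical additional test sketch (not in this commit) that would exercise adaptive placement by leaving a gap between the minimum and maximum intervals, using the extended `encode_decode` signature above:

```rust
// Hypothetical test (not part of the commit): with min_keyint < max_keyint,
// keyframe placement between the two bounds is left to scene change detection.
#[test]
fn adaptive_keyframes() {
    let limit = 12;
    let w = 64;
    let h = 80;
    let speed = 10;
    let q = 100;
    encode_decode(w, h, speed, q, limit, 8, 2, 10, true);
}
```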