diff --git a/test/svc_test.cc b/test/svc_test.cc index 417790bffa140b18e95275989a1fbb2a3bfdb3e7..e9cf38def05591e2c4cbe9fc84641f48a3ebecb1 100644 --- a/test/svc_test.cc +++ b/test/svc_test.cc @@ -167,6 +167,24 @@ TEST_F(SvcTest, SetQuantizersOption) { codec_initialized_ = true; } +TEST_F(SvcTest, SetAutoAltRefOption) { + svc_.spatial_layers = 5; + vpx_codec_err_t res = vpx_svc_set_options(&svc_, "auto-alt-refs=none"); + EXPECT_EQ(VPX_CODEC_OK, res); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + res = vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1,1,0"); + EXPECT_EQ(VPX_CODEC_OK, res); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); + + vpx_svc_set_options(&svc_, "auto-alt-refs=0,1,1,1,0"); + res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + EXPECT_EQ(VPX_CODEC_OK, res); + codec_initialized_ = true; +} + TEST_F(SvcTest, SetQuantizers) { vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,30"); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res); @@ -362,6 +380,7 @@ TEST_F(SvcTest, TwoPassEncode) { codec_enc_.g_pass = VPX_RC_FIRST_PASS; vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); vpx_svc_set_quantizers(&svc_, "40,30"); + vpx_svc_set_options(&svc_, "auto-alt-refs=1,1"); vpx_codec_err_t res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); @@ -410,6 +429,9 @@ TEST_F(SvcTest, TwoPassEncode) { vpx_codec_err_t res_dec; int frame_size; codec_enc_.g_pass = VPX_RC_LAST_PASS; + vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); + vpx_svc_set_quantizers(&svc_, "40,30"); + vpx_svc_set_options(&svc_, "auto-alt-refs=1,1"); codec_enc_.rc_twopass_stats_in.buf = &stats_buf[0]; codec_enc_.rc_twopass_stats_in.sz = stats_buf.size(); diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 4f6f9fa5d7e63fce7e6acace83e422a6ed00db5c..4cd8c2e79e310629e818c79d3a21679f6252b55e 100644 --- a/vp8/vp8_cx_iface.c +++ 
b/vp8/vp8_cx_iface.c @@ -9,6 +9,7 @@ */ +#include "./vpx_config.h" #include "vp8_rtcd.h" #include "vpx/vpx_codec.h" #include "vpx/internal/vpx_codec_internal.h" @@ -1314,6 +1315,9 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = "vp8.fpf" /* first pass filename */ #endif VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ +#ifdef CONFIG_SPATIAL_SVC + {0}, +#endif {0}, /* ss_target_bitrate */ 1, /* ts_number_layers */ {0}, /* ts_target_bitrate */ diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index dc572aa9de329d1496db8e6d0a403343b2a89f1c..1a4b880c0b411af7ffea64340fcc4f224c466f20 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -206,6 +206,12 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->twopass.this_frame_mb_stats.mb_stats = NULL; } #endif + + for (i = 0; i < MAX_LAG_BUFFERS; ++i) { + vp9_free_frame_buffer(&cpi->svc.scaled_frames[i]); + } + vpx_memset(&cpi->svc.scaled_frames[0], 0, + MAX_LAG_BUFFERS * sizeof(cpi->svc.scaled_frames[0])); } static void save_coding_context(VP9_COMP *cpi) { @@ -476,6 +482,15 @@ static void update_frame_size(VP9_COMP *cpi) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; vp9_update_frame_size(cm); init_macroblockd(cm, xd); + + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to reallocate alt_ref_buffer"); + } } void vp9_new_framerate(VP9_COMP *cpi, double framerate) { @@ -2486,7 +2501,7 @@ void adjust_frame_rate(VP9_COMP *cpi) { static int get_arf_src_index(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int arf_src_index = 0; - if (is_altref_enabled(&cpi->oxcf)) { + if (is_altref_enabled(cpi)) { if (cpi->pass == 2) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { @@ 
-2565,13 +2580,27 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #ifdef CONFIG_SPATIAL_SVC if (is_spatial_svc) cpi->source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, - arf_src_index, 1); + arf_src_index, 0); else #endif cpi->source = vp9_lookahead_peek(cpi->lookahead, arf_src_index); if (cpi->source != NULL) { cpi->alt_ref_source = cpi->source; +#ifdef CONFIG_SPATIAL_SVC + if (is_spatial_svc && cpi->svc.spatial_layer_id > 0) { + int i; + // Reference a hidden frame from a lower layer + for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) { + if (cpi->oxcf.ss_play_alternate[i]) { + cpi->gld_fb_idx = cpi->svc.layer_context[i].alt_ref_idx; + break; + } + } + } + cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1; +#endif + if (cpi->oxcf.arnr_max_frames > 0) { // Produce the filtered ARF frame. vp9_temporal_filter(cpi, arf_src_index); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 9f8b37f2d9a70a8a7c362484a51a2e0f56e1d6b9..35a3358ee3feb6bb373c79bd8cabd1f45b172b68 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -195,6 +195,7 @@ typedef struct VP9EncoderConfig { int ts_number_layers; // Number of temporal layers. // Bitrate allocation for spatial layers. int ss_target_bitrate[VPX_SS_MAX_LAYERS]; + int ss_play_alternate[VPX_SS_MAX_LAYERS]; // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. 
int ts_target_bitrate[VPX_TS_MAX_LAYERS]; int ts_rate_decimator[VPX_TS_MAX_LAYERS]; @@ -229,10 +230,6 @@ typedef struct VP9EncoderConfig { vp8e_tuning tuning; } VP9EncoderConfig; -static INLINE int is_altref_enabled(const VP9EncoderConfig *cfg) { - return cfg->mode != REALTIME && cfg->play_alternate && cfg->lag_in_frames > 0; -} - static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; } @@ -535,6 +532,13 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); +static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { + return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 && + (cpi->oxcf.play_alternate && + (!(cpi->use_svc && cpi->svc.number_temporal_layers == 1) || + cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id])); +} + static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index ad73c4cd27168903ad504ddb165d242e364c823f..b83659159fa2006dc4704554e8a90666331a3a4e 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -486,24 +486,33 @@ void vp9_first_pass(VP9_COMP *cpi) { const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + if (cpi->common.current_video_frame == 0) { + cpi->ref_frame_flags = 0; + } else { + LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + if (lc->current_video_frame_in_layer == 0) + cpi->ref_frame_flags = VP9_GOLD_FLAG; + else + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + vp9_scale_references(cpi); // Use either last frame or alt frame for motion search. 
if (cpi->ref_frame_flags & VP9_LAST_FLAG) { scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); ref_frame = LAST_FRAME; - } else if (cpi->ref_frame_flags & VP9_ALT_FLAG) { - scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, ALTREF_FRAME); - ref_frame = ALTREF_FRAME; + } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME); + ref_frame = GOLDEN_FRAME; } - if (scaled_ref_buf != NULL) { - // Update the stride since we are using scaled reference buffer + if (scaled_ref_buf != NULL) first_ref_buf = scaled_ref_buf; - recon_y_stride = first_ref_buf->y_stride; - recon_uv_stride = first_ref_buf->uv_stride; - uv_mb_height = 16 >> (first_ref_buf->y_height > first_ref_buf->uv_height); - } + + recon_y_stride = new_yv12->y_stride; + recon_uv_stride = new_yv12->uv_stride; + uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height); // Disable golden frame for svc first pass for now. gld_yv12 = NULL; @@ -909,6 +918,8 @@ void vp9_first_pass(VP9_COMP *cpi) { } ++cm->current_video_frame; + if (cpi->use_svc) + vp9_inc_frame_in_layer(&cpi->svc); } static double calc_correction_factor(double err_per_mb, @@ -1506,7 +1517,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mv_ratio_accumulator_thresh; - unsigned int allow_alt_ref = is_altref_enabled(oxcf); + unsigned int allow_alt_ref = is_altref_enabled(cpi); int f_boost = 0; int b_boost = 0; @@ -2080,6 +2091,11 @@ void configure_buffer_updates(VP9_COMP *cpi) { default: assert(0); } + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + cpi->refresh_golden_frame = 0; + if (cpi->alt_ref_source == NULL) + cpi->refresh_alt_ref_frame = 0; + } } @@ -2122,6 +2138,18 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { #endif vp9_rc_set_frame_target(cpi, target_rate); cm->frame_type = INTER_FRAME; + + if (is_spatial_svc) { + if (cpi->svc.spatial_layer_id == 
0) { + lc->is_key_frame = 0; + } else { + lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; + + if (lc->is_key_frame) + cpi->ref_frame_flags &= (~VP9_LAST_FLAG); + } + } + return; } @@ -2189,7 +2217,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } rc->frames_till_gf_update_due = rc->baseline_gf_interval; - cpi->refresh_golden_frame = 1; + if (!is_spatial_svc) + cpi->refresh_golden_frame = 1; } { diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index e1109838c7212966c5496ba21649a64715564650..aa8e4f06ec3cc7ba12a1fbb886c0c336cebdacf7 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -1097,7 +1097,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (is_altref_enabled(oxcf) && cpi->refresh_alt_ref_frame && + if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. 
update_alt_ref_frame_stats(cpi); @@ -1349,8 +1349,9 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, return target_index - qindex; } -void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf, +void vp9_rc_set_gf_max_interval(const VP9_COMP *const cpi, RATE_CONTROL *const rc) { + const VP9EncoderConfig *const oxcf = &cpi->oxcf; // Set Maximum gf/arf interval rc->max_gf_interval = 16; @@ -1359,7 +1360,7 @@ void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf, if (rc->static_scene_max_gf_interval > (MAX_LAG_BUFFERS * 2)) rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2; - if (is_altref_enabled(oxcf)) { + if (is_altref_enabled(cpi)) { if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; } @@ -1392,5 +1393,5 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) { rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); - vp9_rc_set_gf_max_interval(oxcf, rc); + vp9_rc_set_gf_max_interval(cpi, rc); } diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index a15235c922a1ee9b833ba7869ebbf1bf989e27be..456daf48d032bc569bee95ae9c9150938ffd6daf 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -189,7 +189,7 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, void vp9_rc_update_framerate(struct VP9_COMP *cpi); -void vp9_rc_set_gf_max_interval(const struct VP9EncoderConfig *const oxcf, +void vp9_rc_set_gf_max_interval(const struct VP9_COMP *const cpi, RATE_CONTROL *const rc); #ifdef __cplusplus diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 07c17b22aaecb427021d19c19c21ec758a268f81..1eb450928afa35f4e96cda561f0b3c44b08aad6f 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -19,6 +19,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { const VP9EncoderConfig *const 
oxcf = &cpi->oxcf; int layer; int layer_end; + int alt_ref_idx = svc->number_spatial_layers; svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; @@ -34,7 +35,6 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { RATE_CONTROL *const lrc = &lc->rc; int i; lc->current_video_frame_in_layer = 0; - lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; lrc->ni_av_qi = oxcf->worst_allowed_q; lrc->total_actual_bits = 0; lrc->total_target_vs_actual = 0; @@ -48,14 +48,24 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { lrc->rate_correction_factors[i] = 1.0; } + lc->layer_size = 0; if (svc->number_temporal_layers > 1) { lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; } else { lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; + lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + if (oxcf->ss_play_alternate[layer]) + lc->alt_ref_idx = alt_ref_idx++; + else + lc->alt_ref_idx = -1; } lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level_ms), @@ -153,7 +163,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { oxcf->two_pass_vbrmin_section / 100); lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / 100); - vp9_rc_set_gf_max_interval(oxcf, lrc); + vp9_rc_set_gf_max_interval(cpi, lrc); } void vp9_restore_layer_context(VP9_COMP *const cpi) { @@ -164,6 +174,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->rc = lc->rc; cpi->twopass = lc->twopass; cpi->oxcf.target_bandwidth = lc->target_bandwidth; + cpi->alt_ref_source = lc->alt_ref_source; // Reset the frames_since_key and 
frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). if (cpi->svc.number_temporal_layers > 1) { @@ -179,6 +190,7 @@ void vp9_save_layer_context(VP9_COMP *const cpi) { lc->rc = cpi->rc; lc->twopass = cpi->twopass; lc->target_bandwidth = (int)oxcf->target_bandwidth; + lc->alt_ref_source = cpi->alt_ref_source; } void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { @@ -239,7 +251,7 @@ int vp9_svc_lookahead_push(const VP9_COMP *const cpi, struct lookahead_ctx *ctx, static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) { int layer_id; vpx_svc_parameters_t *layer_param; - vpx_enc_frame_flags_t flags; + LAYER_CONTEXT *lc; // Find the next layer to be encoded for (layer_id = 0; layer_id < cpi->svc.number_spatial_layers; ++layer_id) { @@ -251,12 +263,46 @@ static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) { return 1; layer_param = &buf->svc_params[layer_id]; - buf->flags = flags = layer_param->flags; cpi->svc.spatial_layer_id = layer_param->spatial_layer; cpi->svc.temporal_layer_id = layer_param->temporal_layer; - cpi->lst_fb_idx = layer_param->lst_fb_idx; - cpi->gld_fb_idx = layer_param->gld_fb_idx; - cpi->alt_fb_idx = layer_param->alt_fb_idx; + + cpi->lst_fb_idx = cpi->svc.spatial_layer_id; + + if (cpi->svc.spatial_layer_id < 1) + cpi->gld_fb_idx = cpi->lst_fb_idx; + else + cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1; + + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + + if (lc->current_video_frame_in_layer == 0) { + if (cpi->svc.spatial_layer_id >= 2) + cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2; + else + cpi->alt_fb_idx = cpi->lst_fb_idx; + } else { + if (cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]) { + cpi->alt_fb_idx = lc->alt_ref_idx; + if (!lc->has_alt_frame) + cpi->ref_frame_flags &= (~VP9_ALT_FLAG); + } else { + // Find a proper alt_fb_idx for layers that don't have alt ref frame + if (cpi->svc.spatial_layer_id == 0) { + 
cpi->alt_fb_idx = cpi->lst_fb_idx; + } else { + LAYER_CONTEXT *lc_lower = + &cpi->svc.layer_context[cpi->svc.spatial_layer_id - 1]; + + if (cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id - 1] && + lc_lower->alt_ref_source != NULL) + cpi->alt_fb_idx = lc_lower->alt_ref_idx; + else if (cpi->svc.spatial_layer_id >= 2) + cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2; + else + cpi->alt_fb_idx = cpi->lst_fb_idx; + } + } + } if (vp9_set_size_literal(cpi, layer_param->width, layer_param->height) != 0) return VPX_CODEC_INVALID_PARAM; @@ -270,9 +316,7 @@ static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) { vp9_set_high_precision_mv(cpi, 1); - // Retrieve the encoding flags for each layer and apply it to encoder. - // It includes reference frame flags and update frame flags. - vp9_apply_encoding_flags(cpi, flags); + cpi->alt_ref_source = get_layer_context(&cpi->svc)->alt_ref_source; return 0; } diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 3ebb831b566240b83aae1eca9d34f2dfae8beb3a..7b533e467a2a640d1705c1f3932260adaddc8431 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -29,6 +29,10 @@ typedef struct { unsigned int current_video_frame_in_layer; int is_key_frame; vpx_svc_parameters_t svc_params_received; + struct lookahead_entry *alt_ref_source; + int alt_ref_idx; + int has_alt_frame; + size_t layer_size; } LAYER_CONTEXT; typedef struct { @@ -36,6 +40,11 @@ typedef struct { int temporal_layer_id; int number_spatial_layers; int number_temporal_layers; + + // Store scaled source frames to be used for temporal filter to generate + // an alt ref frame. + YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS]; + // Layer context used for rate control in one pass temporal CBR mode or // two pass spatial mode. Defined for temporal or spatial layers for now. // Does not support temporal combined with spatial RC. 
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index caa831cc9cbd63a71a86eeaa5dc200bb659b6a64..6af8510a4971bf4ff878f80fe7592b8624ff14dc 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -432,12 +432,6 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { frames_to_blur_forward = ((frames_to_blur - 1) / 2); start_frame = distance + frames_to_blur_forward; - // Setup scaling factors. Scaling on each of the arnr frames not supported. - vp9_setup_scale_factors_for_frame(&sf, - get_frame_new_buffer(cm)->y_crop_width, - get_frame_new_buffer(cm)->y_crop_height, - cm->width, cm->height); - // Setup frame pointers, NULL indicates frame not included in filter. vp9_zero(cpi->frames); for (frame = 0; frame < frames_to_blur; ++frame) { @@ -447,6 +441,41 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { cpi->frames[frames_to_blur - 1 - frame] = &buf->img; } + // Setup scaling factors. Scaling on each of the arnr frames is not supported. + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + // In spatial svc the scaling factors might be less than 1/2. So we will use + // non-normative scaling. 
+ int frame_used = 0; + vp9_setup_scale_factors_for_frame(&sf, + get_frame_new_buffer(cm)->y_crop_width, + get_frame_new_buffer(cm)->y_crop_height, + get_frame_new_buffer(cm)->y_crop_width, + get_frame_new_buffer(cm)->y_crop_height); + + for (frame = 0; frame < frames_to_blur; ++frame) { + if (cm->mi_cols * MI_SIZE != cpi->frames[frame]->y_width || + cm->mi_rows * MI_SIZE != cpi->frames[frame]->y_height) { + if (vp9_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used], + cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, + VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, + NULL)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to reallocate alt_ref_buffer"); + + cpi->frames[frame] = + vp9_scale_if_required(cm, cpi->frames[frame], + &cpi->svc.scaled_frames[frame_used]); + ++frame_used; + } + } + } else { + vp9_setup_scale_factors_for_frame(&sf, + get_frame_new_buffer(cm)->y_crop_width, + get_frame_new_buffer(cm)->y_crop_height, + cm->width, cm->height); + } + temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward, strength, &sf); } diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 24dcbfa1c8566c1b998f127e2e1970ee4a8ebbf2..19a8927c7b93738f98f2d6b85eb2bfedfbb2d0fe 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -11,6 +11,7 @@ #include <stdlib.h> #include <string.h> +#include "./vpx_config.h" #include "vpx/vpx_codec.h" #include "vpx/internal/vpx_codec_internal.h" #include "./vpx_version.h" @@ -88,7 +89,7 @@ struct vpx_codec_alg_priv { size_t pending_frame_magnitude; vpx_image_t preview_img; vp8_postproc_cfg_t preview_ppcfg; - vpx_codec_pkt_list_decl(128) pkt_list; + vpx_codec_pkt_list_decl(256) pkt_list; unsigned int fixed_kf_cntr; }; @@ -174,6 +175,19 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, } RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); + +#ifdef CONFIG_SPATIAL_SVC + if (cfg->ss_number_layers > 1) { + int i, alt_ref_sum = 0; + for (i = 0; i < cfg->ss_number_layers; ++i) 
{ + if (cfg->ss_enable_auto_alt_ref[i]) + ++alt_ref_sum; + } + if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers) + ERROR("Not enough ref buffers for svc alt ref frames"); + } +#endif + RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); if (cfg->ts_number_layers > 1) { unsigned int i; @@ -382,8 +396,12 @@ static vpx_codec_err_t set_encoder_config( if (oxcf->ss_number_layers > 1) { int i; - for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i]; +#ifdef CONFIG_SPATIAL_SVC + oxcf->ss_play_alternate[i] = cfg->ss_enable_auto_alt_ref[i]; +#endif + } } else if (oxcf->ss_number_layers == 1) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; } @@ -864,6 +882,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_cx_pkt_t pkt; VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; +#ifdef CONFIG_SPATIAL_SVC + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) + cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size; +#endif + // Pack invisible frames with the next visible frame if (cpi->common.show_frame == 0 #ifdef CONFIG_SPATIAL_SVC @@ -936,6 +959,18 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); cx_data += size; cx_data_sz -= size; +#ifdef CONFIG_SPATIAL_SVC + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + vpx_codec_cx_pkt_t pkt = {0}; + int i; + pkt.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + pkt.data.layer_sizes[i] = cpi->svc.layer_context[i].layer_size; + cpi->svc.layer_context[i].layer_size = 0; + } + vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); + } +#endif } } } @@ -1245,6 +1280,9 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { 9999, // kf_max_dist VPX_SS_DEFAULT_LAYERS, // ss_number_layers +#ifdef CONFIG_SPATIAL_SVC + {0}, +#endif {0}, // ss_target_bitrate 1, 
// ts_number_layers {0}, // ts_target_bitrate diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index 6c15f6ef993ae727e68e0e764cd6e266f176d912..93e86e3195ebdb6b3d7efc5ba1e706bdf82aac76 100644 --- a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -21,10 +21,12 @@ #include <string.h> #define VPX_DISABLE_CTRL_TYPECHECKS 1 #define VPX_CODEC_DISABLE_COMPAT 1 +#include "./vpx_config.h" #include "vpx/svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_onyxc_int.h" #ifdef __MINGW32__ #define strtok_r strtok_s @@ -65,6 +67,7 @@ typedef struct SvcInternal { int scaling_factor_num[VPX_SS_MAX_LAYERS]; int scaling_factor_den[VPX_SS_MAX_LAYERS]; int quantizer[VPX_SS_MAX_LAYERS]; + int enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; // accumulated statistics double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V @@ -80,7 +83,6 @@ typedef struct SvcInternal { int encode_frame_count; int frame_received; int frame_within_gop; - vpx_enc_frame_flags_t enc_frame_flags; int layers; int layer; int is_keyframe; @@ -238,6 +240,59 @@ static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx, return res; } +static vpx_codec_err_t parse_auto_alt_ref(SvcContext *svc_ctx, + const char *alt_ref_options) { + char *input_string; + char *token; + const char *delim = ","; + char *save_ptr; + int found = 0, enabled = 0; + int i, value; + vpx_codec_err_t res = VPX_CODEC_OK; + SvcInternal *const si = get_svc_internal(svc_ctx); + + if (alt_ref_options == NULL || strlen(alt_ref_options) == 0) { + return VPX_CODEC_INVALID_PARAM; + } else { + input_string = strdup(alt_ref_options); + } + + token = strtok_r(input_string, delim, &save_ptr); + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + if (token != NULL) { + value = atoi(token); + if (value < 0 || value > 1) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "enable auto alt ref values: invalid value %s\n", token); + res = VPX_CODEC_INVALID_PARAM; + break; + 
} + token = strtok_r(NULL, delim, &save_ptr); + found = i + 1; + } else { + value = 0; + } + si->enable_auto_alt_ref[i] = value; + if (value > 0) + ++enabled; + } + if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "svc: auto alt ref: %d values required, but only %d specified\n", + svc_ctx->spatial_layers, found); + res = VPX_CODEC_INVALID_PARAM; + } + if (enabled > REF_FRAMES - svc_ctx->spatial_layers) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "svc: auto alt ref: Maximum %d(REF_FRAMES - layers) layers could " + "enable auto alt reference frame, but %d layers are enabled\n", + REF_FRAMES - svc_ctx->spatial_layers, enabled); + res = VPX_CODEC_INVALID_PARAM; + } + free(input_string); + return res; +} + static void log_invalid_scale_factor(SvcContext *svc_ctx, const char *value) { svc_log(svc_ctx, SVC_LOG_ERROR, "svc scale-factors: invalid value %s\n", value); @@ -335,6 +390,9 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { } else if (strcmp("quantizers", option_name) == 0) { res = parse_quantizer_values(svc_ctx, option_value); if (res != VPX_CODEC_OK) break; + } else if (strcmp("auto-alt-refs", option_name) == 0) { + res = parse_auto_alt_ref(svc_ctx, option_value); + if (res != VPX_CODEC_OK) break; } else { svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name); res = VPX_CODEC_INVALID_PARAM; @@ -382,6 +440,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *enc_cfg) { vpx_codec_err_t res; + int i; SvcInternal *const si = get_svc_internal(svc_ctx); if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL || enc_cfg == NULL) { @@ -428,7 +487,6 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, // TODO(Minghai): Optimize the mechanism of allocating bits after // implementing svc two pass rate control. 
if (si->layers > 1) { - int i; float total = 0; float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; @@ -452,6 +510,9 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, } } + for (i = 0; i < si->layers; ++i) + enc_cfg->ss_enable_auto_alt_ref[i] = si->enable_auto_alt_ref[i]; + // modify encoder configuration enc_cfg->ss_number_layers = si->layers; enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder. @@ -482,106 +543,10 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1); vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1); - vpx_codec_control(codec_ctx, VP8E_SET_ENABLEAUTOALTREF, 0); return VPX_CODEC_OK; } -static void accumulate_frame_size_for_each_layer(SvcInternal *const si, - const uint8_t *const buf, - const size_t size) { - uint8_t marker = buf[size - 1]; - if ((marker & 0xe0) == 0xc0) { - const uint32_t frames = (marker & 0x7) + 1; - const uint32_t mag = ((marker >> 3) & 0x3) + 1; - const size_t index_sz = 2 + mag * frames; - - uint8_t marker2 = buf[size - index_sz]; - - if (size >= index_sz && marker2 == marker) { - // found a valid superframe index - uint32_t i, j; - const uint8_t *x = &buf[size - index_sz + 1]; - - // frames has a maximum of 8 and mag has a maximum of 4. 
- for (i = 0; i < frames; i++) { - uint32_t this_sz = 0; - - for (j = 0; j < mag; j++) - this_sz |= (*x++) << (j * 8); - si->bytes_sum[i] += this_sz; - } - } - } -} - -// SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h - -// encoder should reference the last frame -#define USE_LAST (1 << 0) - -// encoder should reference the alt ref frame -#define USE_ARF (1 << 1) - -// encoder should reference the golden frame -#define USE_GF (1 << 2) - -// encoder should copy current frame to the last frame buffer -#define UPDATE_LAST (1 << 3) - -// encoder should copy current frame to the alt ref frame buffer -#define UPDATE_ARF (1 << 4) - -// encoder should copy current frame to the golden frame -#define UPDATE_GF (1 << 5) - -static int map_vp8_flags(int svc_flags) { - int flags = 0; - - if (!(svc_flags & USE_LAST)) flags |= VP8_EFLAG_NO_REF_LAST; - if (!(svc_flags & USE_ARF)) flags |= VP8_EFLAG_NO_REF_ARF; - if (!(svc_flags & USE_GF)) flags |= VP8_EFLAG_NO_REF_GF; - - if (svc_flags & UPDATE_LAST) { - // last is updated automatically - } else { - flags |= VP8_EFLAG_NO_UPD_LAST; - } - if (svc_flags & UPDATE_ARF) { - flags |= VP8_EFLAG_FORCE_ARF; - } else { - flags |= VP8_EFLAG_NO_UPD_ARF; - } - if (svc_flags & UPDATE_GF) { - flags |= VP8_EFLAG_FORCE_GF; - } else { - flags |= VP8_EFLAG_NO_UPD_GF; - } - return flags; -} - -static void calculate_enc_frame_flags(SvcContext *svc_ctx) { - vpx_enc_frame_flags_t flags = VPX_EFLAG_FORCE_KF; - SvcInternal *const si = get_svc_internal(svc_ctx); - const int is_keyframe = (si->frame_within_gop == 0); - - // keyframe layer zero is identical for all modes - if (is_keyframe && si->layer == 0) { - si->enc_frame_flags = VPX_EFLAG_FORCE_KF; - return; - } - - if (si->layer == 0) { - flags = map_vp8_flags(USE_LAST | UPDATE_LAST); - } else if (is_keyframe) { - flags = map_vp8_flags(USE_ARF | UPDATE_LAST); - } else { - flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST); - } - - si->enc_frame_flags = flags; -} - 
vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx, int layer, unsigned int *width, @@ -621,7 +586,6 @@ static void set_svc_parameters(SvcContext *svc_ctx, memset(&svc_params, 0, sizeof(svc_params)); svc_params.temporal_layer = 0; svc_params.spatial_layer = si->layer; - svc_params.flags = si->enc_frame_flags; layer = si->layer; if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(svc_ctx, layer, @@ -640,33 +604,6 @@ static void set_svc_parameters(SvcContext *svc_ctx, } svc_params.distance_from_i_frame = si->frame_within_gop; - - // Use buffer i for layer i LST - svc_params.lst_fb_idx = si->layer; - - // Use buffer i-1 for layer i Alt (Inter-layer prediction) - svc_params.alt_fb_idx = (si->layer > 0) ? si->layer - 1 : 0; - svc_params.gld_fb_idx = svc_params.lst_fb_idx; - - svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, %dx%d, q: %d\n", - si->encode_frame_count, si->layer, svc_params.width, - svc_params.height, svc_params.min_quantizer); - - if (svc_params.flags == VPX_EFLAG_FORCE_KF) { - svc_log(svc_ctx, SVC_LOG_DEBUG, "flags == VPX_EFLAG_FORCE_KF\n"); - } else { - svc_log( - svc_ctx, SVC_LOG_DEBUG, "Using: LST/GLD/ALT [%2d|%2d|%2d]\n", - svc_params.flags & VP8_EFLAG_NO_REF_LAST ? -1 : svc_params.lst_fb_idx, - svc_params.flags & VP8_EFLAG_NO_REF_GF ? -1 : svc_params.gld_fb_idx, - svc_params.flags & VP8_EFLAG_NO_REF_ARF ? -1 : svc_params.alt_fb_idx); - svc_log( - svc_ctx, SVC_LOG_DEBUG, "Updating: LST/GLD/ALT [%2d|%2d|%2d]\n", - svc_params.flags & VP8_EFLAG_NO_UPD_LAST ? -1 : svc_params.lst_fb_idx, - svc_params.flags & VP8_EFLAG_NO_UPD_GF ? -1 : svc_params.gld_fb_idx, - svc_params.flags & VP8_EFLAG_NO_UPD_ARF ? 
-1 : svc_params.alt_fb_idx); - } - vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &svc_params); } @@ -705,7 +642,6 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, if (rawimg != NULL) { // encode each layer for (si->layer = 0; si->layer < si->layers; ++si->layer) { - calculate_enc_frame_flags(svc_ctx); set_svc_parameters(svc_ctx, codec_ctx); } } @@ -723,8 +659,6 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, fd_list_add(&si->frame_list, fd_create(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, cx_pkt->data.frame.flags)); - accumulate_frame_size_for_each_layer(si, cx_pkt->data.frame.buf, - cx_pkt->data.frame.sz); svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, " "pts: %d\n", si->frame_received, @@ -775,6 +709,12 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz; break; } + case VPX_CODEC_SPATIAL_SVC_LAYER_SIZES: { + int i; + for (i = 0; i < si->layers; ++i) + si->bytes_sum[i] += cx_pkt->data.layer_sizes[i]; + break; + } default: { break; } diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 67cbdb1f511f0411e20ac95a69a3eeef4ea10d82..36c587f616cbc8d82dffc50699fd964ddaa23672 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -298,7 +298,6 @@ typedef struct vpx_svc_parameters { unsigned int height; /**< height of current spatial layer */ int spatial_layer; /**< current spatial layer number - 0 = base */ int temporal_layer; /**< current temporal layer number - 0 = base */ - int flags; /**< encode frame flags */ int max_quantizer; /**< max quantizer for current layer */ int min_quantizer; /**< min quantizer for current layer */ int distance_from_i_frame; /**< frame number within current gop */ diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index 571ad3fc52fd99f62c716bacee7c49315f565974..7e7b774e15bfea18ee3a3e2766cedc2245e2da25 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -156,6 +156,9 @@ 
extern "C" { VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */ VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */ VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ +#ifdef CONFIG_SPATIAL_SVC + VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/ +#endif VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */ }; @@ -191,6 +194,9 @@ extern "C" { double psnr[4]; /**< PSNR, total/y/u/v */ } psnr; /**< data for PSNR packet */ struct vpx_fixed_buf raw; /**< data for arbitrary packets */ +#ifdef CONFIG_SPATIAL_SVC + size_t layer_sizes[VPX_SS_MAX_LAYERS]; +#endif /* This packet size is fixed to allow codecs to extend this * interface without having to manage storage for raw packets, @@ -623,6 +629,15 @@ extern "C" { */ unsigned int ss_number_layers; +#ifdef CONFIG_SPATIAL_SVC + /*!\brief Enable auto alt reference flags for each spatial layer. + * + * These values specify if auto alt reference frame is enabled for each + * spatial layer. + */ + int ss_enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; +#endif + /*!\brief Target bitrate for each spatial layer. * * These values specify the target coding bitrate to be used for each