From 7c514e2dfdb514b3efbf25d505232e9a4152689c Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Mon, 28 Sep 2015 15:55:46 -0700 Subject: [PATCH] Merged branch 'master' into nextgenv2 Resolved Conflicts in the following files: configure vp10/common/idct.c vp10/encoder/dct.c vp10/encoder/encodemb.c vp10/encoder/rdopt.c Change-Id: I4cb3986b0b80de65c722ca29d53a0a57f5a94316 --- build/make/configure.sh | 77 +++++++++++++- build/make/iosbuild.sh | 16 ++- configure | 4 + examples/vp9_spatial_svc_encoder.c | 78 ++++++++++++++ test/encode_test_driver.h | 5 + test/resize_test.cc | 39 +++++-- test/vp10_dct_test.cc | 3 +- test/vp10_inv_txfm_test.cc | 4 +- test/vp9_arf_freq_test.cc | 14 +++ test/vp9_encoder_parms_get_to_decoder.cc | 9 +- test/vp9_end_to_end_test.cc | 14 +++ vp10/common/idct.c | 69 ++++++------ vp10/common/idct.h | 8 +- vp10/common/loopfilter.c | 28 +++++ vp10/common/loopfilter.h | 5 + vp10/common/onyxc_int.h | 4 +- vp10/common/reconinter.c | 47 +++++++++ vp10/common/reconinter.h | 4 + vp10/decoder/decodeframe.c | 50 ++++++--- vp10/decoder/decoder.c | 3 + vp10/encoder/bitstream.c | 36 +++++-- vp10/encoder/block.h | 8 -- vp10/encoder/dct.c | 30 +++--- vp10/encoder/encodeframe.c | 21 +--- vp10/encoder/encodemb.c | 63 ++++++----- vp10/encoder/encodemb.h | 10 +- vp10/encoder/encoder.c | 31 +++--- vp10/encoder/encoder.h | 2 + vp10/encoder/rdopt.c | 63 ++++------- vp10/vp10_cx_iface.c | 16 +++ vp10/vp10_dx_iface.c | 14 +-- vp10/vp10_iface_common.h | 4 + vp8/encoder/ratectrl.c | 27 +++++ vp8/vp8_dx_iface.c | 4 +- vp9/common/vp9_blockd.h | 1 - vp9/common/vp9_onyxc_int.h | 5 +- vp9/common/vp9_thread_common.c | 48 ++++----- vp9/common/vp9_thread_common.h | 4 +- vp9/decoder/vp9_decodeframe.c | 55 +++++----- vp9/decoder/vp9_decoder.c | 4 +- vp9/decoder/vp9_decoder.h | 1 - vp9/encoder/vp9_aq_cyclicrefresh.c | 56 ++++++++-- vp9/encoder/vp9_aq_cyclicrefresh.h | 7 ++ vp9/encoder/vp9_bitstream.c | 16 +-- vp9/encoder/vp9_encodeframe.c | 2 + vp9/encoder/vp9_encoder.c | 27 +++-- 
vp9/encoder/vp9_encoder.h | 2 + vp9/encoder/vp9_ethread.c | 2 +- vp9/encoder/vp9_pickmode.c | 18 ++-- vp9/encoder/vp9_rdopt.c | 6 ++ vp9/encoder/vp9_svc_layercontext.c | 22 ++-- vp9/encoder/vp9_svc_layercontext.h | 6 ++ vp9/vp9_cx_iface.c | 31 ++++++ vp9/vp9_dx_iface.c | 14 +-- vp9/vp9_iface_common.h | 4 + vpx/src/svc_encodeframe.c | 6 +- vpx/vp8cx.h | 40 +++++++ vpx/vpx_image.h | 4 + vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c | 116 --------------------- vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm | 40 +++---- vpx_ports/bitops.h | 10 ++ vpx_scale/yv12config.h | 2 + vpxdec.c | 22 ++-- 63 files changed, 905 insertions(+), 476 deletions(-) diff --git a/build/make/configure.sh b/build/make/configure.sh index 12b994963..c592b6385 100644 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -73,6 +73,7 @@ Build options: --target=TARGET target platform tuple [generic-gnu] --cpu=CPU optimize for a specific cpu rather than a family --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS] + --extra-cxxflags=ECXXFLAGS add ECXXFLAGS to CXXFLAGS [$CXXFLAGS] ${toggle_extra_warnings} emit harmless warnings (always non-fatal) ${toggle_werror} treat warnings as errors, if possible (not available with all compilers) @@ -200,6 +201,10 @@ disabled(){ eval test "x\$$1" = "xno" } +# Iterates through positional parameters, checks to confirm the parameter has +# not been explicitly (force) disabled, and enables the setting controlled by +# the parameter when the setting is not disabled. +# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS). soft_enable() { for var in $*; do if ! disabled $var; then @@ -209,6 +214,10 @@ soft_enable() { done } +# Iterates through positional parameters, checks to confirm the parameter has +# not been explicitly (force) enabled, and disables the setting controlled by +# the parameter when the setting is not enabled. +# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS). soft_disable() { for var in $*; do if ! 
enabled $var; then @@ -337,6 +346,10 @@ check_add_cflags() { check_cflags "$@" && add_cflags_only "$@" } +check_add_cxxflags() { + check_cxxflags "$@" && add_cxxflags_only "$@" +} + check_add_asflags() { log add_asflags "$@" add_asflags "$@" @@ -503,6 +516,9 @@ process_common_cmdline() { --extra-cflags=*) extra_cflags="${optval}" ;; + --extra-cxxflags=*) + extra_cxxflags="${optval}" + ;; --enable-?*|--disable-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then @@ -617,6 +633,11 @@ show_darwin_sdk_path() { xcodebuild -sdk $1 -version Path 2>/dev/null } +# Print the major version number of the Darwin SDK specified by $1. +show_darwin_sdk_major_version() { + xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1 +} + process_common_toolchain() { if [ -z "$toolchain" ]; then gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}" @@ -728,7 +749,15 @@ process_common_toolchain() { # Handle darwin variants. Newer SDKs allow targeting older # platforms, so use the newest one available. case ${toolchain} in - *-darwin*) + arm*-darwin*) + add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}" + iphoneos_sdk_dir="$(show_darwin_sdk_path iphoneos)" + if [ -d "${iphoneos_sdk_dir}" ]; then + add_cflags "-isysroot ${iphoneos_sdk_dir}" + add_ldflags "-isysroot ${iphoneos_sdk_dir}" + fi + ;; + x86*-darwin*) osx_sdk_dir="$(show_darwin_sdk_path macosx)" if [ -d "${osx_sdk_dir}" ]; then add_cflags "-isysroot ${osx_sdk_dir}" @@ -803,10 +832,36 @@ process_common_toolchain() { if disabled neon && enabled neon_asm; then die "Disabling neon while keeping neon-asm is not supported" fi - soft_enable media + case ${toolchain} in + # Apple iOS SDKs no longer support armv6 as of the version 9 + # release (coincides with release of Xcode 7). Only enable media + # when using earlier SDK releases. 
+ *-darwin*) + if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then + soft_enable media + else + soft_disable media + RTCD_OPTIONS="${RTCD_OPTIONS}--disable-media " + fi + ;; + *) + soft_enable media + ;; + esac ;; armv6) - soft_enable media + case ${toolchain} in + *-darwin*) + if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then + soft_enable media + else + die "Your iOS SDK does not support armv6." + fi + ;; + *) + soft_enable media + ;; + esac ;; esac @@ -989,6 +1044,12 @@ EOF done asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl" + + if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then + check_add_cflags -fembed-bitcode + check_add_asflags -fembed-bitcode + check_add_ldflags -fembed-bitcode + fi ;; linux*) @@ -1159,7 +1220,8 @@ EOF && AS="" fi [ "${AS}" = auto ] || [ -z "${AS}" ] \ - && die "Neither yasm nor nasm have been found" + && die "Neither yasm nor nasm have been found." \ + "See the prerequisites section in the README for more info." ;; esac log_echo " using $AS" @@ -1198,6 +1260,13 @@ EOF enabled x86 && sim_arch="-arch i386" || sim_arch="-arch x86_64" add_cflags ${sim_arch} add_ldflags ${sim_arch} + + if [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then + # yasm v1.3.0 doesn't know what -fembed-bitcode means, so turning it + # on is pointless (unless building a C-only lib). Warn the user, but + # do nothing here. + log "Warning: Bitcode embed disabled for simulator targets." 
+ fi ;; os2) add_asflags -f aout diff --git a/build/make/iosbuild.sh b/build/make/iosbuild.sh index 89fa68186..927f3e532 100755 --- a/build/make/iosbuild.sh +++ b/build/make/iosbuild.sh @@ -41,13 +41,22 @@ TARGETS="arm64-darwin-gcc build_target() { local target="$1" local old_pwd="$(pwd)" + local target_specific_flags="" vlog "***Building target: ${target}***" + case "${target}" in + x86-*) + target_specific_flags="--enable-pic" + vlog "Enabled PIC for ${target}" + ;; + esac + mkdir "${target}" cd "${target}" eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \ - ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${devnull} + ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \ + ${devnull} export DIST_DIR eval make -j ${MAKE_JOBS} dist ${devnull} cd "${old_pwd}" @@ -199,6 +208,8 @@ cat << EOF --show-build-output: Show output from each library build. --targets : Override default target list. Defaults: ${TARGETS} + --test-link: Confirms all targets can be linked. Functionally identical to + passing --enable-examples via --extra-configure-args. --verbose: Output information about the environment and each stage of the build. 
EOF @@ -237,6 +248,9 @@ while [ -n "$1" ]; do --show-build-output) devnull= ;; + --test-link) + EXTRA_CONFIGURE_ARGS="${EXTRA_CONFIGURE_ARGS} --enable-examples" + ;; --targets) TARGETS="$2" shift diff --git a/configure b/configure index 508afd1a1..8e3f24eec 100755 --- a/configure +++ b/configure @@ -724,6 +724,10 @@ EOF check_add_cflags ${extra_cflags} || \ die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler" fi + if [ -n "${extra_cxxflags}" ]; then + check_add_cxxflags ${extra_cxxflags} || \ + die "Requested extra CXXFLAGS '${extra_cxxflags}' not supported by compiler" + fi } diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c index 9f4191150..b26e98734 100644 --- a/examples/vp9_spatial_svc_encoder.c +++ b/examples/vp9_spatial_svc_encoder.c @@ -544,6 +544,59 @@ vpx_codec_err_t parse_superframe_index(const uint8_t *data, } #endif +// Example pattern for spatial layers and 2 temporal layers used in the +// bypass/flexible mode. The pattern corresponds to the pattern +// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in +// non-flexible mode. 
+void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers, + int is_key_frame, + vpx_svc_ref_frame_config_t *ref_frame_config) { + for (sl = 0; sl < num_spatial_layers; ++sl) { + if (!tl) { + if (!sl) { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + } else { + if (is_key_frame) { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_LAST | + VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + } else { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + } + } + } else if (tl == 1) { + if (!sl) { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_GF; + } else { + ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_GF; + } + } + if (tl == 0) { + ref_frame_config->lst_fb_idx[sl] = sl; + if (sl) + ref_frame_config->gld_fb_idx[sl] = sl - 1; + else + ref_frame_config->gld_fb_idx[sl] = 0; + ref_frame_config->alt_fb_idx[sl] = 0; + } else if (tl == 1) { + ref_frame_config->lst_fb_idx[sl] = sl; + ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1; + ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl; + } + } +} + int main(int argc, const char **argv) { AppInput app_input = {0}; VpxVideoWriter *writer = NULL; @@ -564,6 +617,7 @@ int main(int argc, const char **argv) { VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL}; struct RateControlStats rc; vpx_svc_layer_id_t layer_id; + vpx_svc_ref_frame_config_t ref_frame_config; int sl, tl; double sum_bitrate = 0.0; double sum_bitrate2 = 0.0; @@ -653,6 +707,30 @@ int main(int argc, const char **argv) { end_of_stream = 1; } + // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates) + // and the buffer indices for each spatial layer of the current + // (super)frame to be encoded. 
The temporal layer_id for the current frame + // also needs to be set. + // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS" + // mode to "VP9E_LAYERING_MODE_BYPASS". + if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + // Example for 2 temporal layers. + if (frame_cnt % 2 == 0) + layer_id.temporal_layer_id = 0; + else + layer_id.temporal_layer_id = 1; + // Note that we only set the temporal layer_id, since we are calling + // the encode for the whole superframe. The encoder will internally loop + // over all the spatial layers for the current superframe. + vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); + set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id, + svc_ctx.spatial_layers, + frame_cnt == 0, + &ref_frame_config); + vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG, + &ref_frame_config); + } + vpx_usec_timer_start(&timer); res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw), pts, frame_duration, svc_ctx.speed >= 5 ? 
diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h index 9ecc4989e..6d0a72f98 100644 --- a/test/encode_test_driver.h +++ b/test/encode_test_driver.h @@ -124,6 +124,11 @@ class Encoder { ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } + void Control(int ctrl_id, int *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + void Control(int ctrl_id, struct vpx_scaling_mode *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); diff --git a/test/resize_test.cc b/test/resize_test.cc index a86c9d115..98b6f87e1 100644 --- a/test/resize_test.cc +++ b/test/resize_test.cc @@ -196,13 +196,27 @@ class ResizeInternalTest : public ResizeTest { virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { - if (video->frame() == kStepDownFrame) { - struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE}; - encoder->Control(VP8E_SET_SCALEMODE, &mode); - } - if (video->frame() == kStepUpFrame) { - struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL}; - encoder->Control(VP8E_SET_SCALEMODE, &mode); + if (change_config_) { + int new_q = 60; + if (video->frame() == 0) { + struct vpx_scaling_mode mode = {VP8E_ONETWO, VP8E_ONETWO}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + } + if (video->frame() == 1) { + struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q; + encoder->Config(&cfg_); + } + } else { + if (video->frame() == kStepDownFrame) { + struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + } + if (video->frame() == kStepUpFrame) { + struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL}; + encoder->Control(VP8E_SET_SCALEMODE, &mode); + } } } @@ -227,6 +241,7 @@ class ResizeInternalTest : 
public ResizeTest { #endif double frame0_psnr_; + bool change_config_; #if WRITE_COMPRESSED_STREAM FILE *outfile_; unsigned int out_frames_; @@ -237,6 +252,7 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) { ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 10); init_flags_ = VPX_CODEC_USE_PSNR; + change_config_ = false; // q picked such that initial keyframe on this clip is ~30dB PSNR cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; @@ -261,6 +277,15 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) { } } +TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) { + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 10); + cfg_.g_w = 352; + cfg_.g_h = 288; + change_config_ = true; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} + class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params { protected: diff --git a/test/vp10_dct_test.cc b/test/vp10_dct_test.cc index 8e49609de..b2c301ae3 100644 --- a/test/vp10_dct_test.cc +++ b/test/vp10_dct_test.cc @@ -107,6 +107,5 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values( FdctParam(&fdct4, &reference_dct_1d, 4, 1), FdctParam(&fdct8, &reference_dct_1d, 8, 1), - FdctParam(&fdct16, &reference_dct_1d, 16, 2), - FdctParam(&fdct32, &reference_dct_1d, 32, 4))); + FdctParam(&fdct16, &reference_dct_1d, 16, 2))); } // namespace diff --git a/test/vp10_inv_txfm_test.cc b/test/vp10_inv_txfm_test.cc index c49081ef8..6c0a3d242 100644 --- a/test/vp10_inv_txfm_test.cc +++ b/test/vp10_inv_txfm_test.cc @@ -203,7 +203,7 @@ TEST_P(Vp10PartialIDctTest, RunQuantCheck) { // quantization with maximum allowed step sizes test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336; for (int j = 1; j < last_nonzero_; ++j) - test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] + test_coef_block1[get_scan(tx_size_, DCT_DCT, 0)->scan[j]] = (output_ref_block[j] / 1828) * 1828; } @@ -265,7 +265,7 @@ 
TEST_P(Vp10PartialIDctTest, ResultsMatch) { max_energy_leftover = 0; coef = 0; } - test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef; + test_coef_block1[get_scan(tx_size_, DCT_DCT, 0)->scan[j]] = coef; } memcpy(test_coef_block2, test_coef_block1, diff --git a/test/vp9_arf_freq_test.cc b/test/vp9_arf_freq_test.cc index 87ff15b64..89200d408 100644 --- a/test/vp9_arf_freq_test.cc +++ b/test/vp9_arf_freq_test.cc @@ -230,9 +230,23 @@ VP9_INSTANTIATE_TEST_CASE( ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors)); +#if CONFIG_VP9_HIGHBITDEPTH +# if CONFIG_VP10_ENCODER +// TODO(angiebird): 25-29 fail in high bitdepth mode. +INSTANTIATE_TEST_CASE_P( + DISABLED_VP10, ArfFreqTest, + ::testing::Combine( + ::testing::Values(static_cast( + &libvpx_test::kVP10)), + ::testing::ValuesIn(kTestVectors), + ::testing::ValuesIn(kEncodeVectors), + ::testing::ValuesIn(kMinArfVectors))); +# endif // CONFIG_VP10_ENCODER +#else VP10_INSTANTIATE_TEST_CASE( ArfFreqTest, ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors)); +#endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/test/vp9_encoder_parms_get_to_decoder.cc b/test/vp9_encoder_parms_get_to_decoder.cc index 901605d06..0984e6a42 100644 --- a/test/vp9_encoder_parms_get_to_decoder.cc +++ b/test/vp9_encoder_parms_get_to_decoder.cc @@ -42,6 +42,7 @@ struct EncodeParameters { int32_t frame_parallel; int32_t color_range; vpx_color_space_t cs; + int render_size[2]; // TODO(JBB): quantizers / bitrate }; @@ -49,7 +50,7 @@ const EncodeParameters kVP9EncodeParameterSet[] = { {0, 0, 0, 1, 0, 0, VPX_CS_BT_601}, {0, 0, 0, 0, 0, 1, VPX_CS_BT_709}, {0, 0, 1, 0, 0, 1, VPX_CS_BT_2020}, - {0, 2, 0, 0, 1, 0, VPX_CS_UNKNOWN}, + {0, 2, 0, 0, 1, 0, VPX_CS_UNKNOWN, { 640, 480 }}, // TODO(JBB): Test profiles (requires more work). 
}; @@ -88,6 +89,8 @@ class VpxEncoderParmsGetToDecoder encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); + if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) + encoder->Control(VP9E_SET_RENDER_SIZE, encode_parms.render_size); } } @@ -118,6 +121,10 @@ class VpxEncoderParmsGetToDecoder } EXPECT_EQ(encode_parms.color_range, common->color_range); EXPECT_EQ(encode_parms.cs, common->color_space); + if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) { + EXPECT_EQ(encode_parms.render_size[0], common->render_width); + EXPECT_EQ(encode_parms.render_size[1], common->render_height); + } EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols); EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows); diff --git a/test/vp9_end_to_end_test.cc b/test/vp9_end_to_end_test.cc index e100eb95f..be1fa68c0 100644 --- a/test/vp9_end_to_end_test.cc +++ b/test/vp9_end_to_end_test.cc @@ -187,9 +187,23 @@ VP9_INSTANTIATE_TEST_CASE( ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kCpuUsedVectors)); +#if CONFIG_VP9_HIGHBITDEPTH +# if CONFIG_VP10_ENCODER +// TODO(angiebird): many fail in high bitdepth mode. 
+INSTANTIATE_TEST_CASE_P( + DISABLED_VP10, EndToEndTestLarge, + ::testing::Combine( + ::testing::Values(static_cast( + &libvpx_test::kVP10)), + ::testing::ValuesIn(kEncodingModeVectors), + ::testing::ValuesIn(kTestVectors), + ::testing::ValuesIn(kCpuUsedVectors))); +# endif // CONFIG_VP10_ENCODER +#else VP10_INSTANTIATE_TEST_CASE( EndToEndTestLarge, ::testing::ValuesIn(kEncodingModeVectors), ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kCpuUsedVectors)); +#endif // CONFIG_VP9_HIGHBITDEPTH } // namespace diff --git a/vp10/common/idct.c b/vp10/common/idct.c index 5656b06e7..62505eab1 100644 --- a/vp10/common/idct.c +++ b/vp10/common/idct.c @@ -447,20 +447,21 @@ void vp10_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, vpx_idct32x32_1024_add(input, dest, stride); } -void vp10_inv_txfm_add_4x4( - const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type, - void (*itxm_add_4x4)(const tran_low_t *input, - uint8_t *dest, int stride, int eob)) { - switch (tx_type) { - case DCT_DCT: - itxm_add_4x4(input, dest, stride, eob); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_iht4x4_16_add(input, dest, stride, tx_type); - break; +void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, + int stride, int eob, TX_TYPE tx_type, int lossless) { + if (lossless) { + assert(tx_type == DCT_DCT); + vp10_iwht4x4_add(input, dest, stride, eob); + } else { + switch (tx_type) { + case DCT_DCT: + vp10_idct4x4_add(input, dest, stride, eob); + break; + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + vp10_iht4x4_16_add(input, dest, stride, tx_type); + break; #if CONFIG_EXT_TX case FLIPADST_DCT: flipud(dest, stride, 4); @@ -506,9 +507,10 @@ void vp10_inv_txfm_add_4x4( fliplr(dest, stride, 4); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: + assert(0); + break; + } } } @@ -865,18 +867,20 @@ void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, void 
vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd, TX_TYPE tx_type, - void (*highbd_itxm_add_4x4) - (const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd)) { - switch (tx_type) { - case DCT_DCT: - highbd_itxm_add_4x4(input, dest, stride, eob, bd); - break; - case ADST_DCT: - case DCT_ADST: - case ADST_ADST: - vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); - break; + int lossless) { + if (lossless) { + assert(tx_type == DCT_DCT); + vp10_highbd_iwht4x4_add(input, dest, stride, eob, bd); + } else { + switch (tx_type) { + case DCT_DCT: + vp10_highbd_idct4x4_add(input, dest, stride, eob, bd); + break; + case ADST_DCT: + case DCT_ADST: + case ADST_ADST: + vp10_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); + break; #if CONFIG_EXT_TX case FLIPADST_DCT: flipud16(CONVERT_TO_SHORTPTR(dest), stride, 4); @@ -922,9 +926,10 @@ void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, fliplr16(CONVERT_TO_SHORTPTR(dest), stride, 4); break; #endif // CONFIG_EXT_TX - default: - assert(0); - break; + default: + assert(0); + break; + } } } diff --git a/vp10/common/idct.h b/vp10/common/idct.h index 2e000529b..088339804 100644 --- a/vp10/common/idct.h +++ b/vp10/common/idct.h @@ -44,9 +44,7 @@ void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp10_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, - int stride, int eob, TX_TYPE tx_type, - void (*itxm_add_4x4)(const tran_low_t *input, - uint8_t *dest, int stride, int eob)); + int stride, int eob, TX_TYPE tx_type, int lossless); void vp10_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride, int eob, TX_TYPE tx_type); void vp10_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, @@ -67,9 +65,7 @@ void vp10_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd); void vp10_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int 
stride, int eob, int bd, TX_TYPE tx_type, - void (*highbd_itxm_add_4x4) - (const tran_low_t *input, uint8_t *dest, - int stride, int eob, int bd)); + int lossless); void vp10_highbd_inv_txfm_add_8x8(const tran_low_t *input, uint8_t *dest, int stride, int eob, int bd, TX_TYPE tx_type); void vp10_highbd_inv_txfm_add_16x16(const tran_low_t *input, uint8_t *dest, diff --git a/vp10/common/loopfilter.c b/vp10/common/loopfilter.c index 1b89ed5a2..a1925de55 100644 --- a/vp10/common/loopfilter.c +++ b/vp10/common/loopfilter.c @@ -719,7 +719,11 @@ static void build_masks(const loop_filter_info_n *const lfi_n, uint64_t *const int_4x4_y = &lfm->int_4x4_y; uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; +#if CONFIG_MISC_FIXES + uint16_t *const int_4x4_uv = &lfm->left_int_4x4_uv; +#else uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; +#endif int i; // If filter level is 0 we don't loop filter. @@ -1015,7 +1019,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, lfm->above_uv[i] &= mask_uv; } lfm->int_4x4_y &= mask_y; +#if CONFIG_MISC_FIXES + lfm->above_int_4x4_uv = lfm->left_int_4x4_uv & mask_uv; +#else lfm->int_4x4_uv &= mask_uv; +#endif // We don't apply a wide loop filter on the last uv block row. If set // apply the shorter one instead. @@ -1049,7 +1057,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, lfm->above_uv[i] &= mask_uv; } lfm->int_4x4_y &= mask_y; +#if CONFIG_MISC_FIXES + lfm->left_int_4x4_uv &= mask_uv_int; +#else lfm->int_4x4_uv &= mask_uv_int; +#endif // We don't apply a wide loop filter on the last uv column. If set // apply the shorter one instead. 
@@ -1079,7 +1091,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8])); assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); +#if CONFIG_MISC_FIXES + assert(!(lfm->left_int_4x4_uv & lfm->left_uv[TX_16X16])); +#else assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); +#endif assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); @@ -1087,7 +1103,11 @@ void vp10_setup_mask(VP10_COMMON *const cm, const int mi_row, const int mi_col, assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); +#if CONFIG_MISC_FIXES + assert(!(lfm->above_int_4x4_uv & lfm->above_uv[TX_16X16])); +#else assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); +#endif } static void filter_selectively_vert(uint8_t *s, int pitch, @@ -1442,7 +1462,11 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm, uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; +#if CONFIG_MISC_FIXES + uint16_t mask_4x4_int = lfm->left_int_4x4_uv; +#else uint16_t mask_4x4_int = lfm->int_4x4_uv; +#endif assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); @@ -1494,7 +1518,11 @@ void vp10_filter_block_plane_ss11(VP10_COMMON *const cm, mask_16x16 = lfm->above_uv[TX_16X16]; mask_8x8 = lfm->above_uv[TX_8X8]; mask_4x4 = lfm->above_uv[TX_4X4]; +#if CONFIG_MISC_FIXES + mask_4x4_int = lfm->above_int_4x4_uv; +#else mask_4x4_int = lfm->int_4x4_uv; +#endif for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; diff --git a/vp10/common/loopfilter.h b/vp10/common/loopfilter.h index 
329ab75af..8db705aa0 100644 --- a/vp10/common/loopfilter.h +++ b/vp10/common/loopfilter.h @@ -80,7 +80,12 @@ typedef struct { uint64_t int_4x4_y; uint16_t left_uv[TX_SIZES]; uint16_t above_uv[TX_SIZES]; +#if CONFIG_MISC_FIXES + uint16_t left_int_4x4_uv; + uint16_t above_int_4x4_uv; +#else uint16_t int_4x4_uv; +#endif uint8_t lfl_y[64]; uint8_t lfl_uv[16]; } LOOP_FILTER_MASK; diff --git a/vp10/common/onyxc_int.h b/vp10/common/onyxc_int.h index eeaadc61d..c345068b0 100644 --- a/vp10/common/onyxc_int.h +++ b/vp10/common/onyxc_int.h @@ -132,8 +132,8 @@ typedef struct VP10Common { int color_range; int width; int height; - int display_width; - int display_height; + int render_width; + int render_height; int last_width; int last_height; diff --git a/vp10/common/reconinter.c b/vp10/common/reconinter.c index 97c4abc6a..fdcb9673c 100644 --- a/vp10/common/reconinter.c +++ b/vp10/common/reconinter.c @@ -128,6 +128,53 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, } } +void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, + int i, int ir, int ic, + int mi_row, int mi_col) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + MODE_INFO *const mi = xd->mi[0]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); + const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; + const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; + + uint8_t *const dst = &pd->dst.buf[(ir * pd->dst.stride + ic) << 2]; + int ref; + const int is_compound = has_second_ref(&mi->mbmi); + const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter]; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const uint8_t *pre = + &pd->pre[ref].buf[(ir * pd->pre[ref].stride + ic) << 2]; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp10_highbd_build_inter_predictor(pre, pd->pre[ref].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[ref].as_mv, + &xd->block_refs[ref]->sf, width, 
height, + ref, kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * ic, + mi_row * MI_SIZE + 4 * ir, xd->bd); + } else { + vp10_build_inter_predictor(pre, pd->pre[ref].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[ref].as_mv, + &xd->block_refs[ref]->sf, width, height, ref, + kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * ic, + mi_row * MI_SIZE + 4 * ir); + } +#else + vp10_build_inter_predictor(pre, pd->pre[ref].stride, + dst, pd->dst.stride, + &mi->bmi[i].as_mv[ref].as_mv, + &xd->block_refs[ref]->sf, width, height, ref, + kernel, MV_PRECISION_Q3, + mi_col * MI_SIZE + 4 * ic, + mi_row * MI_SIZE + 4 * ir); +#endif // CONFIG_VP9_HIGHBITDEPTH + } +} + static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, int mi_row, int mi_col, int plane_from, int plane_to) { diff --git a/vp10/common/reconinter.h b/vp10/common/reconinter.h index 1de405c4a..5678f473f 100644 --- a/vp10/common/reconinter.h +++ b/vp10/common/reconinter.h @@ -131,6 +131,10 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int x, int y, int w, int h, int mi_x, int mi_y); +void vp10_build_inter_predictor_sub8x8(MACROBLOCKD *xd, int plane, + int i, int ir, int ic, + int mi_row, int mi_col); + void vp10_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 7160e9ecb..fc94b75bc 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -124,6 +124,18 @@ static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { vp10_diff_update_prob(r, &fc->inter_mode_probs[i][j]); } +#if CONFIG_MISC_FIXES +static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm, + struct vpx_read_bit_buffer *rb) { + if (is_compound_reference_allowed(cm)) { + return vpx_rb_read_bit(rb) ? REFERENCE_MODE_SELECT + : (vpx_rb_read_bit(rb) ? 
COMPOUND_REFERENCE + : SINGLE_REFERENCE); + } else { + return SINGLE_REFERENCE; + } +} +#else static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm, vpx_reader *r) { if (is_compound_reference_allowed(cm)) { @@ -134,6 +146,7 @@ static REFERENCE_MODE read_frame_reference_mode(const VP10_COMMON *cm, return SINGLE_REFERENCE; } } +#endif static void read_frame_reference_mode_probs(VP10_COMMON *cm, vpx_reader *r) { FRAME_CONTEXT *const fc = cm->fc; @@ -203,9 +216,7 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, switch (tx_size) { case TX_4X4: vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, xd->bd, - tx_type, xd->lossless ? - vp10_highbd_iwht4x4_add : - vp10_highbd_idct4x4_add); + tx_type, xd->lossless); break; case TX_8X8: vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, xd->bd, @@ -228,8 +239,7 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, switch (tx_size) { case TX_4X4: vp10_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, tx_type, - xd->lossless ? vp10_iwht4x4_add : - vp10_idct4x4_add); + xd->lossless); break; case TX_8X8: vp10_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, tx_type); @@ -274,9 +284,7 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, switch (tx_size) { case TX_4X4: vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, xd->bd, - tx_type, xd->lossless ? - vp10_highbd_iwht4x4_add : - vp10_highbd_idct4x4_add); + tx_type, xd->lossless); break; case TX_8X8: vp10_highbd_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, xd->bd, @@ -299,8 +307,7 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, switch (tx_size) { case TX_4X4: vp10_inv_txfm_add_4x4(dqcoeff, dst, stride, eob, tx_type, - xd->lossless ? 
vp10_iwht4x4_add : - vp10_idct4x4_add); + xd->lossless); break; case TX_8X8: vp10_inv_txfm_add_8x8(dqcoeff, dst, stride, eob, tx_type); @@ -1169,12 +1176,12 @@ static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { return vpx_rb_read_bit(rb) ? SWITCHABLE : vpx_rb_read_literal(rb, 2); } -static void setup_display_size(VP10_COMMON *cm, - struct vpx_read_bit_buffer *rb) { - cm->display_width = cm->width; - cm->display_height = cm->height; +static void setup_render_size(VP10_COMMON *cm, + struct vpx_read_bit_buffer *rb) { + cm->render_width = cm->width; + cm->render_height = cm->height; if (vpx_rb_read_bit(rb)) - vp10_read_frame_size(rb, &cm->display_width, &cm->display_height); + vp10_read_frame_size(rb, &cm->render_width, &cm->render_height); } static void resize_mv_buffer(VP10_COMMON *cm) { @@ -1222,7 +1229,7 @@ static void setup_frame_size(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) { BufferPool *const pool = cm->buffer_pool; vp10_read_frame_size(rb, &width, &height); resize_context_buffers(cm, width, height); - setup_display_size(cm, rb); + setup_render_size(cm, rb); lock_buffer_pool(pool); if (vpx_realloc_frame_buffer( @@ -1246,6 +1253,8 @@ static void setup_frame_size(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) { pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, @@ -1304,7 +1313,7 @@ static void setup_frame_size_with_refs(VP10_COMMON *cm, } resize_context_buffers(cm, width, height); - setup_display_size(cm, rb); + setup_render_size(cm, rb); lock_buffer_pool(pool); if (vpx_realloc_frame_buffer( @@ -1328,6 +1337,8 @@ static void 
setup_frame_size_with_refs(VP10_COMMON *cm, pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } static void setup_tile_info(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) { @@ -1968,6 +1979,8 @@ static size_t read_uncompressed_header(VP10Decoder *pbi, #endif get_frame_new_buffer(cm)->color_space = cm->color_space; get_frame_new_buffer(cm)->color_range = cm->color_range; + get_frame_new_buffer(cm)->render_width = cm->render_width; + get_frame_new_buffer(cm)->render_height = cm->render_height; if (pbi->need_resync) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, @@ -2029,6 +2042,7 @@ static size_t read_uncompressed_header(VP10Decoder *pbi, setup_segmentation_dequant(cm); #if CONFIG_MISC_FIXES cm->tx_mode = xd->lossless ? 
ONLY_4X4 : read_tx_mode(rb); + cm->reference_mode = read_frame_reference_mode(cm, rb); #endif setup_tile_info(cm, rb); @@ -2089,7 +2103,9 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, for (i = 0; i < INTRA_INTER_CONTEXTS; i++) vp10_diff_update_prob(&r, &fc->intra_inter_prob[i]); +#if !CONFIG_MISC_FIXES cm->reference_mode = read_frame_reference_mode(cm, &r); +#endif if (cm->reference_mode != SINGLE_REFERENCE) setup_compound_reference_mode(cm); read_frame_reference_mode_probs(cm, &r); diff --git a/vp10/decoder/decoder.c b/vp10/decoder/decoder.c index 81bd35787..23851afa7 100644 --- a/vp10/decoder/decoder.c +++ b/vp10/decoder/decoder.c @@ -126,6 +126,9 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) { void vp10_decoder_remove(VP10Decoder *pbi) { int i; + if (!pbi) + return; + vpx_get_worker_interface()->end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); vpx_free(pbi->tile_data); diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 808a9e6fa..b0e5ac8ee 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -449,8 +449,7 @@ static void write_modes_b(VP10_COMP *cpi, const TileInfo *const tile, xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); m = xd->mi[0]; - cpi->td.mb.mbmi_ext = cpi->td.mb.mbmi_ext_base + - (mi_row * cm->mi_cols + mi_col); + cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], @@ -1046,14 +1045,14 @@ static size_t encode_tiles(VP10_COMP *cpi, uint8_t *data_ptr) { return total_size; } -static void write_display_size(const VP10_COMMON *cm, - struct vpx_write_bit_buffer *wb) { - const int scaling_active = cm->width != cm->display_width || - cm->height != cm->display_height; +static void write_render_size(const VP10_COMMON *cm, + struct vpx_write_bit_buffer *wb) { + const int scaling_active = cm->width != cm->render_width || + cm->height != cm->render_height; 
vpx_wb_write_bit(wb, scaling_active); if (scaling_active) { - vpx_wb_write_literal(wb, cm->display_width - 1, 16); - vpx_wb_write_literal(wb, cm->display_height - 1, 16); + vpx_wb_write_literal(wb, cm->render_width - 1, 16); + vpx_wb_write_literal(wb, cm->render_height - 1, 16); } } @@ -1062,7 +1061,7 @@ static void write_frame_size(const VP10_COMMON *cm, vpx_wb_write_literal(wb, cm->width - 1, 16); vpx_wb_write_literal(wb, cm->height - 1, 16); - write_display_size(cm, wb); + write_render_size(cm, wb); } static void write_frame_size_with_refs(VP10_COMP *cpi, @@ -1089,7 +1088,7 @@ static void write_frame_size_with_refs(VP10_COMP *cpi, vpx_wb_write_literal(wb, cm->height - 1, 16); } - write_display_size(cm, wb); + write_render_size(cm, wb); } static void write_sync_code(struct vpx_write_bit_buffer *wb) { @@ -1233,6 +1232,14 @@ static void write_uncompressed_header(VP10_COMP *cpi, cm->tx_mode = TX_4X4; else write_txfm_mode(cm->tx_mode, wb); + if (cpi->allow_comp_inter_inter) { + const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT; + const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE; + + vpx_wb_write_bit(wb, use_hybrid_pred); + if (!use_hybrid_pred) + vpx_wb_write_bit(wb, use_compound_pred); + } #endif write_tile_info(cm, wb); @@ -1272,8 +1279,9 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { counts->intra_inter[i]); if (cpi->allow_comp_inter_inter) { - const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE; const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT; +#if !CONFIG_MISC_FIXES + const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE; vpx_write_bit(&header_bc, use_compound_pred); if (use_compound_pred) { @@ -1283,6 +1291,12 @@ static size_t write_compressed_header(VP10_COMP *cpi, uint8_t *data) { vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i], counts->comp_inter[i]); } +#else + if (use_hybrid_pred) + for (i = 0; i < COMP_INTER_CONTEXTS; 
i++) + vp10_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i], + counts->comp_inter[i]); +#endif } if (cm->reference_mode != COMPOUND_REFERENCE) { diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h index cd3baa765..cb2a234c9 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h @@ -58,7 +58,6 @@ struct macroblock { MACROBLOCKD e_mbd; MB_MODE_INFO_EXT *mbmi_ext; - MB_MODE_INFO_EXT *mbmi_ext_base; int skip_block; int select_tx_size; int skip_recode; @@ -133,13 +132,6 @@ struct macroblock { // Strong color activity detection. Used in RTC coding mode to enhance // the visual quality at the boundary of moving color objects. uint8_t color_sensitivity[2]; - - void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride); - void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob); -#if CONFIG_VP9_HIGHBITDEPTH - void (*highbd_itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, - int eob, int bd); -#endif }; #ifdef __cplusplus diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c index cdbe655e9..f1fb19537 100644 --- a/vp10/encoder/dct.c +++ b/vp10/encoder/dct.c @@ -20,6 +20,20 @@ #include "vpx_dsp/fwd_txfm.h" #include "vpx_ports/mem.h" +static INLINE void range_check(const tran_low_t *input, const int size, + const int bit) { +#if CONFIG_COEFFICIENT_RANGE_CHECKING + int i; + for (i = 0; i < size; ++i) { + assert(abs(input[i]) < (1 << bit)); + } +#else + (void)input; + (void)size; + (void)bit; +#endif +} + #if CONFIG_EXT_TX void fdst4(const tran_low_t *input, tran_low_t *output) { static const int N = 4; @@ -98,20 +112,6 @@ void fdst16(const tran_low_t *input, tran_low_t *output) { } #endif // CONFIG_EXT_TX -static INLINE void range_check(const tran_low_t *input, const int size, - const int bit) { -#if CONFIG_COEFFICIENT_RANGE_CHECKING - int i; - for (i = 0; i < size; ++i) { - assert(abs(input[i]) < (1 << bit)); - } -#else - (void)input; - (void)size; - (void)bit; -#endif -} - static void fdct4(const tran_low_t 
*input, tran_low_t *output) { tran_high_t temp; tran_low_t step[4]; @@ -400,6 +400,7 @@ static void fdct16(const tran_low_t *input, tran_low_t *output) { range_check(output, 16, 16); } +/* #TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32 static void fdct32(const tran_low_t *input, tran_low_t *output) { tran_high_t temp; tran_low_t step[32]; @@ -797,6 +798,7 @@ static void fdct32(const tran_low_t *input, tran_low_t *output) { range_check(output, 32, 18); } +*/ static void fadst4(const tran_low_t *input, tran_low_t *output) { tran_high_t x0, x1, x2, x3; diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 175bf7b01..ceb9eb471 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -170,15 +170,16 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP10_COMP *cpi, // Lighter version of set_offsets that only sets the mode info // pointers. -static INLINE void set_mode_info_offsets(VP10_COMMON *const cm, +static INLINE void set_mode_info_offsets(VP10_COMP *const cpi, MACROBLOCK *const x, MACROBLOCKD *const xd, int mi_row, int mi_col) { + VP10_COMMON *const cm = &cpi->common; const int idx_str = xd->mi_stride * mi_row + mi_col; xd->mi = cm->mi_grid_visible + idx_str; xd->mi[0] = cm->mi + idx_str; - x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); + x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); } static void set_offsets(VP10_COMP *cpi, const TileInfo *const tile, @@ -193,7 +194,7 @@ static void set_offsets(VP10_COMP *cpi, const TileInfo *const tile, set_skip_context(xd, mi_row, mi_col); - set_mode_info_offsets(cm, x, xd, mi_row, mi_col); + set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); mbmi = &xd->mi[0]->mbmi; @@ -244,7 +245,7 @@ static void set_block_size(VP10_COMP * const cpi, int mi_row, int mi_col, BLOCK_SIZE bsize) { if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { - set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col); + 
set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); xd->mi[0]->mbmi.sb_type = bsize; } } @@ -2695,18 +2696,6 @@ static void encode_frame_internal(VP10_COMP *cpi) { cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; -#if CONFIG_VP9_HIGHBITDEPTH - if (cm->use_highbitdepth) - x->fwd_txm4x4 = xd->lossless ? vp10_highbd_fwht4x4 : vpx_highbd_fdct4x4; - else - x->fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4; - x->highbd_itxm_add = xd->lossless ? vp10_highbd_iwht4x4_add : - vp10_highbd_idct4x4_add; -#else - x->fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4; -#endif // CONFIG_VP9_HIGHBITDEPTH - x->itxm_add = xd->lossless ? vp10_iwht4x4_add : vp10_idct4x4_add; - if (xd->lossless) x->optimize = 0; diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 51ed11295..2d04115a0 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -387,16 +387,17 @@ static void copy_fliplrud(const int16_t *src, int src_stride, int l, } #endif // CONFIG_EXT_TX -void vp10_fwd_txfm_4x4(const int16_t *src_diff, - tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, - void (*fwd_txm4x4)(const int16_t *input, - tran_low_t *output, int stride)) { +void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type, int lossless) { + if (lossless) { + vp10_fwht4x4(src_diff, coeff, diff_stride); + } else { #if CONFIG_EXT_TX int16_t src_diff2[16]; #endif // CONFIG_EXT_TX switch (tx_type) { case DCT_DCT: - fwd_txm4x4(src_diff, coeff, diff_stride); + vpx_fdct4x4(src_diff, coeff, diff_stride); break; case ADST_DCT: case DCT_ADST: @@ -444,6 +445,7 @@ void vp10_fwd_txfm_4x4(const int16_t *src_diff, default: assert(0); break; + } } } @@ -709,15 +711,17 @@ static void fwd_txfm_32x32_1(const int16_t *src_diff, #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - void (*highbd_fwd_txm4x4)(const int16_t *input, - tran_low_t *output, int stride)) { + int 
diff_stride, TX_TYPE tx_type, int lossless) { + if (lossless) { + assert(tx_type == DCT_DCT); + vp10_highbd_fwht4x4(src_diff, coeff, diff_stride); + } else { #if CONFIG_EXT_TX int16_t src_diff2[16]; #endif // CONFIG_EXT_TX switch (tx_type) { case DCT_DCT: - highbd_fwd_txm4x4(src_diff, coeff, diff_stride); + vpx_highbd_fdct4x4(src_diff, coeff, diff_stride); break; case ADST_DCT: case DCT_ADST: @@ -765,6 +769,7 @@ void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, default: assert(0); break; + } } } @@ -1084,7 +1089,7 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, break; case TX_4X4: vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - x->fwd_txm4x4); + xd->lossless); vp10_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, @@ -1121,7 +1126,7 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, break; case TX_4X4: vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - x->fwd_txm4x4); + xd->lossless); vp10_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, @@ -1174,7 +1179,7 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, break; case TX_4X4: vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - x->fwd_txm4x4); + xd->lossless); vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); @@ -1207,7 +1212,7 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, break; case TX_4X4: vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - x->fwd_txm4x4); + xd->lossless); vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); @@ -1264,7 +1269,7 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, break; case TX_4X4: vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - x->fwd_txm4x4); + 
xd->lossless); vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, @@ -1300,7 +1305,7 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, scan_order->scan, scan_order->iscan); break; case TX_4X4: - vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, x->fwd_txm4x4); + vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, xd->lossless); vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, @@ -1403,7 +1408,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, // case. vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd, tx_type, - x->highbd_itxm_add); + xd->lossless); break; default: assert(0 && "Invalid transform size"); @@ -1432,7 +1437,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, // which is significant (not just an optimization) for the lossless // case. 
vp10_inv_txfm_add_4x4(dqcoeff, dst, pd->dst.stride, p->eobs[block], - tx_type, x->itxm_add); + tx_type, xd->lossless); break; default: assert(0 && "Invalid transform size"); @@ -1457,11 +1462,21 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, if (p->eobs[block] > 0) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - x->highbd_itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block], xd->bd); - return; + if (xd->lossless) { + vp10_highbd_iwht4x4_add(dqcoeff, dst, pd->dst.stride, + p->eobs[block], xd->bd); + } else { + vp10_highbd_idct4x4_add(dqcoeff, dst, pd->dst.stride, + p->eobs[block], xd->bd); + } + return; } #endif // CONFIG_VP9_HIGHBITDEPTH - x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + if (xd->lossless) { + vp10_iwht4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + } else { + vp10_idct4x4_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + } } } @@ -1582,7 +1597,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride, xd->bd); vp10_highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, - x->fwd_txm4x4); + xd->lossless); vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, @@ -1594,7 +1609,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, // eob<=1 which is significant (not just an optimization) for the // lossless case. 
vp10_highbd_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, xd->bd, - tx_type, x->highbd_itxm_add); + tx_type, xd->lossless); break; default: assert(0); @@ -1651,7 +1666,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, if (!x->skip_recode) { vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride); - vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, x->fwd_txm4x4); + vp10_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, xd->lossless); vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, @@ -1663,7 +1678,7 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, // which is significant (not just an optimization) for the lossless // case. vp10_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type, - x->itxm_add); + xd->lossless); } break; default: diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h index 928be6c5e..62a7db4a2 100644 --- a/vp10/encoder/encodemb.h +++ b/vp10/encoder/encodemb.h @@ -39,16 +39,12 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); -void vp10_fwd_txfm_4x4(const int16_t *src_diff, - tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, - void (*fwd_txm4x4)(const int16_t *input, - tran_low_t *output, int stride)); +void vp10_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type, int lossless); #if CONFIG_VP9_HIGHBITDEPTH void vp10_highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TX_TYPE tx_type, - void (*highbd_fwd_txm4x4)(const int16_t *input, - tran_low_t *output, int stride)); + int diff_stride, TX_TYPE tx_type, int lossless); #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 582b2c1a0..76045b8f0 100644 --- 
a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -711,7 +711,6 @@ static void update_frame_size(VP10_COMP *cpi) { vp10_set_mb_mi(cm, cm->width, cm->height); vp10_init_context_buffers(cm); vp10_init_macroblockd(cm, xd, NULL); - cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base; memset(cpi->mbmi_ext_base, 0, cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base)); @@ -1457,8 +1456,13 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) { cm->interp_filter = cpi->sf.default_interp_filter; - cm->display_width = cpi->oxcf.width; - cm->display_height = cpi->oxcf.height; + if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) { + cm->render_width = cpi->oxcf.render_width; + cm->render_height = cpi->oxcf.render_height; + } else { + cm->render_width = cpi->oxcf.width; + cm->render_height = cpi->oxcf.height; + } cm->width = cpi->oxcf.width; cm->height = cpi->oxcf.height; @@ -1822,14 +1826,15 @@ VP10_COMP *vp10_create_compressor(VP10EncoderConfig *oxcf, snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) void vp10_remove_compressor(VP10_COMP *cpi) { - VP10_COMMON *const cm = &cpi->common; + VP10_COMMON *cm; unsigned int i; int t; if (!cpi) return; - if (cpi && (cm->current_video_frame > 0)) { + cm = &cpi->common; + if (cm->current_video_frame > 0) { #if CONFIG_INTERNAL_STATS vpx_clear_system_state(); @@ -3628,6 +3633,8 @@ static void encode_frame_to_data_rate(VP10_COMP *cpi, cm->frame_to_show = get_frame_new_buffer(cm); cm->frame_to_show->color_space = cm->color_space; cm->frame_to_show->color_range = cm->color_range; + cm->frame_to_show->render_width = cm->render_width; + cm->frame_to_show->render_height = cm->render_height; // Pick the loop filter level for the frame. 
loopfilter_frame(cpi, cm); @@ -4088,19 +4095,7 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags, } if (oxcf->pass == 1) { - const int lossless = is_lossless_requested(oxcf); -#if CONFIG_VP9_HIGHBITDEPTH - if (cpi->oxcf.use_highbitdepth) - cpi->td.mb.fwd_txm4x4 = lossless ? - vp10_highbd_fwht4x4 : vpx_highbd_fdct4x4; - else - cpi->td.mb.fwd_txm4x4 = lossless ? vp10_fwht4x4 : vpx_fdct4x4; - cpi->td.mb.highbd_itxm_add = lossless ? vp10_highbd_iwht4x4_add : - vp10_highbd_idct4x4_add; -#else - cpi->td.mb.fwd_txm4x4 = lossless ? vp10_fwht4x4 : vpx_fdct4x4; -#endif // CONFIG_VP9_HIGHBITDEPTH - cpi->td.mb.itxm_add = lossless ? vp10_iwht4x4_add : vp10_idct4x4_add; + cpi->td.mb.e_mbd.lossless = is_lossless_requested(oxcf); vp10_first_pass(cpi, source); } else if (oxcf->pass == 2) { Pass2Encode(cpi, size, dest, frame_flags); diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index b22198987..e9779103b 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -229,6 +229,8 @@ typedef struct VP10EncoderConfig { #endif vpx_color_space_t color_space; int color_range; + int render_width; + int render_height; } VP10EncoderConfig; static INLINE int is_lossless_requested(const VP10EncoderConfig *cfg) { diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 3a6d6e47f..84f796e4c 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -864,8 +864,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, if (xd->lossless) { TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 0); - vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, - vp10_highbd_fwht4x4); + vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, so->scan, so->neighbors, @@ -874,14 +873,12 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK 
*x, goto next_highbd; vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, p->eobs[block], - xd->bd, DCT_DCT, - vp10_highbd_iwht4x4_add); + xd->bd, DCT_DCT, 1); } else { int64_t unused; TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 0); - vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, - vpx_highbd_fdct4x4); + vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, so->scan, so->neighbors, @@ -893,8 +890,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, goto next_highbd; vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, p->eobs[block], - xd->bd, tx_type, - vp10_highbd_idct4x4_add); + xd->bd, tx_type, 0); } } } @@ -967,7 +963,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, if (xd->lossless) { TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 0); - vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, vp10_fwht4x4); + vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, so->scan, so->neighbors, @@ -975,13 +971,12 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), - dst, dst_stride, p->eobs[block], DCT_DCT, - vp10_iwht4x4_add); + dst, dst_stride, p->eobs[block], DCT_DCT, 1); } else { int64_t unused; TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 0); - vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, vpx_fdct4x4); + vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0); 
vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, so->scan, so->neighbors, @@ -991,8 +986,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), - dst, dst_stride, p->eobs[block], tx_type, - vp10_idct4x4_add); + dst, dst_stride, p->eobs[block], tx_type, 0); } } } @@ -1345,6 +1339,7 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi, int64_t *distortion, int64_t *sse, ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl, + int ir, int ic, int mi_row, int mi_col) { int k; MACROBLOCKD *xd = &x->e_mbd; @@ -1355,49 +1350,28 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi, const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; int idx, idy; + void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride); const uint8_t *const src = &p->src.buf[vp10_raster_block_offset(BLOCK_8X8, i, p->src.stride)]; uint8_t *const dst = &pd->dst.buf[vp10_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)]; int64_t thisdistortion = 0, thissse = 0; - int thisrate = 0, ref; + int thisrate = 0; TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 1); - const int is_compound = has_second_ref(&mi->mbmi); - const InterpKernel *kernel = vp10_filter_kernels[mi->mbmi.interp_filter]; - for (ref = 0; ref < 1 + is_compound; ++ref) { - const uint8_t *pre = &pd->pre[ref].buf[vp10_raster_block_offset(BLOCK_8X8, i, - pd->pre[ref].stride)]; + vp10_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col); + #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - vp10_highbd_build_inter_predictor(pre, pd->pre[ref].stride, - dst, pd->dst.stride, - &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, width, 
height, - ref, kernel, MV_PRECISION_Q3, - mi_col * MI_SIZE + 4 * (i % 2), - mi_row * MI_SIZE + 4 * (i / 2), xd->bd); + fwd_txm4x4 = xd->lossless ? vp10_highbd_fwht4x4 : vpx_highbd_fdct4x4; } else { - vp10_build_inter_predictor(pre, pd->pre[ref].stride, - dst, pd->dst.stride, - &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, width, height, ref, - kernel, MV_PRECISION_Q3, - mi_col * MI_SIZE + 4 * (i % 2), - mi_row * MI_SIZE + 4 * (i / 2)); + fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4; } #else - vp10_build_inter_predictor(pre, pd->pre[ref].stride, - dst, pd->dst.stride, - &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, width, height, ref, - kernel, MV_PRECISION_Q3, - mi_col * MI_SIZE + 4 * (i % 2), - mi_row * MI_SIZE + 4 * (i / 2)); + fwd_txm4x4 = xd->lossless ? vp10_fwht4x4 : vpx_fdct4x4; #endif // CONFIG_VP9_HIGHBITDEPTH - } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -1423,8 +1397,8 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi, k += (idy * 2 + idx); coeff = BLOCK_OFFSET(p->coeff, k); - x->fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), - coeff, 8); + fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), + coeff, 8); vp10_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -2037,6 +2011,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP10_COMP *cpi, MACROBLOCK *x, &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta, bsi->rdstat[i][mode_idx].tl, + idy, idx, mi_row, mi_col); if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv, diff --git a/vp10/vp10_cx_iface.c b/vp10/vp10_cx_iface.c index 2cb309d8e..304f74eee 100644 --- a/vp10/vp10_cx_iface.c +++ b/vp10/vp10_cx_iface.c @@ -46,6 +46,8 @@ struct vp10_extracfg { vp9e_tune_content content; vpx_color_space_t color_space; int color_range; + int 
render_width; + int render_height; }; static struct vp10_extracfg default_extra_cfg = { @@ -73,6 +75,8 @@ static struct vp10_extracfg default_extra_cfg = { VP9E_CONTENT_DEFAULT, // content VPX_CS_UNKNOWN, // color space 0, // color range + 0, // render width + 0, // render height }; struct vpx_codec_alg_priv { @@ -402,6 +406,8 @@ static vpx_codec_err_t set_encoder_config( oxcf->color_space = extra_cfg->color_space; oxcf->color_range = extra_cfg->color_range; + oxcf->render_width = extra_cfg->render_width; + oxcf->render_height = extra_cfg->render_height; oxcf->arnr_max_frames = extra_cfg->arnr_max_frames; oxcf->arnr_strength = extra_cfg->arnr_strength; oxcf->min_gf_interval = extra_cfg->min_gf_interval; @@ -1232,6 +1238,15 @@ static vpx_codec_err_t ctrl_set_color_range(vpx_codec_alg_priv_t *ctx, return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp10_extracfg extra_cfg = ctx->extra_cfg; + int *const render_size = va_arg(args, int *); + extra_cfg.render_width = render_size[0]; + extra_cfg.render_height = render_size[1]; + return update_extra_cfg(ctx, &extra_cfg); +} + static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP8_COPY_REFERENCE, ctrl_copy_reference}, {VP8E_UPD_ENTROPY, ctrl_update_entropy}, @@ -1269,6 +1284,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP9E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity}, {VP9E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval}, {VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval}, + {VP9E_SET_RENDER_SIZE, ctrl_set_render_size}, // Getters {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer}, diff --git a/vp10/vp10_dx_iface.c b/vp10/vp10_dx_iface.c index a8f9aa351..33337a4bd 100644 --- a/vp10/vp10_dx_iface.c +++ b/vp10/vp10_dx_iface.c @@ -978,9 +978,9 @@ static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; } -static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t
*ctx, - va_list args) { - int *const display_size = va_arg(args, int *); +static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const render_size = va_arg(args, int *); // Only support this function in serial decode. if (ctx->frame_parallel_decode) { @@ -988,14 +988,14 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INCAPABLE; } - if (display_size) { + if (render_size) { if (ctx->frame_workers) { VPxWorker *const worker = ctx->frame_workers; FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; const VP10_COMMON *const cm = &frame_worker_data->pbi->common; - display_size[0] = cm->display_width; - display_size[1] = cm->display_height; + render_size[0] = cm->render_width; + render_size[1] = cm->render_height; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; @@ -1094,7 +1094,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, {VP9_GET_REFERENCE, ctrl_get_reference}, - {VP9D_GET_DISPLAY_SIZE, ctrl_get_display_size}, + {VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size}, {VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth}, {VP9D_GET_FRAME_SIZE, ctrl_get_frame_size}, diff --git a/vp10/vp10_iface_common.h b/vp10/vp10_iface_common.h index 7987d18aa..b2b4b7d8f 100644 --- a/vp10/vp10_iface_common.h +++ b/vp10/vp10_iface_common.h @@ -43,6 +43,8 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); img->d_w = yv12->y_crop_width; img->d_h = yv12->y_crop_height; + img->r_w = yv12->render_width; + img->r_h = yv12->render_height; img->x_chroma_shift = yv12->subsampling_x; img->y_chroma_shift = yv12->subsampling_y; img->planes[VPX_PLANE_Y] = yv12->y_buffer; @@ -84,6 +86,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, yv12->y_crop_width = img->d_w; 
yv12->y_crop_height = img->d_h; + yv12->render_width = img->r_w; + yv12->render_height = img->r_h; yv12->y_width = img->d_w; yv12->y_height = img->d_h; diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index f2642227b..7da3d71ad 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -1593,11 +1593,38 @@ int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { if (Q < thresh_qp && cpi->projected_frame_size > thresh_rate && pred_err_mb > thresh_pred_err_mb) { + double new_correction_factor = cpi->rate_correction_factor; + const int target_size = cpi->av_per_frame_bandwidth; + int target_bits_per_mb; // Drop this frame: advance frame counters, and set force_maxqp flag. cpi->common.current_video_frame++; cpi->frames_since_key++; // Flag to indicate we will force next frame to be encoded at max QP. cpi->force_maxqp = 1; + // Reset the buffer levels. + cpi->buffer_level = cpi->oxcf.optimal_buffer_level; + cpi->bits_off_target = cpi->oxcf.optimal_buffer_level; + // Compute a new rate correction factor, corresponding to the current + // target frame size and max_QP, and adjust the rate correction factor + // upwards, if needed. + // This is to prevent a bad state where the re-encoded frame at max_QP + // undershoots significantly, and then we end up dropping every other + // frame because the QP/rate_correction_factor may have been too low + // before the drop and then takes too long to come up. + if (target_size >= (INT_MAX >> BPER_MB_NORMBITS)) + target_bits_per_mb = + (target_size / cpi->common.MBs) << BPER_MB_NORMBITS; + else + target_bits_per_mb = + (target_size << BPER_MB_NORMBITS) / cpi->common.MBs; + // Rate correction factor based on target_size_per_mb and max_QP. 
+ new_correction_factor = (double)target_bits_per_mb / + (double)vp8_bits_per_mb[INTER_FRAME][cpi->worst_quality]; + if (new_correction_factor > cpi->rate_correction_factor) + cpi->rate_correction_factor = + VPXMIN(2.0 * cpi->rate_correction_factor, new_correction_factor); + if (cpi->rate_correction_factor > MAX_BPB_FACTOR) + cpi->rate_correction_factor = MAX_BPB_FACTOR; return 1; } else { cpi->force_maxqp = 0; diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index 99e51c43d..a12a2ad0e 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -246,8 +246,8 @@ static void yuvconfig2image(vpx_image_t *img, img->fmt = VPX_IMG_FMT_I420; img->w = yv12->y_stride; img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15; - img->d_w = yv12->y_width; - img->d_h = yv12->y_height; + img->d_w = img->r_w = yv12->y_width; + img->d_h = img->r_h = yv12->y_height; img->x_chroma_shift = 1; img->y_chroma_shift = 1; img->planes[VPX_PLANE_Y] = yv12->y_buffer; diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index c8ef3674a..5683736c6 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -176,7 +176,6 @@ typedef struct macroblockd { int mb_to_bottom_edge; FRAME_CONTEXT *fc; - int frame_parallel_decoding_mode; /* pointers to reference frames */ RefBuffer *block_refs[2]; diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index 5d8eb9069..6fb8dca22 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -115,8 +115,8 @@ typedef struct VP9Common { int color_range; int width; int height; - int display_width; - int display_height; + int render_width; + int render_height; int last_width; int last_height; @@ -364,7 +364,6 @@ static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd, memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); } xd->fc = cm->fc; - xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode; } xd->above_seg_context = cm->above_seg_context; diff --git 
a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c index 8324ceaf1..978e0bfe9 100644 --- a/vp9/common/vp9_thread_common.c +++ b/vp9/common/vp9_thread_common.c @@ -318,21 +318,21 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { } // Accumulate frame counts. -void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts, - int is_dec) { +void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, + const FRAME_COUNTS *counts, int is_dec) { int i, j, k, l, m; for (i = 0; i < BLOCK_SIZE_GROUPS; i++) for (j = 0; j < INTRA_MODES; j++) - cm->counts.y_mode[i][j] += counts->y_mode[i][j]; + accum->y_mode[i][j] += counts->y_mode[i][j]; for (i = 0; i < INTRA_MODES; i++) for (j = 0; j < INTRA_MODES; j++) - cm->counts.uv_mode[i][j] += counts->uv_mode[i][j]; + accum->uv_mode[i][j] += counts->uv_mode[i][j]; for (i = 0; i < PARTITION_CONTEXTS; i++) for (j = 0; j < PARTITION_TYPES; j++) - cm->counts.partition[i][j] += counts->partition[i][j]; + accum->partition[i][j] += counts->partition[i][j]; if (is_dec) { int n; @@ -341,10 +341,10 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts, for (k = 0; k < REF_TYPES; k++) for (l = 0; l < COEF_BANDS; l++) for (m = 0; m < COEFF_CONTEXTS; m++) { - cm->counts.eob_branch[i][j][k][l][m] += + accum->eob_branch[i][j][k][l][m] += counts->eob_branch[i][j][k][l][m]; for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - cm->counts.coef[i][j][k][l][m][n] += + accum->coef[i][j][k][l][m][n] += counts->coef[i][j][k][l][m][n]; } } else { @@ -353,64 +353,64 @@ void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts, for (k = 0; k < REF_TYPES; k++) for (l = 0; l < COEF_BANDS; l++) for (m = 0; m < COEFF_CONTEXTS; m++) - cm->counts.eob_branch[i][j][k][l][m] += + accum->eob_branch[i][j][k][l][m] += counts->eob_branch[i][j][k][l][m]; - // In the encoder, cm->counts.coef is only updated at frame + // In the encoder, coef is only updated at frame // level, so not need to accumulate it here. 
// for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) - // cm->counts.coef[i][j][k][l][m][n] += + // accum->coef[i][j][k][l][m][n] += // counts->coef[i][j][k][l][m][n]; } for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) for (j = 0; j < SWITCHABLE_FILTERS; j++) - cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j]; + accum->switchable_interp[i][j] += counts->switchable_interp[i][j]; for (i = 0; i < INTER_MODE_CONTEXTS; i++) for (j = 0; j < INTER_MODES; j++) - cm->counts.inter_mode[i][j] += counts->inter_mode[i][j]; + accum->inter_mode[i][j] += counts->inter_mode[i][j]; for (i = 0; i < INTRA_INTER_CONTEXTS; i++) for (j = 0; j < 2; j++) - cm->counts.intra_inter[i][j] += counts->intra_inter[i][j]; + accum->intra_inter[i][j] += counts->intra_inter[i][j]; for (i = 0; i < COMP_INTER_CONTEXTS; i++) for (j = 0; j < 2; j++) - cm->counts.comp_inter[i][j] += counts->comp_inter[i][j]; + accum->comp_inter[i][j] += counts->comp_inter[i][j]; for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) - cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k]; + accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) - cm->counts.comp_ref[i][j] += counts->comp_ref[i][j]; + accum->comp_ref[i][j] += counts->comp_ref[i][j]; for (i = 0; i < TX_SIZE_CONTEXTS; i++) { for (j = 0; j < TX_SIZES; j++) - cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j]; + accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j]; for (j = 0; j < TX_SIZES - 1; j++) - cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j]; + accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j]; for (j = 0; j < TX_SIZES - 2; j++) - cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j]; + accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j]; } for (i = 0; i < TX_SIZES; i++) - cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i]; + accum->tx.tx_totals[i] += counts->tx.tx_totals[i]; for (i = 0; i < SKIP_CONTEXTS; i++) for (j = 0; j < 2; j++) - 
cm->counts.skip[i][j] += counts->skip[i][j]; + accum->skip[i][j] += counts->skip[i][j]; for (i = 0; i < MV_JOINTS; i++) - cm->counts.mv.joints[i] += counts->mv.joints[i]; + accum->mv.joints[i] += counts->mv.joints[i]; for (k = 0; k < 2; k++) { - nmv_component_counts *comps = &cm->counts.mv.comps[k]; - nmv_component_counts *comps_t = &counts->mv.comps[k]; + nmv_component_counts *const comps = &accum->mv.comps[k]; + const nmv_component_counts *const comps_t = &counts->mv.comps[k]; for (i = 0; i < 2; i++) { comps->sign[i] += comps_t->sign[i]; diff --git a/vp9/common/vp9_thread_common.h b/vp9/common/vp9_thread_common.h index 04666b62c..db6587fcb 100644 --- a/vp9/common/vp9_thread_common.h +++ b/vp9/common/vp9_thread_common.h @@ -55,8 +55,8 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VPxWorker *workers, int num_workers, VP9LfSync *lf_sync); -void vp9_accumulate_frame_counts(struct VP9Common *cm, - struct FRAME_COUNTS *counts, int is_dec); +void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, + const struct FRAME_COUNTS *counts, int is_dec); #ifdef __cplusplus } // extern "C" diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 004d343dc..fc58a6e3d 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1181,11 +1181,11 @@ static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { : literal_to_filter[vpx_rb_read_literal(rb, 2)]; } -static void setup_display_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { - cm->display_width = cm->width; - cm->display_height = cm->height; +static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + cm->render_width = cm->width; + cm->render_height = cm->height; if (vpx_rb_read_bit(rb)) - vp9_read_frame_size(rb, &cm->display_width, &cm->display_height); + vp9_read_frame_size(rb, &cm->render_width, &cm->render_height); } static void resize_mv_buffer(VP9_COMMON *cm) { @@ -1233,7 +1233,7 @@ static void 
setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { BufferPool *const pool = cm->buffer_pool; vp9_read_frame_size(rb, &width, &height); resize_context_buffers(cm, width, height); - setup_display_size(cm, rb); + setup_render_size(cm, rb); lock_buffer_pool(pool); if (vpx_realloc_frame_buffer( @@ -1257,6 +1257,8 @@ static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, @@ -1315,7 +1317,7 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, } resize_context_buffers(cm, width, height); - setup_display_size(cm, rb); + setup_render_size(cm, rb); lock_buffer_pool(pool); if (vpx_realloc_frame_buffer( @@ -1339,6 +1341,8 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm, pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { @@ -1563,9 +1567,10 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, return vpx_reader_find_end(&tile_data->bit_reader); } -static int tile_worker_hook(TileWorkerData *const tile_data, - const TileInfo *const tile) { +static int tile_worker_hook(TileWorkerData *const tile_data, void *unused) { + const TileInfo *const tile = &tile_data->xd.tile; int mi_row, mi_col; + (void)unused; if 
(setjmp(tile_data->error_info.jmp)) { tile_data->error_info.setjmp = 0; @@ -1628,8 +1633,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, num_threads * sizeof(*pbi->tile_worker_data))); - CHECK_MEM_ERROR(cm, pbi->tile_worker_info, - vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info))); for (i = 0; i < num_threads; ++i) { VPxWorker *const worker = &pbi->tile_workers[i]; ++pbi->num_tile_workers; @@ -1645,10 +1648,15 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; + TileWorkerData *const tile_data = &pbi->tile_worker_data[n]; winterface->sync(worker); + tile_data->pbi = pbi; + tile_data->xd = pbi->mb; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; worker->hook = (VPxWorkerHook)tile_worker_hook; - worker->data1 = &pbi->tile_worker_data[n]; - worker->data2 = &pbi->tile_worker_info[n]; + worker->data1 = tile_data; + worker->data2 = NULL; } // Note: this memset assumes above_context[0], [1] and [2] @@ -1698,16 +1706,10 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, for (i = 0; i < num_workers && n < tile_cols; ++i) { VPxWorker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; - TileInfo *const tile = (TileInfo*)worker->data2; TileBuffer *const buf = &tile_buffers[0][n]; - tile_data->pbi = pbi; - tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; - tile_data->xd.counts = cm->frame_parallel_decoding_mode ? 
- 0 : &tile_data->counts; vp9_zero(tile_data->dqcoeff); - vp9_tile_init(tile, cm, 0, buf->col); vp9_tile_init(&tile_data->xd.tile, cm, 0, buf->col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, @@ -1742,14 +1744,15 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader); final_worker = -1; } + } - // Accumulate thread frame counts. - if (n >= tile_cols && !cm->frame_parallel_decoding_mode) { - for (i = 0; i < num_workers; ++i) { - TileWorkerData *const tile_data = - (TileWorkerData*)pbi->tile_workers[i].data1; - vp9_accumulate_frame_counts(cm, &tile_data->counts, 1); - } + // Accumulate thread frame counts. + if (!cm->frame_parallel_decoding_mode) { + int i; + for (i = 0; i < num_workers; ++i) { + TileWorkerData *const tile_data = + (TileWorkerData*)pbi->tile_workers[i].data1; + vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); } } @@ -1949,6 +1952,8 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, #endif get_frame_new_buffer(cm)->color_space = cm->color_space; get_frame_new_buffer(cm)->color_range = cm->color_range; + get_frame_new_buffer(cm)->render_width = cm->render_width; + get_frame_new_buffer(cm)->render_height = cm->render_height; if (pbi->need_resync) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 6734d0029..4e88819b1 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -126,6 +126,9 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) { void vp9_decoder_remove(VP9Decoder *pbi) { int i; + if (!pbi) + return; + vpx_get_worker_interface()->end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); vpx_free(pbi->tile_data); @@ -134,7 +137,6 @@ void vp9_decoder_remove(VP9Decoder *pbi) { vpx_get_worker_interface()->end(worker); } vpx_free(pbi->tile_worker_data); - vpx_free(pbi->tile_worker_info); 
vpx_free(pbi->tile_workers); if (pbi->num_tile_workers > 0) { diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 915f9dc8f..944f7daa3 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -65,7 +65,6 @@ typedef struct VP9Decoder { VPxWorker lf_worker; VPxWorker *tile_workers; TileWorkerData *tile_worker_data; - TileInfo *tile_worker_info; int num_tile_workers; TileData *tile_data; diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index e17b39789..0b4350414 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -20,9 +20,9 @@ #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" - CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { size_t last_coded_q_map_size; + size_t consec_zero_mv_size; CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); if (cr == NULL) return NULL; @@ -41,12 +41,20 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { assert(MAXQ <= 255); memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); + consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv); + cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size); + if (cr->consec_zero_mv == NULL) { + vpx_free(cr); + return NULL; + } + memset(cr->consec_zero_mv, 0, consec_zero_mv_size); return cr; } void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { vpx_free(cr->map); vpx_free(cr->last_coded_q_map); + vpx_free(cr->consec_zero_mv); vpx_free(cr); } @@ -228,22 +236,48 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, int map_offset = block_index + y * cm->mi_cols + x; cr->map[map_offset] = new_map_value; cpi->segmentation_map[map_offset] = mbmi->segment_id; + } +} + +void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, + const MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = 
cpi->cyclic_refresh; + MV mv = mbmi->mv[0].as_mv; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + const int block_index = mi_row * cm->mi_cols + mi_col; + int x, y; + for (y = 0; y < ymis; y++) + for (x = 0; x < xmis; x++) { + int map_offset = block_index + y * cm->mi_cols + x; // Inter skip blocks were clearly not coded at the current qindex, so // don't update the map for them. For cases where motion is non-zero or // the reference frame isn't the previous frame, the previous value in // the map for this spatial location is not entirely correct. - if ((!is_inter_block(mbmi) || !skip) && + if ((!is_inter_block(mbmi) || !mbmi->skip) && mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) { cr->last_coded_q_map[map_offset] = clamp( cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ); - } else if (is_inter_block(mbmi) && skip && + } else if (is_inter_block(mbmi) && mbmi->skip && mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) { cr->last_coded_q_map[map_offset] = VPXMIN( clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ), cr->last_coded_q_map[map_offset]); + // Update the consecutive zero/low_mv count. + if (is_inter_block(mbmi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) { + if (cr->consec_zero_mv[map_offset] < 255) + cr->consec_zero_mv[map_offset]++; + } else { + cr->consec_zero_mv[map_offset] = 0; } } + } } // Update the actual number of blocks that were applied the segment delta q. @@ -380,9 +414,10 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { int mi_row = sb_row_index * MI_BLOCK_SIZE; int mi_col = sb_col_index * MI_BLOCK_SIZE; int qindex_thresh = - cpi->oxcf.content == VP9E_CONTENT_SCREEN - ? 
vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) - : 0; + vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex); + int consec_zero_mv_thresh = + cpi->oxcf.content == VP9E_CONTENT_SCREEN ? 0 + : 10 * (100 / cr->percent_refresh); assert(mi_row >= 0 && mi_row < cm->mi_rows); assert(mi_col >= 0 && mi_col < cm->mi_cols); bl_index = mi_row * cm->mi_cols + mi_col; @@ -398,7 +433,8 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { // for possible boost/refresh (segment 1). The segment id may get // reset to 0 later if block gets coded anything other than ZEROMV. if (cr->map[bl_index2] == 0) { - if (cr->last_coded_q_map[bl_index2] > qindex_thresh) + if (cr->last_coded_q_map[bl_index2] > qindex_thresh || + cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) sum_map++; } else if (cr->map[bl_index2] < 0) { cr->map[bl_index2]++; @@ -447,7 +483,7 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { cr->rate_boost_fac = 10; } else { cr->motion_thresh = 32; - cr->rate_boost_fac = 17; + cr->rate_boost_fac = 15; } if (cpi->svc.spatial_layer_id > 0) { cr->motion_thresh = 4; @@ -475,6 +511,8 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { if (cm->frame_type == KEY_FRAME) { memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); + memset(cr->consec_zero_mv, 0, + cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv)); cr->sb_index = 0; } return; @@ -544,6 +582,8 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; memset(cr->map, 0, cm->mi_rows * cm->mi_cols); + memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols); + memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols); cr->sb_index = 0; cpi->refresh_golden_frame = 1; } diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h index 7da1f94cf..839ce6df4 100644 --- 
a/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -51,6 +51,8 @@ struct CYCLIC_REFRESH { signed char *map; // Map of the last q a block was coded at. uint8_t *last_coded_q_map; + // Count of how many consecutive times a block uses ZEROMV for encoding. + uint8_t *consec_zero_mv; // Thresholds applied to the projected rate/distortion of the coding block, // when deciding whether block should be refreshed. int64_t thresh_rate_sb; @@ -92,6 +94,11 @@ void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, int mi_row, int mi_col, BLOCK_SIZE bsize, int64_t rate, int64_t dist, int skip); +void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi, + const MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, + BLOCK_SIZE bsize); + // Update the segmentation map, and related quantities: cyclic refresh map, // refresh sb_index, and target number of blocks to be refreshed. void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 01cced001..f3c8579b3 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -969,14 +969,14 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { return total_size; } -static void write_display_size(const VP9_COMMON *cm, - struct vpx_write_bit_buffer *wb) { - const int scaling_active = cm->width != cm->display_width || - cm->height != cm->display_height; +static void write_render_size(const VP9_COMMON *cm, + struct vpx_write_bit_buffer *wb) { + const int scaling_active = cm->width != cm->render_width || + cm->height != cm->render_height; vpx_wb_write_bit(wb, scaling_active); if (scaling_active) { - vpx_wb_write_literal(wb, cm->display_width - 1, 16); - vpx_wb_write_literal(wb, cm->display_height - 1, 16); + vpx_wb_write_literal(wb, cm->render_width - 1, 16); + vpx_wb_write_literal(wb, cm->render_height - 1, 16); } } @@ -985,7 +985,7 @@ static void write_frame_size(const VP9_COMMON
*cm, vpx_wb_write_literal(wb, cm->width - 1, 16); vpx_wb_write_literal(wb, cm->height - 1, 16); - write_display_size(cm, wb); + write_render_size(cm, wb); } static void write_frame_size_with_refs(VP9_COMP *cpi, @@ -1023,7 +1023,7 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, vpx_wb_write_literal(wb, cm->height - 1, 16); } - write_display_size(cm, wb); + write_render_size(cm, wb); } static void write_sync_code(struct vpx_write_bit_buffer *wb) { diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 4a4301e85..1c4f35a53 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -4213,5 +4213,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, } ++td->counts->tx.tx_totals[mbmi->tx_size]; ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; + if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) + vp9_cyclic_refresh_update_sb_postencode(cpi, mbmi, mi_row, mi_col, bsize); } } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 91e92ff24..202c11242 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1509,15 +1509,23 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cm->interp_filter = cpi->sf.default_interp_filter; - cm->display_width = cpi->oxcf.width; - cm->display_height = cpi->oxcf.height; + if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) { + cm->render_width = cpi->oxcf.render_width; + cm->render_height = cpi->oxcf.render_height; + } else { + cm->render_width = cpi->oxcf.width; + cm->render_height = cpi->oxcf.height; + } if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) { cm->width = cpi->oxcf.width; cm->height = cpi->oxcf.height; } if (cpi->initial_width) { - if (cm->width > cpi->initial_width || cm->height > cpi->initial_height) { + int new_mi_size = 0; + vp9_set_mb_mi(cm, cm->width, cm->height); + new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); + if 
(cm->mi_alloc_size < new_mi_size) { vp9_free_context_buffers(cm); alloc_compressor_data(cpi); realloc_segmentation_maps(cpi); @@ -1927,14 +1935,15 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) void vp9_remove_compressor(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; + VP9_COMMON *cm; unsigned int i; int t; if (!cpi) return; - if (cpi && (cm->current_video_frame > 0)) { + cm = &cpi->common; + if (cm->current_video_frame > 0) { #if CONFIG_INTERNAL_STATS vpx_clear_system_state(); @@ -3820,6 +3829,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->frame_to_show = get_frame_new_buffer(cm); cm->frame_to_show->color_space = cm->color_space; cm->frame_to_show->color_range = cm->color_range; + cm->frame_to_show->render_width = cm->render_width; + cm->frame_to_show->render_height = cm->render_height; // Pick the loop filter level for the frame. loopfilter_frame(cpi, cm); @@ -4642,8 +4653,10 @@ int vp9_set_internal_size(VP9_COMP *cpi, // always go to the next whole number cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs; cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs; - assert(cm->width <= cpi->initial_width); - assert(cm->height <= cpi->initial_height); + if (cm->current_video_frame) { + assert(cm->width <= cpi->initial_width); + assert(cm->height <= cpi->initial_height); + } update_frame_size(cpi); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index b50f2fb31..174e2b461 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -239,6 +239,8 @@ typedef struct VP9EncoderConfig { #endif vpx_color_space_t color_space; int color_range; + int render_width; + int render_height; VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; } VP9EncoderConfig; diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c index 69ed1325c..ad25712be 100644 --- a/vp9/encoder/vp9_ethread.c +++ b/vp9/encoder/vp9_ethread.c @@ -192,7 +192,7 @@ void 
vp9_encode_tiles_mt(VP9_COMP *cpi) { // Accumulate counters. if (i < cpi->num_workers - 1) { - vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0); + vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } } diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 19b0beb34..fc4d9ae67 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1238,10 +1238,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (const_motion[ref_frame] && this_mode == NEARMV) continue; - i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; - if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking) - if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) - ref_frame_skip_mask |= (1 << ref_frame); + if (!(this_mode == ZEROMV && ref_frame == LAST_FRAME)) { + i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; + if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking) + if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) + ref_frame_skip_mask |= (1 << ref_frame); + } if (ref_frame_skip_mask & (1 << ref_frame)) continue; @@ -1530,11 +1532,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (!x->skip && best_rdc.rdcost > inter_mode_thresh && bsize <= cpi->sf.max_intra_bsize)) { struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 }; - const TX_SIZE intra_tx_size = - VPXMIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); int i; TX_SIZE best_intra_tx_size = TX_SIZES; + TX_SIZE intra_tx_size = + VPXMIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16) + intra_tx_size = TX_16X16; if (reuse_inter_pred && best_pred != NULL) { if (best_pred->data == orig_dst.buf) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 3c84a7753..0bffcbac2 100644 --- 
a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -664,6 +664,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, } else if (s[n]) { if (is_inter_block(mbmi)) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); + r[n][1] -= r_tx_size; } else { rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]); @@ -673,6 +674,11 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } + if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) { + rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); + rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); + } + // Early termination in transform size search. if (cpi->sf.tx_size_search_breakout && (rd[n][1] == INT64_MAX || diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 58d273f5c..64a4ebd7a 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -541,13 +541,21 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { set_flags_and_fb_idx_for_temporal_mode2(cpi); } else if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { - // VP9E_TEMPORAL_LAYERING_MODE_BYPASS : - // if the code goes here, it means the encoder will be relying on the - // flags from outside for layering. - // However, since when spatial+temporal layering is used, the buffer indices - // cannot be derived automatically, the bypass mode will only work when the - // number of spatial layers equals 1. - assert(cpi->svc.number_spatial_layers == 1); + // In the BYPASS/flexible mode, the encoder is relying on the application + // to specify, for each spatial layer, the flags and buffer indices for the + // layering. 
+ // Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is + // needed to support the case where the frame flags may be passed in via + // vpx_codec_encode(), which can be used for the temporal-only svc case. + if (cpi->ext_refresh_frame_flags_pending == 0) { + int sl; + cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + sl = cpi->svc.spatial_layer_id; + vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]); + cpi->lst_fb_idx = cpi->svc.ext_lst_fb_idx[sl]; + cpi->gld_fb_idx = cpi->svc.ext_gld_fb_idx[sl]; + cpi->alt_fb_idx = cpi->svc.ext_alt_fb_idx[sl]; + } } lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 168edbf89..ae55c2fd3 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -74,6 +74,12 @@ typedef struct { // Indicates what sort of temporal layering is used. // Currently, this only works for CBR mode. VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; + // Frame flags and buffer indexes for each spatial layer, set by the + // application (external settings). 
+ int ext_frame_flags[VPX_MAX_LAYERS]; + int ext_lst_fb_idx[VPX_MAX_LAYERS]; + int ext_gld_fb_idx[VPX_MAX_LAYERS]; + int ext_alt_fb_idx[VPX_MAX_LAYERS]; } SVC; struct VP9_COMP; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index fd65ed966..a253c0692 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -46,6 +46,8 @@ struct vp9_extracfg { vp9e_tune_content content; vpx_color_space_t color_space; int color_range; + int render_width; + int render_height; }; static struct vp9_extracfg default_extra_cfg = { @@ -73,6 +75,8 @@ static struct vp9_extracfg default_extra_cfg = { VP9E_CONTENT_DEFAULT, // content VPX_CS_UNKNOWN, // color space 0, // color range + 0, // render width + 0, // render height }; struct vpx_codec_alg_priv { @@ -469,6 +473,8 @@ static vpx_codec_err_t set_encoder_config( oxcf->color_space = extra_cfg->color_space; oxcf->color_range = extra_cfg->color_range; + oxcf->render_width = extra_cfg->render_width; + oxcf->render_height = extra_cfg->render_height; oxcf->arnr_max_frames = extra_cfg->arnr_max_frames; oxcf->arnr_strength = extra_cfg->arnr_strength; oxcf->min_gf_interval = extra_cfg->min_gf_interval; @@ -1416,6 +1422,20 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *); + int sl; + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + cpi->svc.ext_frame_flags[sl] = data->frame_flags[sl]; + cpi->svc.ext_lst_fb_idx[sl] = data->lst_fb_idx[sl]; + cpi->svc.ext_gld_fb_idx[sl] = data->gld_fb_idx[sl]; + cpi->svc.ext_alt_fb_idx[sl] = data->alt_fb_idx[sl]; + } + return VPX_CODEC_OK; +} + static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp = @@ -1447,6 +1467,15 @@ static vpx_codec_err_t 
ctrl_set_color_range(vpx_codec_alg_priv_t *ctx, return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + int *const render_size = va_arg(args, int *); + extra_cfg.render_width = render_size[0]; + extra_cfg.render_height = render_size[1]; + return update_extra_cfg(ctx, &extra_cfg); +} + static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP8_COPY_REFERENCE, ctrl_copy_reference}, {VP8E_UPD_ENTROPY, ctrl_update_entropy}, @@ -1487,6 +1516,8 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP9E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity}, {VP9E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval}, {VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval}, + {VP9E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config}, + {VP9E_SET_RENDER_SIZE, ctrl_set_render_size}, // Getters {VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer}, diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index eb2371e2c..c6b1ba95f 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -979,9 +979,9 @@ static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; } -static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, - va_list args) { - int *const display_size = va_arg(args, int *); +static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const render_size = va_arg(args, int *); // Only support this function in serial decode. 
if (ctx->frame_parallel_decode) { @@ -989,14 +989,14 @@ static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INCAPABLE; } - if (display_size) { + if (render_size) { if (ctx->frame_workers) { VPxWorker *const worker = ctx->frame_workers; FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; const VP9_COMMON *const cm = &frame_worker_data->pbi->common; - display_size[0] = cm->display_width; - display_size[1] = cm->display_height; + render_size[0] = cm->render_width; + render_size[1] = cm->render_height; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; @@ -1095,7 +1095,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, {VP9_GET_REFERENCE, ctrl_get_reference}, - {VP9D_GET_DISPLAY_SIZE, ctrl_get_display_size}, + {VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size}, {VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth}, {VP9D_GET_FRAME_SIZE, ctrl_get_frame_size}, diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h index 7d514ba55..938d4224b 100644 --- a/vp9/vp9_iface_common.h +++ b/vp9/vp9_iface_common.h @@ -43,6 +43,8 @@ static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); img->d_w = yv12->y_crop_width; img->d_h = yv12->y_crop_height; + img->r_w = yv12->render_width; + img->r_h = yv12->render_height; img->x_chroma_shift = yv12->subsampling_x; img->y_chroma_shift = yv12->subsampling_y; img->planes[VPX_PLANE_Y] = yv12->y_buffer; @@ -84,6 +86,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, yv12->y_crop_width = img->d_w; yv12->y_crop_height = img->d_h; + yv12->render_width = img->r_w; + yv12->render_height = img->r_h; yv12->y_width = img->d_w; yv12->y_height = img->d_h; diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index 78932d2ff..ff600830e 100644 --- 
a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -339,7 +339,8 @@ void assign_layer_bitrates(const SvcContext *svc_ctx, (spatial_layer_target >> 1) + (spatial_layer_target >> 2); enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] = spatial_layer_target; - } else if (svc_ctx->temporal_layering_mode == 2) { + } else if (svc_ctx->temporal_layering_mode == 2 || + svc_ctx->temporal_layering_mode == 1) { enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = spatial_layer_target * 2 / 3; enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = @@ -417,7 +418,8 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode; if (svc_ctx->temporal_layering_mode == 3) { svc_ctx->temporal_layers = 3; - } else if (svc_ctx->temporal_layering_mode == 2) { + } else if (svc_ctx->temporal_layering_mode == 2 || + svc_ctx->temporal_layering_mode == 1) { svc_ctx->temporal_layers = 2; } diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index b7d85d89c..69b8d3e34 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -556,6 +556,22 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_SET_COLOR_RANGE, + + /*!\brief Codec control function to set the frame flags and buffer indices + * for spatial layers. The frame flags and buffer indices are set using the + * struct #vpx_svc_ref_frame_config defined below. + * + * Supported in codecs: VP9 + */ + VP9E_SET_SVC_REF_FRAME_CONFIG, + + /*!\brief Codec control function to set intended rendering image size. + * + * By default, this is identical to the image size in pixels. + * + * Supported in codecs: VP9 + */ + VP9E_SET_RENDER_SIZE, }; /*!\brief vpx 1-D scaling mode @@ -682,6 +698,21 @@ typedef struct vpx_svc_layer_id { int temporal_layer_id; /**< Temporal layer id number. */ } vpx_svc_layer_id_t; +/*!\brief vp9 svc frame flag parameters. 
+ * + * This defines the frame flags and buffer indices for each spatial layer for + * svc encoding. + * This is used with the #VP9E_SET_SVC_REF_FRAME_CONFIG control to set frame + * flags and buffer indices for each spatial layer for the current (super)frame. + * + */ +typedef struct vpx_svc_ref_frame_config { + int frame_flags[VPX_TS_MAX_LAYERS]; /**< Frame flags. */ + int lst_fb_idx[VPX_TS_MAX_LAYERS]; /**< Last buffer index. */ + int gld_fb_idx[VPX_TS_MAX_LAYERS]; /**< Golden buffer index. */ + int alt_fb_idx[VPX_TS_MAX_LAYERS]; /**< Altref buffer index. */ +} vpx_svc_ref_frame_config_t; + /*!\brief VP8 encoder control function parameter type * * Defines the data types that VP8E control functions take. Note that @@ -773,6 +804,15 @@ VPX_CTRL_USE_TYPE(VP9E_GET_ACTIVEMAP, vpx_active_map_t *) */ #define VPX_CTRL_VP9E_SET_COLOR_RANGE VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_RANGE, int) + +VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *) + +/*!\brief + * + * TODO(rbultje) : add support of the control in ffmpeg + */ +#define VPX_CTRL_VP9E_SET_RENDER_SIZE +VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *) /*! 
@} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h index be9314751..51100599c 100644 --- a/vpx/vpx_image.h +++ b/vpx/vpx_image.h @@ -93,6 +93,10 @@ extern "C" { unsigned int d_w; /**< Displayed image width */ unsigned int d_h; /**< Displayed image height */ + /* Image intended rendering dimensions */ + unsigned int r_w; /**< Intended rendering image width */ + unsigned int r_h; /**< Intended rendering image height */ + /* Chroma subsampling info */ unsigned int x_chroma_shift; /**< subsampling order, X */ unsigned int y_chroma_shift; /**< subsampling order, Y */ diff --git a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c index 8d5c7c2dd..3d303f2e1 100644 --- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c +++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c @@ -291,122 +291,6 @@ void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, } } -#if ARCH_X86_64 -static void vpx_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr, - ptrdiff_t src_pitch, - uint8_t *output_ptr, - ptrdiff_t out_pitch, - uint32_t output_height, - const int16_t *filter) { - __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt3; - __m128i firstFilters, secondFilters, thirdFilters, forthFilters; - __m128i srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8; - __m128i srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7; - __m128i srcReg8; - unsigned int i; - - // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64 - addFilterReg64 = _mm_set1_epi32((int)0x0400040u); - filtersReg = _mm_loadu_si128((const __m128i *)filter); - // converting the 16 bit (short) to 8 bit (byte) and have the same data - // in both lanes of 128 bit register. 
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg); - - // duplicate only the first 16 bits in the filter - firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u)); - // duplicate only the second 16 bits in the filter - secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u)); - // duplicate only the third 16 bits in the filter - thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u)); - // duplicate only the forth 16 bits in the filter - forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u)); - - // load the first 7 rows of 16 bytes - srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr)); - srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)); - srcReg3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)); - srcReg4 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)); - srcReg5 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)); - srcReg6 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)); - srcReg7 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)); - - for (i = 0; i < output_height; i++) { - // load the last 16 bytes - srcReg8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); - - // merge the result together - srcRegFilt5 = _mm_unpacklo_epi8(srcReg1, srcReg2); - srcRegFilt6 = _mm_unpacklo_epi8(srcReg7, srcReg8); - srcRegFilt1 = _mm_unpackhi_epi8(srcReg1, srcReg2); - srcRegFilt3 = _mm_unpackhi_epi8(srcReg7, srcReg8); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, firstFilters); - srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, forthFilters); - srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters); - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, srcRegFilt6); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3); - - // merge the result together - srcRegFilt3 = 
_mm_unpacklo_epi8(srcReg3, srcReg4); - srcRegFilt6 = _mm_unpackhi_epi8(srcReg3, srcReg4); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters); - srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, secondFilters); - - // merge the result together - srcRegFilt7 = _mm_unpacklo_epi8(srcReg5, srcReg6); - srcRegFilt8 = _mm_unpackhi_epi8(srcReg5, srcReg6); - - // multiply 2 adjacent elements with the filter and add the result - srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, thirdFilters); - srcRegFilt8 = _mm_maddubs_epi16(srcRegFilt8, thirdFilters); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, - _mm_min_epi16(srcRegFilt3, srcRegFilt7)); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_min_epi16(srcRegFilt6, srcRegFilt8)); - - // add and saturate the results together - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, - _mm_max_epi16(srcRegFilt3, srcRegFilt7)); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, - _mm_max_epi16(srcRegFilt6, srcRegFilt8)); - srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, addFilterReg64); - srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64); - - // shift by 7 bit each 16 bit - srcRegFilt5 = _mm_srai_epi16(srcRegFilt5, 7); - srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7); - - // shrink to 8 bit each 16 bits, the first lane contain the first - // convolve result and the second lane contain the second convolve - // result - srcRegFilt1 = _mm_packus_epi16(srcRegFilt5, srcRegFilt1); - - src_ptr+=src_pitch; - - // shift down a row - srcReg1 = srcReg2; - srcReg2 = srcReg3; - srcReg3 = srcReg4; - srcReg4 = srcReg5; - srcReg5 = srcReg6; - srcReg6 = srcReg7; - srcReg7 = srcReg8; - - // save 16 bytes convolve result - _mm_store_si128((__m128i*)output_ptr, srcRegFilt1); - - output_ptr+=out_pitch; - } -} -#endif // ARCH_X86_64 filter8_1dfunction vpx_filter_block1d16_v8_ssse3; filter8_1dfunction vpx_filter_block1d16_h8_ssse3; diff --git 
a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm index 7ea6a0e58..3fbaa274c 100644 --- a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm +++ b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm @@ -104,7 +104,7 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ %define k0k1k4k5 m8 %define k2k3k6k7 m9 %define krd m10 - %define orig_height r7 + %define orig_height r7d mova krd, [GLOBAL(pw_64)] pshuflw k0k1k4k5, m4, 0b ;k0_k1 pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5 @@ -131,8 +131,8 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ mova k2k3k6k7, m7 mova krd, m1 %endif - mov orig_height, heightq - shr heightq, 1 + mov orig_height, heightd + shr heightd, 1 .loop: ;Do two rows at once movh m0, [srcq - 3] @@ -200,12 +200,12 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ lea dstq, [dstq + 2 * dstrideq ] prefetcht0 [srcq + 2 * sstrideq - 3] - dec heightq + dec heightd jnz .loop ; Do last row if output_height is odd - mov heightq, orig_height - and heightq, 1 + mov heightd, orig_height + and heightd, 1 je .done movh m0, [srcq - 3] ; load src @@ -254,17 +254,17 @@ cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \ ;------------------------------------------------------------------------------- %macro SUBPIX_HFILTER8 1 -cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 13, LOCAL_VARS_SIZE, \ +cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS %if ARCH_X86_64 - %define orig_height r7 + %define orig_height r7d %else %define orig_height heightmp %endif - mov orig_height, heightq - shr heightq, 1 + mov orig_height, heightd + shr heightd, 1 .loop: movh m0, [srcq - 3] @@ -336,12 +336,12 @@ cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 13, LOCAL_VARS_SIZE, \ lea srcq, [srcq + sstrideq ] lea dstq, [dstq + 2 * dstrideq ] prefetcht0 [srcq + 2 * sstrideq - 3] - dec 
heightq + dec heightd jnz .loop ;Do last row if output_height is odd - mov heightq, orig_height - and heightq, 1 + mov heightd, orig_height + and heightd, 1 je .done movh m0, [srcq - 3] @@ -361,7 +361,7 @@ cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 13, LOCAL_VARS_SIZE, \ ;------------------------------------------------------------------------------- %macro SUBPIX_HFILTER16 1 -cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 13, LOCAL_VARS_SIZE, \ +cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS @@ -427,7 +427,7 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 13, LOCAL_VARS_SIZE, \ lea srcq, [srcq + sstrideq] mova [dstq], m0 lea dstq, [dstq + dstrideq] - dec heightq + dec heightd jnz .loop RET %endm @@ -527,11 +527,11 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ %endif movx [dstq], m1 add dstq, dst_stride - sub heightq, 2 - cmp heightq, 1 + sub heightd, 2 + cmp heightd, 1 jg .loop - cmp heightq, 0 + cmp heightd, 0 je .done movx m0, [srcq ] ;A @@ -570,7 +570,7 @@ cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ ;------------------------------------------------------------------------------- %macro SUBPIX_VFILTER16 1 -cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*2), 13, LOCAL_VARS_SIZE, \ +cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] @@ -655,7 +655,7 @@ cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*2), 13, LOCAL_VARS_SIZE, \ %endif movh [dstq + 8], m3 add dstq, dst_stride - dec heightq + dec heightd jnz .loop RET %endm diff --git a/vpx_ports/bitops.h b/vpx_ports/bitops.h index 0d3223e32..84ff3659f 100644 --- a/vpx_ports/bitops.h +++ b/vpx_ports/bitops.h @@ -11,6 +11,8 @@ #ifndef VPX_PORTS_BITOPS_H_ #define VPX_PORTS_BITOPS_H_ +#include <assert.h> + #include "vpx_ports/msvc.h" #ifdef _MSC_VER @@ -25,10 +27,15 @@
extern "C" { #endif +// These versions of get_msb() are only valid when n != 0 because all +// of the optimized versions are undefined when n == 0: +// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html + // use GNU builtins where available. #if defined(__GNUC__) && \ ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) static INLINE int get_msb(unsigned int n) { + assert(n != 0); return 31 ^ __builtin_clz(n); } #elif defined(USE_MSC_INTRINSICS) @@ -36,6 +43,7 @@ static INLINE int get_msb(unsigned int n) { static INLINE int get_msb(unsigned int n) { unsigned long first_set_bit; + assert(n != 0); _BitScanReverse(&first_set_bit, n); return first_set_bit; } @@ -47,6 +55,8 @@ static INLINE int get_msb(unsigned int n) { unsigned int value = n; int i; + assert(n != 0); + for (i = 4; i >= 0; --i) { const int shift = (1 << i); const unsigned int x = value >> shift; diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h index 2e12acebc..3a044526e 100644 --- a/vpx_scale/yv12config.h +++ b/vpx_scale/yv12config.h @@ -57,6 +57,8 @@ typedef struct yv12_buffer_config { unsigned int bit_depth; vpx_color_space_t color_space; int color_range; + int render_width; + int render_height; int corrupted; int flags; diff --git a/vpxdec.c b/vpxdec.c index fe1e3f040..fde3b9a0b 100644 --- a/vpxdec.c +++ b/vpxdec.c @@ -953,22 +953,22 @@ static int main_loop(int argc, const char **argv_) { // these is set to 0, use the display size set in the first frame // header. If that is unavailable, use the raw decoded size of the // first decoded frame. 
- int display_width = vpx_input_ctx.width; - int display_height = vpx_input_ctx.height; - if (!display_width || !display_height) { - int display_size[2]; + int render_width = vpx_input_ctx.width; + int render_height = vpx_input_ctx.height; + if (!render_width || !render_height) { + int render_size[2]; if (vpx_codec_control(&decoder, VP9D_GET_DISPLAY_SIZE, - display_size)) { + render_size)) { // As last resort use size of first frame as display size. - display_width = img->d_w; - display_height = img->d_h; + render_width = img->d_w; + render_height = img->d_h; } else { - display_width = display_size[0]; - display_height = display_size[1]; + render_width = render_size[0]; + render_height = render_size[1]; } } - scaled_img = vpx_img_alloc(NULL, img->fmt, display_width, - display_height, 16); + scaled_img = vpx_img_alloc(NULL, img->fmt, render_width, + render_height, 16); scaled_img->bit_depth = img->bit_depth; } -- GitLab