diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk index 50255563875cca11939f9a223abcb8cdff75d2cf..34be4532fdbf9b1542074d5a4f14e553e8773128 100644 --- a/aom_dsp/aom_dsp.mk +++ b/aom_dsp/aom_dsp.mk @@ -212,6 +212,24 @@ DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c endif # CONFIG_AV1_ENCODER +ifeq ($(CONFIG_PVQ),yes) +DSP_SRCS-yes += fwd_txfm.c +DSP_SRCS-yes += fwd_txfm.h +DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.h +DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c +DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h +DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h +ifeq ($(ARCH_X86_64),yes) +DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm +endif +DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c +DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h +DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c +DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.h +DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c +DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c +endif # CONFIG_PVQ + # inverse transform ifneq ($(filter yes,$(CONFIG_AV1)),) DSP_SRCS-yes += inv_txfm.h diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index dc9a6b97ede86268395315c1df82a14f5ec46060..53af943414e2e99c8176423212f76e68cfe4c2f9 100644 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl @@ -595,7 +595,7 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { # # Forward transform # -if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { +if ((aom_config("CONFIG_AV1_ENCODER") eq "yes") || (aom_config("CONFIG_PVQ") eq "yes")){ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; specialize qw/aom_fdct4x4 sse2/; diff --git a/aom_dsp/daalaboolreader.h b/aom_dsp/daalaboolreader.h index 9d6cebdbe0ec87bf7df8b871ef9de6acf218de09..8977995a865f2af972f832d9ccf22a19187beb68 100644 --- a/aom_dsp/daalaboolreader.h +++ b/aom_dsp/daalaboolreader.h @@ -41,7 +41,7 @@ uint32_t aom_daala_reader_tell_frac(const daala_reader *r); static INLINE int aom_daala_read(daala_reader *r, int prob) { if (prob == 128) { - return od_ec_dec_bits(&r->ec, 1); + return od_ec_dec_bits(&r->ec, 1, "aom_bits"); } else { int p = ((prob << 15) + (256 - prob)) >> 8; return od_ec_decode_bool_q15(&r->ec, p); diff --git a/aom_dsp/entcode.c b/aom_dsp/entcode.c index 49284b0d2e45b88c442c7d7aa4f61661844c763a..ff8e8e2cff15cf59f8988e9617ac2ab1824e186b 100644 --- a/aom_dsp/entcode.c +++ b/aom_dsp/entcode.c @@ -1,26 +1,13 @@ -/*Daala video codec -Copyright (c) 2001-2012 Daala project contributors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ #ifdef HAVE_CONFIG_H #include "./config.h" diff --git a/aom_dsp/entcode.h b/aom_dsp/entcode.h index 77ed1717642b6fc1405c70adcdaa9c129071a6d4..91fcb679595b74acf6f0d686cdc938ace374872c 100644 --- a/aom_dsp/entcode.h +++ b/aom_dsp/entcode.h @@ -1,26 +1,13 @@ -/*Daala video codec -Copyright (c) 2001-2013 Daala project contributors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ #if !defined(_entcode_H) #define _entcode_H (1) diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c index 18563b2e33a6fa092b47f4b12ef56ba0c9aeb8e7..b015956fa03d93e0e4de21293cd667a2a08fb02d 100644 --- a/aom_dsp/entdec.c +++ b/aom_dsp/entdec.c @@ -1,26 +1,13 @@ -/*Daala video codec -Copyright (c) 2001-2013 Daala project contributors. 
All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ #ifdef HAVE_CONFIG_H #include "./config.h" @@ -440,7 +427,7 @@ uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft) { ftb = OD_ILOG_NZ(ft) - OD_EC_UINT_BITS; ft1 = (int)(ft >> ftb) + 1; t = od_ec_decode_cdf_q15(dec, OD_UNIFORM_CDF_Q15(ft1), ft1); - t = t << ftb | od_ec_dec_bits(dec, ftb); + t = t << ftb | od_ec_dec_bits(dec, ftb, ""); if (t <= ft) return t; dec->error = 1; return ft; @@ -453,7 +440,7 @@ uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft) { ftb: The number of bits to extract. This must be between 0 and 25, inclusive. Return: The decoded bits.*/ -uint32_t od_ec_dec_bits(od_ec_dec *dec, unsigned ftb) { +uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb) { od_ec_window window; int available; uint32_t ret; diff --git a/aom_dsp/entdec.h b/aom_dsp/entdec.h index 80363b590271fcce087d5bc4d674c723734f9a39..6d6e2b51ea40dd974c4b197968eb79f4bddab702 100644 --- a/aom_dsp/entdec.h +++ b/aom_dsp/entdec.h @@ -1,26 +1,13 @@ -/*Daala video codec -Copyright (c) 2001-2013 Daala project contributors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ #if !defined(_entdec_H) #define _entdec_H (1) @@ -33,6 +20,14 @@ extern "C" { typedef struct od_ec_dec od_ec_dec; +#if OD_ACCOUNTING +#define OD_ACC_STR , char *acc_str +#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb, str) +#else +#define OD_ACC_STR +#define od_ec_dec_bits(dec, ftb, str) od_ec_dec_bits_(dec, ftb) +#endif + /*The entropy decoder context.*/ struct od_ec_dec { /*The start of the current input buffer.*/ @@ -91,7 +86,7 @@ OD_WARN_UNUSED_RESULT int od_ec_decode_cdf_unscaled_dyadic(od_ec_dec *dec, OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_uint(od_ec_dec *dec, uint32_t ft) OD_ARG_NONNULL(1); -OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits(od_ec_dec *dec, unsigned ftb) +OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_bits_(od_ec_dec *dec, unsigned ftb) OD_ARG_NONNULL(1); OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec) diff --git a/aom_dsp/entenc.c b/aom_dsp/entenc.c index 3e9cb62c533f609149988b327304c881624be1c2..390f61b9ba905d2c15d4ac0fb108caa6848d4d1a 100644 --- a/aom_dsp/entenc.c +++ b/aom_dsp/entenc.c @@ -1,26 +1,13 @@ -/*Daala video codec -Copyright (c) 2001-2013 Daala project contributors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ #ifdef HAVE_CONFIG_H #include "./config.h" diff --git a/aom_dsp/entenc.h b/aom_dsp/entenc.h index 32163f794af1012bd044a340a7d2ce2289e8d530..5e121b63332611f6ff47edbd8613aa147e869b2d 100644 --- a/aom_dsp/entenc.h +++ b/aom_dsp/entenc.h @@ -1,26 +1,13 @@ -/*Daala video codec -Copyright (c) 2001-2013 Daala project contributors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ #if !defined(_entenc_H) #define _entenc_H (1) diff --git a/av1/av1_common.mk b/av1/av1_common.mk index 75a2569f2e78bf0350fad4ba4ce38b19dd8f3de6..e254ddc6698242802a9ffd7476fe2a3b8a9628b8 100644 --- a/av1/av1_common.mk +++ b/av1/av1_common.mk @@ -96,6 +96,24 @@ endif AV1_COMMON_SRCS-yes += common/odintrin.c AV1_COMMON_SRCS-yes += common/odintrin.h +ifeq ($(CONFIG_PVQ),yes) +# PVQ from daala +AV1_COMMON_SRCS-yes += common/pvq.c +AV1_COMMON_SRCS-yes += common/pvq.h +AV1_COMMON_SRCS-yes += common/partition.c +AV1_COMMON_SRCS-yes += common/partition.h +AV1_COMMON_SRCS-yes += common/zigzag4.c +AV1_COMMON_SRCS-yes += common/zigzag8.c +AV1_COMMON_SRCS-yes += common/zigzag16.c +AV1_COMMON_SRCS-yes += common/zigzag32.c +AV1_COMMON_SRCS-yes += common/zigzag.h +AV1_COMMON_SRCS-yes += common/generic_code.c +AV1_COMMON_SRCS-yes += common/generic_code.h +AV1_COMMON_SRCS-yes += common/pvq_state.c +AV1_COMMON_SRCS-yes += common/pvq_state.h +AV1_COMMON_SRCS-yes += common/laplace_tables.c +endif + ifneq ($(CONFIG_AOM_HIGHBITDEPTH),yes) AV1_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans4_dspr2.c AV1_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans8_dspr2.c diff --git a/av1/av1_cx.mk b/av1/av1_cx.mk index 7ec294b92443fcbc7bfec4316e4157d60425498a..1dc31a466897d95a0e7209505e70bc3929486474 100644 --- a/av1/av1_cx.mk +++ b/av1/av1_cx.mk @@ -96,6 +96,16 @@ AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/clpf_rdo_sse4_1.c AV1_CX_SRCS-$(HAVE_NEON) += encoder/clpf_rdo_neon.c endif +ifeq ($(CONFIG_PVQ),yes) +# PVQ from daala +AV1_CX_SRCS-yes += encoder/daala_compat_enc.c +AV1_CX_SRCS-yes += encoder/pvq_encoder.c +AV1_CX_SRCS-yes += encoder/pvq_encoder.h +AV1_CX_SRCS-yes += encoder/encint.h +AV1_CX_SRCS-yes += encoder/generic_encoder.c +AV1_CX_SRCS-yes += encoder/laplace_encoder.c +endif + AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes) diff --git a/av1/av1_dx.mk b/av1/av1_dx.mk index 362e7c6d3542f145045c6204183fbfc211e627e0..24decc706273393f3df4db91b96c831ef229e194 100644 --- a/av1/av1_dx.mk +++ b/av1/av1_dx.mk @@ -32,4 +32,29 @@ AV1_DX_SRCS-yes += decoder/decoder.h AV1_DX_SRCS-yes += decoder/dsubexp.c AV1_DX_SRCS-yes += decoder/dsubexp.h +ifeq ($(CONFIG_PVQ),yes) +# PVQ from daala +AV1_DX_SRCS-yes += decoder/pvq_decoder.c +AV1_DX_SRCS-yes += decoder/pvq_decoder.h +AV1_DX_SRCS-yes += decoder/decint.h +AV1_DX_SRCS-yes += decoder/generic_decoder.c +AV1_DX_SRCS-yes += decoder/laplace_decoder.c +AV1_DX_SRCS-yes += encoder/hybrid_fwd_txfm.c +AV1_DX_SRCS-yes += encoder/hybrid_fwd_txfm.h + +AV1_DX_SRCS-yes += encoder/dct.c +AV1_DX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm +AV1_DX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c +AV1_DX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c + +ifneq ($(CONFIG_AOM_HIGHBITDEPTH),yes) +AV1_DX_SRCS-$(HAVE_NEON) += encoder/arm/neon/dct_neon.c +endif + +AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct4x4_msa.c +AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct8x8_msa.c +AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct16x16_msa.c +AV1_DX_SRCS-$(HAVE_MSA) += encoder/mips/msa/fdct_msa.h +endif + AV1_DX_SRCS-yes := $(filter-out $(AV1_DX_SRCS_REMOVE-yes),$(AV1_DX_SRCS-yes)) diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index d939c4494faa497699ed43ae34e36a11eb9f5719..6b7623b15a4a4bb3a8948449b7aafc181970a67f 100644 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl @@ -633,6 +633,196 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq 
"yes") { } # end encoder functions +# If PVQ is enabled, fwd transforms are required by decoder +if (aom_config("CONFIG_PVQ") eq "yes") { +# fdct functions + +if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { + add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/av1_fht4x4 sse2/; + + add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/av1_fht8x8 sse2/; + + add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/av1_fht16x16 sse2/; + + add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fwht4x4 sse2/; + if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") { + add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct4x4/; + + add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct4x4_1/; + + add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct8x8/; + + add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct8x8_1/; + + add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct16x16/; + + add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct16x16_1/; + + add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32/; + + add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32_rd/; + + add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32_1/; + + add_proto qw/void av1_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct4x4/; + + add_proto qw/void av1_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct8x8/; + + add_proto qw/void av1_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct8x8_1/; + + add_proto qw/void av1_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct16x16/; + + add_proto qw/void av1_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct16x16_1/; + + add_proto qw/void av1_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct32x32/; + + add_proto qw/void av1_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct32x32_rd/; + + add_proto qw/void av1_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct32x32_1/; + } else { + add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct4x4 sse2/; + + add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct4x4_1 sse2/; + + add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct8x8 sse2/; + + add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int 
stride"; + specialize qw/av1_fdct8x8_1 sse2/; + + add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct16x16 sse2/; + + add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct16x16_1 sse2/; + + add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32 sse2/; + + add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32_rd sse2/; + + add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32_1 sse2/; + + add_proto qw/void av1_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct4x4 sse2/; + + add_proto qw/void av1_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct8x8 sse2/; + + add_proto qw/void av1_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct8x8_1/; + + add_proto qw/void av1_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct16x16 sse2/; + + add_proto qw/void av1_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct16x16_1/; + + add_proto qw/void av1_highbd_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct32x32 sse2/; + + add_proto qw/void av1_highbd_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct32x32_rd sse2/; + + add_proto qw/void av1_highbd_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_highbd_fdct32x32_1/; + } +} else { + add_proto qw/void av1_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/av1_fht4x4 sse2 msa/; + + add_proto qw/void av1_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/av1_fht8x8 sse2 msa/; + + add_proto qw/void av1_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + specialize qw/av1_fht16x16 sse2 msa/; + + add_proto qw/void av1_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fwht4x4 msa sse2/; + if (aom_config("CONFIG_EMULATE_HARDWARE") eq "yes") { + add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct4x4/; + + add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct4x4_1/; + + add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct8x8/; + + add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct8x8_1/; + + add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct16x16/; + + add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct16x16_1/; + + add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32/; + + add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32_rd/; + + add_proto qw/void av1_fdct32x32_1/, "const 
int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32_1/; + } else { + add_proto qw/void av1_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct4x4 sse2/; + + add_proto qw/void av1_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct4x4_1 sse2/; + + add_proto qw/void av1_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct8x8 sse2/; + + add_proto qw/void av1_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct8x8_1 sse2/; + + add_proto qw/void av1_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct16x16 sse2/; + + add_proto qw/void av1_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct16x16_1 sse2/; + + add_proto qw/void av1_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32 sse2/; + + add_proto qw/void av1_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32_rd sse2/; + + add_proto qw/void av1_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride"; + specialize qw/av1_fdct32x32_1 sse2/; + } +} + +} + # Deringing Functions if (aom_config("CONFIG_DERING") eq "yes") { diff --git a/av1/common/blockd.c b/av1/common/blockd.c index 435f7bd534713967be8855ee51c477e3b3ad8503..932bdf2cee959b1aa4da653cc15975e39958b528 100644 --- a/av1/common/blockd.c +++ b/av1/common/blockd.c @@ -86,6 +86,7 @@ void av1_foreach_transformed_block(const MACROBLOCKD *const xd, av1_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); } +#if !CONFIG_PVQ void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, TX_SIZE tx_size, int has_eob, int aoff, int loff) { ENTROPY_CONTEXT *const a = pd->above_context + aoff; @@ -121,6 +122,7 @@ void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); } } +#endif void av1_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { int i; diff --git a/av1/common/blockd.h b/av1/common/blockd.h index d46ba57115ae59161277936235104fef22b95c00..5a1a23042b984c1d146a2105ea5023e13e47fe36 100644 --- a/av1/common/blockd.h +++ b/av1/common/blockd.h @@ -28,6 +28,11 @@ #include "av1/common/scale.h" #include "av1/common/seg_common.h" #include "av1/common/tile_common.h" +#if CONFIG_PVQ +#include "av1/common/pvq.h" +#include "av1/common/pvq_state.h" +#include "av1/decoder/decint.h" +#endif #ifdef __cplusplus extern "C" { @@ -87,6 +92,33 @@ static INLINE int is_inter_mode(PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWMV; } +#if CONFIG_PVQ +typedef struct PVQ_INFO { + int theta[PVQ_MAX_PARTITIONS]; + int max_theta[PVQ_MAX_PARTITIONS]; + int qg[PVQ_MAX_PARTITIONS]; + int k[PVQ_MAX_PARTITIONS]; + od_coeff y[OD_BSIZE_MAX * OD_BSIZE_MAX]; + int nb_bands; + int off[PVQ_MAX_PARTITIONS]; + int size[PVQ_MAX_PARTITIONS]; + int skip_rest; + int skip_dir; + int bs; // log of the block size minus two, + // i.e. equivalent to aom's TX_SIZE + int ac_dc_coded; // block skip info, indicating whether DC/AC is coded. 
+                    // bit0: DC coded, bit1 : AC coded (1 means coded)
+  tran_low_t dq_dc_residue;
+} PVQ_INFO;
+
+typedef struct PVQ_QUEUE {
+  PVQ_INFO *buf;  // buffer for pvq info, stored in encoding order
+  int curr_pos;   // curr position to write PVQ_INFO
+  int buf_len;    // allocated buffer length
+  int last_pos;   // last written position of PVQ_INFO in a tile
+} PVQ_QUEUE;
+#endif
+
 /* For keyframes, intra block modes are predicted by the (already decoded)
    modes for the Y blocks to the left and above us; for interframes, there
    is a single probability table. */
@@ -223,6 +255,12 @@ struct macroblockd_plane {
 #if CONFIG_AOM_QM
   const qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
 #endif
+
+#if CONFIG_PVQ
+  DECLARE_ALIGNED(16, int16_t, pred[MAX_SB_SQUARE]);
+  // PVQ: forward transformed predicted image, a reference for PVQ.
+  tran_low_t *pvq_ref_coeff;
+#endif
 };
 
 #define BLOCK_OFFSET(x, i) ((x) + (i)*16)
@@ -282,6 +320,9 @@ typedef struct macroblockd {
   PARTITION_CONTEXT *above_seg_context;
   PARTITION_CONTEXT left_seg_context[8];
 
+#if CONFIG_PVQ
+  daala_dec_ctx daala_dec;
+#endif
 #if CONFIG_AOM_HIGHBITDEPTH
   /* Bit depth: 8, 10, 12 */
   int bd;
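The PVQ_QUEUE added above is a flat array of per-block PVQ_INFO records, filled in encoding order and replayed when the tile is written out. As a rough illustration of the intended access pattern (the helper names here are hypothetical, not part of this patch):

    /* Hypothetical helpers, shown only to illustrate how the PVQ_QUEUE
       fields are meant to be used; the real tile loop lives elsewhere. */
    static int pvq_queue_push(PVQ_QUEUE *q, const PVQ_INFO *info) {
      if (q->curr_pos >= q->buf_len) return -1;  /* out of preallocated room */
      q->buf[q->curr_pos] = *info;  /* stored in encoding order */
      q->last_pos = q->curr_pos++;  /* last written position in the tile */
      return 0;
    }

    static void pvq_queue_reset(PVQ_QUEUE *q) {
      q->curr_pos = 0;   /* next write goes to the start of the tile */
      q->last_pos = -1;  /* nothing written yet */
    }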
diff --git a/av1/common/generic_code.c b/av1/common/generic_code.c
new file mode 100644
index 0000000000000000000000000000000000000000..4022cf15f77cb30d9d7488d0eaea3547d01dbfa3
--- /dev/null
+++ b/av1/common/generic_code.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "generic_code.h"
+
+void od_cdf_init(uint16_t *cdf, int ncdfs, int nsyms, int val, int first) {
+  int i;
+  int j;
+  for (i = 0; i < ncdfs; i++) {
+    for (j = 0; j < nsyms; j++) {
+      cdf[i*nsyms + j] = val*j + first;
+    }
+  }
+}
+
+/** Adapts a Q15 cdf after encoding/decoding a symbol. */
+void od_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate) {
+  int i;
+  *count = OD_MINI(*count + 1, 1 << rate);
+  OD_ASSERT(cdf[n - 1] == 32768);
+  if (*count >= 1 << rate) {
+    /* Steady-state adaptation based on a simple IIR with dyadic rate. */
+    for (i = 0; i < n; i++) {
+      int tmp;
+      /* When (i < val), we want the adjustment ((cdf[i] - tmp) >> rate) to be
+         positive so long as (cdf[i] > i + 1), and 0 when (cdf[i] == i + 1),
+         to ensure we don't drive any probabilities to 0. Replacing cdf[i] with
+         (i + 2) and solving ((i + 2 - tmp) >> rate == 1) for tmp produces
+         tmp == i + 2 - (1 << rate). Using this value of tmp with
+         cdf[i] == i + 1 instead gives an adjustment of 0 as desired.
+
+         When (i >= val), we want ((cdf[i] - tmp) >> rate) to be negative so
+         long as cdf[i] < 32768 - (n - 1 - i), and 0 when
+         cdf[i] == 32768 - (n - 1 - i), again to ensure we don't drive any
+         probabilities to 0. Since right-shifting any negative value is still
+         negative, we can solve (32768 - (n - 1 - i) - tmp == 0) for tmp,
+         producing tmp = 32769 - n + i. Using this value of tmp with smaller
+         values of cdf[i] instead gives negative adjustments, as desired.
+
+         Combining the two cases gives the expression below. These could be
+         stored in a lookup table indexed by n and rate to avoid the
+         arithmetic. */
+      tmp = 2 - (1 << rate) + i + (32767 + (1 << rate) - n)*(i >= val);
+      cdf[i] -= (cdf[i] - tmp) >> rate;
+    }
+  }
+  else {
+    int alpha;
+    /* Initial adaptation for the first symbols. The adaptation rate is
+       computed to be equivalent to what od_{en,de}code_cdf_adapt() does
+       when the initial cdf is set to increment/4. */
+    alpha = 4*32768/(n + 4**count);
+    for (i = 0; i < n; i++) {
+      int tmp;
+      tmp = (32768 - n)*(i >= val) + i + 1;
+      cdf[i] -= ((cdf[i] - tmp)*alpha) >> 15;
+    }
+  }
+  OD_ASSERT(cdf[n - 1] == 32768);
+}
+
+/** Initializes the cdfs and freq counts for a model.
+ *
+ * @param [out] model model being initialized
+ */
+void generic_model_init(generic_encoder *model) {
+  int i;
+  int j;
+  model->increment = 64;
+  for (i = 0; i < GENERIC_TABLES; i++) {
+    for (j = 0; j < 16; j++) {
+      /* Do flat initialization equivalent to a single symbol in each bin. */
+      model->cdf[i][j] = (j + 1) * model->increment;
+    }
+  }
+}
+
+/** Takes the base-2 log of E(x) in Q1.
+ *
+ * @param [in] ExQ16 expectation of x in Q16
+ *
+ * @retval 2*log2(ExQ16/2^16)
+ */
+int log_ex(int ex_q16) {
+  int lg;
+  int lg_q1;
+  int odd;
+  lg = OD_ILOG(ex_q16);
+  if (lg < 15) {
+    odd = ex_q16*ex_q16 > 2 << 2*lg;
+  }
+  else {
+    int tmp;
+    tmp = ex_q16 >> (lg - 8);
+    odd = tmp*tmp > (1 << 15);
+  }
+  lg_q1 = OD_MAXI(0, 2*lg - 33 + odd);
+  return lg_q1;
+}
+
+/** Updates the probability model based on the encoded/decoded value
+ *
+ * @param [in,out] model generic prob model
+ * @param [in,out] ExQ16 expectation of x
+ * @param [in] x variable encoded/decoded (used for ExQ16)
+ * @param [in] xs variable x after shift (used for the model)
+ * @param [in] id id of the icdf to adapt
+ * @param [in] integration integration period of ExQ16 (leaky average over
+ *             1<<integration samples)
+ */
+void generic_model_update(generic_encoder *model, int *ex_q16, int x, int xs,
+                          int id, int integration) {
+  int i;
+  int xenc;
+  uint16_t *cdf;
+  cdf = model->cdf[id];
+  /* Renormalize if we cannot add increment */
+  if (cdf[15] + model->increment > 32767) {
+    for (i = 0; i < 16; i++) {
+      /* Second term ensures that the pdf is non-null */
+      cdf[i] = (cdf[i] >> 1) + i + 1;
+    }
+  }
+  /* Update freq count */
+  xenc = OD_MINI(15, xs);
+  /* This can be easily vectorized */
+  for (i = xenc; i < 16; i++) cdf[i] += model->increment;
+  /* We could have saturated ExQ16 directly, but this is safe and simpler */
+  x = OD_MINI(x, 32767);
+  OD_IIR_DIADIC(*ex_q16, x << 16, integration);
+}
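To make the steady-state branch of od_cdf_adapt_q15() concrete, here is a small worked call, checked by hand against the expression for tmp (illustrative only, not part of the patch):

    /* n = 4 symbols, rate = 5, coding symbol val = 1 from a uniform Q15 cdf,
       with the counter already saturated so the IIR branch is taken. */
    uint16_t cdf[4] = { 8192, 16384, 24576, 32768 };
    int count = 32;  /* == 1 << rate */
    od_cdf_adapt_q15(1, cdf, 4, &count, 5);
    /* cdf is now { 7936, 16896, 24832, 32768 }: the mass of symbol 1
       (cdf[1] - cdf[0]) grew from 8192 to 8960, no symbol's probability was
       driven to zero, and the cdf[n - 1] == 32768 invariant held. */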
diff --git a/av1/common/generic_code.h b/av1/common/generic_code.h
new file mode 100644
index 0000000000000000000000000000000000000000..6059190e3feadb02e9590867b5c2c5b110793e05
--- /dev/null
+++ b/av1/common/generic_code.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_generic_code_H)
+# define _generic_code_H
+
+# include "aom_dsp/entdec.h"
+# include "aom_dsp/entenc.h"
+
+# define GENERIC_TABLES 12
+
+#if OD_ACCOUNTING
+# define generic_decode(dec, model, max, ex_q16, integration, str) generic_decode_(dec, model, max, ex_q16, integration, str)
+# define od_decode_cdf_adapt_q15(ec, cdf, n, count, rate, str) od_decode_cdf_adapt_q15_(ec, cdf, n, count, rate, str)
+# define od_decode_cdf_adapt(ec, cdf, n, increment, str) od_decode_cdf_adapt_(ec, cdf, n, increment, str)
+#else
+# define generic_decode(dec, model, max, ex_q16, integration, str) generic_decode_(dec, model, max, ex_q16, integration)
+# define od_decode_cdf_adapt_q15(ec, cdf, n, count, rate, str) od_decode_cdf_adapt_q15_(ec, cdf, n, count, rate)
+# define od_decode_cdf_adapt(ec, cdf, n, increment, str) od_decode_cdf_adapt_(ec, cdf, n, increment)
+#endif
+
+typedef struct {
+  /** cdf for multiple expectations of x */
+  uint16_t cdf[GENERIC_TABLES][16];
+  /** Frequency increment for learning the cdfs */
+  int increment;
+} generic_encoder;
+
+#define OD_IIR_DIADIC(y, x, shift) ((y) += ((x) - (y)) >> (shift))
+
+void generic_model_init(generic_encoder *model);
+
+#define OD_CDFS_INIT(cdf, val) od_cdf_init(&cdf[0][0],\
+  sizeof(cdf)/sizeof(cdf[0]), sizeof(cdf[0])/sizeof(cdf[0][0]), val, val)
+
+#define OD_CDFS_INIT_FIRST(cdf, val, first) od_cdf_init(&cdf[0][0],\
+  sizeof(cdf)/sizeof(cdf[0]), sizeof(cdf[0])/sizeof(cdf[0][0]), val, first)
+
+#define OD_SINGLE_CDF_INIT(cdf, val) od_cdf_init(cdf,\
+  1, sizeof(cdf)/sizeof(cdf[0]), val, val)
+
+#define OD_SINGLE_CDF_INIT_FIRST(cdf, val, first) od_cdf_init(cdf,\
+  1, sizeof(cdf)/sizeof(cdf[0]), val, first)
+
+void od_cdf_init(uint16_t *cdf, int ncdfs, int nsyms, int val, int first);
+
+void od_cdf_adapt_q15(int val, uint16_t *cdf, int n, int *count, int rate);
+
+void od_encode_cdf_adapt_q15(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int *count, int rate);
+
+void od_encode_cdf_adapt(od_ec_enc *ec, int val, uint16_t *cdf, int n,
+ int increment);
+
+int od_decode_cdf_adapt_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int increment OD_ACC_STR);
+
+void generic_encode(od_ec_enc *enc, generic_encoder *model, int x, int max,
+ int *ex_q16, int integration);
+double generic_encode_cost(generic_encoder *model, int x, int max,
+ int *ex_q16);
+
+double od_encode_cdf_cost(int val, uint16_t *cdf, int n);
+
+int od_decode_cdf_adapt_q15_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int *count, int rate OD_ACC_STR);
+
+int generic_decode_(od_ec_dec *dec, generic_encoder *model, int max,
+ int *ex_q16, int integration OD_ACC_STR);
+
+int log_ex(int ex_q16);
+
+void generic_model_update(generic_encoder *model, int *ex_q16, int x, int xs,
+ int id, int integration);
+
+#endif
diff --git a/av1/common/laplace_tables.c b/av1/common/laplace_tables.c
new file mode 100644
index 0000000000000000000000000000000000000000..f1c3f9afe39331eb5ae4f934afe526286b6b9655
--- /dev/null
+++ b/av1/common/laplace_tables.c
@@ -0,0 +1,272 @@
+/* This file is auto-generated using "gen_laplace_tables 128 7" */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "pvq.h"
+
+const uint16_t EXP_CDF_TABLE[128][16] = {
+  {32753,32754,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {32499,32753,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
+  {32243,32747,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768},
{31987,32737,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {31732,32724,32755,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {31476,32706,32754,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {31220,32684,32753,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {30964,32658,32751,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {30708,32628,32748,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {30452,32594,32745,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {30198,32558,32742,32756,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {29941,32515,32736,32755,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {29686,32470,32731,32755,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {29429,32419,32723,32754,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {29174,32366,32715,32753,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {28918,32308,32705,32752,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {28662,32246,32694,32750,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {28406,32180,32681,32748,32757,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {28150,32110,32667,32745,32756,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {27894,32036,32651,32742,32756,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {27639,31959,32634,32739,32755,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {27383,31877,32614,32735,32755,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {27126,31790,32592,32730,32754,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {26871,31701,32569,32725,32753,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {26615,31607,32543,32719,32752,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {26361,31511,32517,32713,32751,32758,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {26104,31408,32485,32704,32748,32757,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {25848,31302,32452,32695,32746,32757,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {25591,31191,32416,32684,32743,32756,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {25336,31078,32379,32674,32741,32756,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {25080,30960,32338,32661,32737,32755,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {24824,30838,32295,32648,32733,32754,32759,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {24568,30712,32248,32632,32728,32752,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {24313,30583,32199,32616,32723,32751,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {24057,30449,32147,32598,32718,32750,32758,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {23801,30311,32091,32578,32711,32747,32757,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {23546,30170,32033,32557,32704,32745,32757,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {23288,30022,31969,32532,32695,32742,32756,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + 
{23033,29873,31904,32507,32686,32739,32755,32760,32761,32762,32763,32764,32765,32766,32767,32768}, + {22778,29720,31835,32479,32675,32735,32753,32759,32761,32762,32763,32764,32765,32766,32767,32768}, + {22521,29561,31761,32449,32664,32731,32752,32759,32761,32762,32763,32764,32765,32766,32767,32768}, + {22267,29401,31686,32418,32652,32727,32751,32759,32761,32762,32763,32764,32765,32766,32767,32768}, + {22011,29235,31605,32383,32638,32722,32749,32758,32761,32762,32763,32764,32765,32766,32767,32768}, + {21754,29064,31520,32345,32622,32715,32746,32757,32761,32762,32763,32764,32765,32766,32767,32768}, + {21501,28893,31434,32307,32607,32710,32745,32757,32761,32762,32763,32764,32765,32766,32767,32768}, + {21243,28713,31339,32262,32587,32701,32741,32755,32760,32762,32763,32764,32765,32766,32767,32768}, + {20988,28532,31243,32217,32567,32693,32738,32754,32760,32762,32763,32764,32765,32766,32767,32768}, + {20730,28344,31140,32167,32544,32682,32733,32752,32759,32762,32763,32764,32765,32766,32767,32768}, + {20476,28156,31036,32116,32521,32673,32730,32751,32759,32762,32763,32764,32765,32766,32767,32768}, + {20220,27962,30926,32061,32495,32661,32725,32749,32758,32762,32763,32764,32765,32766,32767,32768}, + {19963,27763,30810,32000,32465,32647,32718,32746,32757,32761,32763,32764,32765,32766,32767,32768}, + {19708,27562,30691,31938,32435,32633,32712,32743,32756,32761,32763,32764,32765,32766,32767,32768}, + {19454,27358,30569,31873,32403,32618,32705,32741,32755,32761,32763,32764,32765,32766,32767,32768}, + {19196,27146,30438,31801,32365,32599,32696,32736,32753,32760,32763,32764,32765,32766,32767,32768}, + {18942,26934,30306,31728,32328,32581,32688,32733,32752,32760,32763,32764,32765,32766,32767,32768}, + {18684,26714,30164,31647,32284,32558,32676,32727,32749,32758,32762,32764,32765,32766,32767,32768}, + {18429,26493,30021,31565,32240,32535,32664,32721,32746,32757,32762,32764,32765,32766,32767,32768}, + {18174,26268,29872,31477,32192,32510,32652,32715,32743,32756,32762,32764,32765,32766,32767,32768}, + {17920,26040,29719,31386,32141,32483,32638,32708,32740,32754,32761,32764,32765,32766,32767,32768}, + {17661,25803,29556,31286,32083,32451,32620,32698,32734,32751,32759,32763,32765,32766,32767,32768}, + {17406,25566,29391,31184,32024,32418,32603,32690,32731,32750,32759,32763,32765,32766,32767,32768}, + {17151,25325,29220,31076,31961,32383,32584,32680,32726,32748,32758,32763,32765,32766,32767,32768}, + {16896,25080,29044,30964,31894,32344,32562,32668,32719,32744,32756,32762,32765,32766,32767,32768}, + {16639,24829,28860,30844,31821,32302,32539,32655,32712,32740,32754,32761,32764,32766,32767,32768}, + {16384,24576,28672,30720,31744,32256,32512,32640,32704,32736,32752,32760,32764,32766,32767,32768}, + {16130,24320,28479,30591,31663,32208,32485,32625,32696,32732,32750,32759,32764,32766,32767,32768}, + {15872,24056,28276,30452,31574,32152,32450,32604,32683,32724,32745,32756,32762,32765,32766,32768}, + {15615,23789,28068,30308,31480,32094,32415,32583,32671,32717,32741,32754,32761,32764,32766,32768}, + {15361,23521,27856,30159,31382,32032,32377,32560,32657,32709,32737,32752,32760,32764,32766,32768}, + {15103,23245,27634,30000,31275,31963,32334,32534,32642,32700,32731,32748,32757,32762,32765,32768}, + {14848,22968,27409,29837,31165,31891,32288,32505,32624,32689,32725,32744,32755,32761,32764,32768}, + {14592,22686,27176,29666,31047,31813,32238,32474,32605,32678,32718,32740,32752,32759,32763,32768}, + {14336,22400,26936,29488,30923,31730,32184,32439,32583,32664,32709,32735,32749,32757,32762,32768}, + 
{14079,22109,26689,29301,30791,31641,32125,32401,32559,32649,32700,32729,32746,32756,32761,32768}, + {13825,21817,26437,29108,30652,31545,32061,32359,32532,32632,32690,32723,32742,32753,32759,32768}, + {13568,21518,26176,28905,30504,31441,31990,32312,32501,32611,32676,32714,32736,32749,32757,32768}, + {13314,21218,25911,28697,30351,31333,31916,32262,32468,32590,32662,32705,32731,32746,32755,32768}, + {13054,20908,25633,28475,30185,31214,31833,32205,32429,32564,32645,32694,32723,32741,32752,32768}, + {12803,20603,25356,28252,30017,31093,31748,32147,32390,32538,32628,32683,32717,32737,32749,32768}, + {12544,20286,25064,28013,29833,30956,31649,32077,32341,32504,32605,32667,32705,32729,32744,32768}, + {12288,19968,24768,27768,29643,30815,31547,32005,32291,32470,32582,32652,32696,32723,32740,32768}, + {12033,19647,24465,27514,29443,30664,31437,31926,32235,32431,32555,32633,32683,32714,32734,32768}, + {11777,19321,24154,27250,29233,30504,31318,31839,32173,32387,32524,32612,32668,32704,32727,32768}, + {11521,18991,23835,26976,29013,30334,31190,31745,32105,32338,32489,32587,32651,32692,32719,32768}, + {11265,18657,23508,26691,28780,30151,31051,31641,32028,32282,32449,32559,32631,32678,32709,32768}, + {11006,18316,23170,26394,28535,29957,30901,31528,31944,32220,32404,32526,32607,32661,32697,32768}, + {10752,17976,22830,26091,28282,29754,30743,31408,31854,32154,32356,32491,32582,32643,32684,32768}, + {10496,17630,22479,25775,28015,29538,30573,31276,31754,32079,32300,32450,32552,32621,32668,32768}, + {10240,17280,22120,25448,27736,29309,30390,31133,31644,31995,32237,32403,32517,32595,32649,32768}, + { 9984,16926,21753,25109,27443,29066,30194,30978,31523,31902,32166,32349,32476,32565,32627,32768}, + { 9728,16568,21377,24759,27137,28809,29984,30811,31392,31801,32088,32290,32432,32532,32602,32768}, + { 9474,16208,20995,24399,26819,28539,29762,30631,31249,31688,32000,32222,32380,32492,32572,32768}, + { 9216,15840,20601,24023,26483,28251,29522,30435,31091,31563,31902,32146,32321,32447,32537,32768}, + { 8959,15469,20199,23636,26133,27947,29265,30223,30919,31425,31792,32059,32253,32394,32496,32768}, + { 8705,15097,19791,23238,25770,27629,28994,29997,30733,31274,31671,31963,32177,32334,32449,32768}, + { 8449,14719,19373,22827,25390,27292,28704,29752,30530,31107,31535,31853,32089,32264,32394,32768}, + { 8192,14336,18944,22400,24992,26936,28394,29488,30308,30923,31384,31730,31989,32184,32330,32768}, + { 7936,13950,18507,21961,24578,26561,28064,29203,30066,30720,31216,31592,31877,32093,32256,32768}, + { 7678,13558,18060,21507,24146,26166,27713,28897,29804,30498,31030,31437,31749,31988,32171,32768}, + { 7423,13165,17606,21041,23698,25753,27342,28571,29522,30257,30826,31266,31606,31869,32073,32768}, + { 7168,12768,17143,20561,23231,25317,26947,28220,29215,29992,30599,31073,31444,31734,31960,32768}, + { 6911,12365,16669,20065,22744,24858,26526,27842,28881,29701,30348,30858,31261,31579,31830,32768}, + { 6657,11961,16188,19556,22240,24379,26083,27441,28523,29385,30072,30620,31056,31404,31681,32768}, + { 6400,11550,15694,19029,21712,23871,25609,27007,28132,29037,29766,30352,30824,31204,31509,32768}, + { 6142,11134,15190,18486,21164,23340,25108,26544,27711,28659,29429,30055,30564,30977,31313,32768}, + { 5890,10720,14682,17932,20598,22785,24579,26051,27258,28248,29060,29726,30273,30721,31089,32768}, + { 5631,10295,14157,17356,20005,22199,24016,25520,26766,27798,28652,29359,29945,30430,30832,32768}, + { 5377, 9871,13628,16768,19393,21587,23421,24954,26236,27308,28204,28953,29579,30102,30539,32768}, + { 5121, 
9441,13086,16161,18756,20945,22792,24351,25666,26776,27712,28502,29169,29731,30206,32768}, + { 4865, 9007,12534,15538,18096,20274,22129,23708,25053,26198,27173,28004,28711,29313,29826,32768}, + { 4608, 8568,11971,14896,17409,19569,21425,23020,24391,25569,26581,27451,28199,28842,29394,32768}, + { 4351, 8125,11398,14236,16697,18831,20682,22287,23679,24886,25933,26841,27628,28311,28903,32768}, + { 4096, 7680,10816,13560,15961,18062,19900,21508,22915,24146,25224,26167,26992,27714,28346,32768}, + { 3840, 7230,10223,12865,15197,17256,19074,20679,22096,23347,24451,25426,26287,27047,27718,32768}, + { 3584, 6776, 9619,12151,14406,16414,18203,19796,21215,22479,23604,24606,25499,26294,27002,32768}, + { 3328, 6318, 9004,11417,13585,15533,17283,18856,20269,21538,22678,23703,24624,25451,26194,32768}, + { 3072, 5856, 8379,10665,12737,14615,16317,17859,19257,20524,21672,22712,23655,24509,25283,32768}, + { 2816, 5390, 7743, 9894,11860,13657,15299,16800,18172,19426,20573,21621,22579,23455,24255,32768}, + { 2560, 4920, 7096, 9102,10951,12656,14227,15676,17011,18242,19377,20423,21388,22277,23097,32768}, + { 2304, 4446, 6437, 8288,10009,11609,13097,14480,15766,16961,18072,19105,20066,20959,21789,32768}, + { 2048, 3968, 5768, 7456, 9038,10521,11911,13215,14437,15583,16657,17664,18608,19493,20323,32768}, + { 1792, 3486, 5087, 6601, 8032, 9385,10664,11873,13016,14096,15117,16082,16995,17858,18673,32768}, + { 1536, 3000, 4395, 5725, 6993, 8201, 9353,10451,11497,12494,13444,14350,15213,16036,16820,32768}, + { 1280, 2510, 3692, 4828, 5919, 6968, 7976, 8944, 9875,10769,11628,12454,13248,14011,14744,32768}, + { 1024, 2016, 2977, 3908, 4810, 5684, 6530, 7350, 8144, 8913, 9658,10380,11080,11758,12415,32768}, + { 768, 1518, 2250, 2965, 3663, 4345, 5011, 5662, 6297, 6917, 7523, 8115, 8693, 9257, 9808,32768}, + { 512, 1016, 1512, 2000, 2481, 2954, 3420, 3879, 4330, 4774, 5211, 5642, 6066, 6483, 6894,32768}, + { 256, 510, 762, 1012, 1260, 1506, 1750, 1992, 2232, 2471, 2708, 2943, 3176, 3407, 3636,32768}, +}; + + +const uint16_t LAPLACE_OFFSET[128] = { + 0, + 29871, + 28672, + 27751, + 26975, + 26291, + 25673, + 25105, + 24576, + 24079, + 23609, + 23162, + 22734, + 22325, + 21931, + 21550, + 21182, + 20826, + 20480, + 20143, + 19815, + 19495, + 19183, + 18877, + 18579, + 18286, + 17999, + 17718, + 17442, + 17170, + 16904, + 16642, + 16384, + 16129, + 15879, + 15633, + 15390, + 15150, + 14913, + 14680, + 14450, + 14222, + 13997, + 13775, + 13556, + 13338, + 13124, + 12911, + 12701, + 12493, + 12288, + 12084, + 11882, + 11682, + 11484, + 11288, + 11094, + 10901, + 10710, + 10521, + 10333, + 10147, + 9962, + 9779, + 9597, + 9417, + 9238, + 9060, + 8884, + 8709, + 8535, + 8363, + 8192, + 8021, + 7853, + 7685, + 7518, + 7352, + 7188, + 7025, + 6862, + 6701, + 6540, + 6381, + 6222, + 6065, + 5908, + 5753, + 5598, + 5444, + 5291, + 5138, + 4987, + 4837, + 4687, + 4538, + 4390, + 4242, + 4096, + 3950, + 3804, + 3660, + 3516, + 3373, + 3231, + 3089, + 2948, + 2808, + 2668, + 2529, + 2391, + 2253, + 2116, + 1979, + 1843, + 1708, + 1573, + 1439, + 1306, + 1172, + 1040, + 908, + 777, + 646, + 516, + 386, + 257, + 128, +}; diff --git a/av1/common/odintrin.c b/av1/common/odintrin.c index bb36104d0511d87efc35d98af504800fba4606d9..868efacc99c9cd44896058a1fcf998fa5782bf47 100644 --- a/av1/common/odintrin.c +++ b/av1/common/odintrin.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * Copyright (c) 2001-2016, Alliance for Open Media. 
All rights reserved
  *
  * This source code is subject to the terms of the BSD 2 Clause License and
  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
@@ -8,8 +8,21 @@
  * Media Patent License 1.0 was not distributed with this source code in the
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
+
+/* clang-format off */
+
 #include "av1/common/odintrin.h"
 
+#if defined(OD_ENABLE_ASSERTIONS)
+# include <stdio.h>
+
+void od_fatal_impl(const char *_str, const char *_file, int _line) {
+  fprintf(stderr, "Fatal (internal) error in %s, line %d: %s\n",
+   _file, _line, _str);
+  abort();
+}
+#endif
+
 /*Constants for use with OD_DIVU_SMALL().
   See \cite{Rob05} for details on computing these constants.
   @INPROCEEDINGS{Rob05,
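od_fatal_impl() is the backstop that OD_ASSERT() calls into when OD_ENABLE_ASSERTIONS is defined (odintrin.h turns it on below). The wiring follows the usual Daala pattern, sketched here for orientation since the macro itself is outside this hunk:

    /* Sketch of the expected OD_ASSERT wiring (Daala convention; assumed,
       not quoted from this patch). */
    #define OD_FATAL(str) (od_fatal_impl(str, __FILE__, __LINE__))
    #define OD_ASSERT(cond) \
      do { \
        if (!(cond)) OD_FATAL("assertion failed: " #cond); \
      } while (0)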
OD_ILOG_NZ(x) : 0) -#define OD_LOG2 AOMLOG2 +#define OD_LOG2(x) (M_LOG2E*log(x)) +#define OD_EXP2(x) (exp(M_LN2*(x))) /*Enable special features for gcc and compatible compilers.*/ #if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) @@ -121,10 +162,92 @@ void od_fatal_impl(const char *_str, const char *_file, int _line); /** Copy n elements of memory from src to dst, allowing overlapping regions. The 0* term provides compile-time type checking */ #if !defined(OVERRIDE_OD_MOVE) -#define OD_MOVE(dst, src, n) \ - (memmove((dst), (src), sizeof(*(dst)) * (n) + 0 * ((dst) - (src)))) +# define OD_MOVE(dst, src, n) \ + (memmove((dst), (src), sizeof(*(dst))*(n) + 0*((dst) - (src)) )) #endif +/** Linkage will break without this if using a C++ compiler, and will issue + * warnings without this for a C compiler*/ +#if defined(__cplusplus) +# define OD_EXTERN extern +#else +# define OD_EXTERN +#endif + +/** Set n elements of dst to zero */ +#if !defined(OVERRIDE_OD_CLEAR) +# define OD_CLEAR(dst, n) (memset((dst), 0, sizeof(*(dst))*(n))) +#endif + +/** Silence unused parameter/variable warnings */ +# define OD_UNUSED(expr) (void)(expr) + +#if defined(OD_FLOAT_PVQ) +typedef double od_val16; +typedef double od_val32; +# define OD_QCONST32(x, bits) (x) +# define OD_ROUND16(x) (x) +# define OD_ROUND32(x) (x) +# define OD_SHL(x, shift) (x) +# define OD_SHR(x, shift) (x) +# define OD_SHR_ROUND(x, shift) (x) +# define OD_ABS(x) (fabs(x)) +# define OD_MULT16_16(a, b) ((a)*(b)) +# define OD_MULT16_32_Q16(a, b) ((a)*(b)) +#else +typedef int16_t od_val16; +typedef int32_t od_val32; +/** Compile-time conversion of float constant to 32-bit value */ +# define OD_QCONST32(x, bits) ((od_val32)(.5 + (x)*(((od_val32)1) << (bits)))) +# define OD_ROUND16(x) (int16_t)(floor(.5 + (x))) +# define OD_ROUND32(x) (int32_t)(floor(.5 + (x))) +/*Shift x left by shift*/ +# define OD_SHL(a, shift) ((int32_t)((uint32_t)(a) << (shift))) +/*Shift x right by shift (without rounding)*/ +# define OD_SHR(x, shift) \ + ((int32_t)((x) >> (shift))) +/*Shift x right by shift (with rounding)*/ +# define OD_SHR_ROUND(x, shift) \ + ((int32_t)(((x) + (1 << (shift) >> 1)) >> (shift))) +/*Shift x right by shift (without rounding) or left by -shift if shift + is negative.*/ +# define OD_VSHR(x, shift) \ + (((shift) > 0) ? OD_SHR(x, shift) : OD_SHL(x, -(shift))) +/*Shift x right by shift (with rounding) or left by -shift if shift + is negative.*/ +# define OD_VSHR_ROUND(x, shift) \ + (((shift) > 0) ? OD_SHR_ROUND(x, shift) : OD_SHL(x, -(shift))) +# define OD_ABS(x) (abs(x)) +/* (od_val32)(od_val16) gives TI compiler a hint that it's 16x16->32 multiply */ +/** 16x16 multiplication where the result fits in 32 bits */ +# define OD_MULT16_16(a, b) \ + (((od_val32)(od_val16)(a))*((od_val32)(od_val16)(b))) +/* Multiplies 16-bit a by 32-bit b and keeps bits [16:47]. */ +# define OD_MULT16_32_Q16(a, b) ((int16_t)(a)*(int64_t)(int32_t)(b) >> 16) +/*16x16 multiplication where the result fits in 16 bits, without rounding.*/ +# define OD_MULT16_16_Q15(a, b) \ + (((int16_t)(a)*((int32_t)(int16_t)(b))) >> 15) +/*16x16 multiplication where the result fits in 16 bits, without rounding.*/ +# define OD_MULT16_16_Q16(a, b) \ + ((((int16_t)(a))*((int32_t)(int16_t)(b))) >> 16) +#endif + +/*All of these macros should expect floats as arguments.*/ +/*These two should compile as a single SSE instruction.*/ +# define OD_MINF(a, b) ((a) < (b) ? (a) : (b)) +# define OD_MAXF(a, b) ((a) > (b) ? 
(a) : (b)) + +# define OD_DIV_R0(x, y) (((x) + OD_FLIPSIGNI((((y) + 1) >> 1) - 1, (x)))/(y)) + +# define OD_SIGNMASK(a) (-((a) < 0)) +# define OD_FLIPSIGNI(a, b) (((a) + OD_SIGNMASK(b)) ^ OD_SIGNMASK(b)) + +# define OD_MULT16_16_Q15(a, b) \ + (((int16_t)(a)*((int32_t)(int16_t)(b))) >> 15) + +/* Multiplies 16-bit a by 32-bit b and keeps bits [16:47]. */ +# define OD_MULT16_32_Q16(a, b) ((int16_t)(a)*(int64_t)(int32_t)(b) >> 16) + #ifdef __cplusplus } // extern "C" #endif diff --git a/av1/common/onyxc_int.h b/av1/common/onyxc_int.h index 0d42119f7613f151e120e922d571bb65151a3feb..b4e51663135169568d710ccd720eb8f644aa5b21 100644 --- a/av1/common/onyxc_int.h +++ b/av1/common/onyxc_int.h @@ -23,6 +23,10 @@ #include "av1/common/frame_buffers.h" #include "av1/common/loopfilter.h" #include "av1/common/tile_common.h" +#include "av1/common/odintrin.h" +#if CONFIG_PVQ +#include "av1/common/pvq.h" +#endif #ifdef __cplusplus extern "C" { @@ -437,11 +441,17 @@ static INLINE int frame_is_intra_only(const AV1_COMMON *const cm) { } static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd, +#if CONFIG_PVQ + tran_low_t *pvq_ref_coeff, +#endif tran_low_t *dqcoeff) { int i; for (i = 0; i < MAX_MB_PLANE; ++i) { xd->plane[i].dqcoeff = dqcoeff; +#if CONFIG_PVQ + xd->plane[i].pvq_ref_coeff = pvq_ref_coeff; +#endif xd->above_context[i] = cm->above_context + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); diff --git a/av1/common/partition.c b/av1/common/partition.c new file mode 100644 index 0000000000000000000000000000000000000000..63d9d6921ec4bddfc150e09791bcaa7792393717 --- /dev/null +++ b/av1/common/partition.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +/* clang-format off */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "enums.h" +#include "odintrin.h" +#include "partition.h" +#include "zigzag.h" + +OD_EXTERN const index_pair *OD_ZIGZAG4[4] = { + OD_ZIGZAG4_DCT_DCT, + OD_ZIGZAG4_ADST_DCT, + OD_ZIGZAG4_DCT_ADST, + OD_ZIGZAG4_ADST_ADST +}; + +OD_EXTERN const index_pair *OD_ZIGZAG8[4] = { + OD_ZIGZAG8_DCT_DCT, + OD_ZIGZAG8_ADST_DCT, + OD_ZIGZAG8_DCT_ADST, + OD_ZIGZAG8_ADST_ADST +}; + +OD_EXTERN const index_pair *OD_ZIGZAG16[4] = { + OD_ZIGZAG16_DCT_DCT, + OD_ZIGZAG16_ADST_DCT, + OD_ZIGZAG16_DCT_ADST, + OD_ZIGZAG16_ADST_ADST +}; + +OD_EXTERN const index_pair *OD_ZIGZAG32[4] = { + OD_ZIGZAG32_DCT_DCT, + OD_ZIGZAG32_DCT_DCT, + OD_ZIGZAG32_DCT_DCT, + OD_ZIGZAG32_DCT_DCT +}; + +/* The tables below specify how coefficient blocks are translated to + and from PVQ partition coding scan order for 4x4, 8x8, 16x16 and 32x32. */ + +static const int OD_LAYOUT32_OFFSETS[4] = { 0, 128, 256, 768 }; +const band_layout OD_LAYOUT32 = { + OD_ZIGZAG32, + 32, + 3, + OD_LAYOUT32_OFFSETS +}; + +static const int OD_LAYOUT16_OFFSETS[4] = { 0, 32, 64, 192 }; +const band_layout OD_LAYOUT16 = { + OD_ZIGZAG16, + 16, + 3, + OD_LAYOUT16_OFFSETS +}; + +const int OD_LAYOUT8_OFFSETS[4] = { 0, 8, 16, 48 }; +const band_layout OD_LAYOUT8 = { + OD_ZIGZAG8, + 8, + 3, + OD_LAYOUT8_OFFSETS +}; + +static const int OD_LAYOUT4_OFFSETS[2] = { 0, 15 }; +const band_layout OD_LAYOUT4 = { + OD_ZIGZAG4, + 4, + 1, + OD_LAYOUT4_OFFSETS +}; + +/* First element is the number of bands, followed by the list of all the band + boundaries. */ +static const int OD_BAND_OFFSETS4[] = {1, 1, 16}; +static const int OD_BAND_OFFSETS8[] = {4, 1, 16, 24, 32, 64}; +static const int OD_BAND_OFFSETS16[] = {7, 1, 16, 24, 32, 64, 96, 128, 256}; +static const int OD_BAND_OFFSETS32[] = {10, 1, 16, 24, 32, 64, 96, 128, 256, + 384, 512, 1024}; +static const int OD_BAND_OFFSETS64[] = {13, 1, 16, 24, 32, 64, 96, 128, 256, + 384, 512, 1024, 1536, 2048, 4096}; + +const int *const OD_BAND_OFFSETS[OD_NBSIZES + 1] = { + OD_BAND_OFFSETS4, + OD_BAND_OFFSETS8, + OD_BAND_OFFSETS16, + OD_BAND_OFFSETS32, + OD_BAND_OFFSETS64 +}; + +/** Perform a single stage of conversion from a coefficient block in + * raster order into coding scan order + * + * @param [in] layout scan order specification + * @param [out] dst destination vector + * @param [in] src source coefficient block + * @param [in] stride source block row stride + */ +static void od_band_from_raster(const band_layout *layout, int16_t *dst, + const int16_t *src, int stride, TX_TYPE tx_type) { + int i; + int len; + len = layout->band_offsets[layout->nb_bands]; + for (i = 0; i < len; i++) { + dst[i] = src[layout->dst_table[tx_type][i][1]*stride + layout->dst_table[tx_type][i][0]]; + } +} + +/** Perform a single stage of conversion from a vector in coding scan + order back into a coefficient block in raster order + * + * @param [in] layout scan order specification + * @param [out] dst destination coefficient block + * @param [in] src source vector + * @param [in] stride destination block row stride + */ +static void od_raster_from_band(const band_layout *layout, int16_t *dst, + int stride, TX_TYPE tx_type, const int16_t *src) { + int i; + int len; + len = layout->band_offsets[layout->nb_bands]; + for (i = 0; i < len; i++) { + dst[layout->dst_table[tx_type][i][1]*stride + layout->dst_table[tx_type][i][0]] = src[i]; + } +} + +static const band_layout *const OD_LAYOUTS[] = {&OD_LAYOUT4, &OD_LAYOUT8, + &OD_LAYOUT16, &OD_LAYOUT32}; + +/** Converts a coefficient
block in raster order into a vector in + * coding scan order with the PVQ partitions laid out one after + * another. This works in stages; the 4x4 conversion is applied to + * the coefficients nearest DC, then the 8x8 applied to the 8x8 block + * nearest DC that was not already coded by 4x4, then 16x16 following + * the same pattern. + * + * @param [out] dst destination vector + * @param [in] n block size (along one side) + * @param [in] ty_type transform type + * @param [in] src source coefficient block + * @param [in] stride source block row stride + */ +void od_raster_to_coding_order(int16_t *dst, int n, TX_TYPE ty_type, + const int16_t *src, int stride) { + int bs; + /* dst + 1 because DC is not included for 4x4 blocks. */ + od_band_from_raster(OD_LAYOUTS[0], dst + 1, src, stride, ty_type); + for (bs = 1; bs < OD_NBSIZES; bs++) { + int size; + int offset; + /* Length of block size > 4. */ + size = 1 << (OD_LOG_BSIZE0 + bs); + /* Offset is the size of the previous block squared. */ + offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs); + if (n >= size) { + /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */ + od_band_from_raster(OD_LAYOUTS[bs], dst + offset, src, stride, ty_type); + } + } + dst[0] = src[0]; +} + +/** Converts a vector in coding scan order with the PVQ partitions + * laid out one after another into a coefficient block in raster + * order. This works in stages in the reverse order of raster->scan + * order; the 16x16 conversion is applied to the coefficients that + * don't appear in an 8x8 block, then the 8x8 applied to the 8x8 block + * sans the 4x4 block it contains, then 4x4 is converted sans DC. + * + * @param [out] dst destination coefficient block + * @param [in] stride destination block row stride + * @param [in] ty_type transform type + * @param [in] src source vector + * @param [in] n block size (along one side) + */ +void od_coding_order_to_raster(int16_t *dst, int stride, TX_TYPE ty_type, + const int16_t *src, int n) { + int bs; + /* src + 1 because DC is not included for 4x4 blocks. */ + od_raster_from_band(OD_LAYOUTS[0], dst, stride, ty_type, src + 1); + for (bs = 1; bs < OD_NBSIZES; bs++) { + int size; + int offset; + /* Length of block size > 4 */ + size = 1 << (OD_LOG_BSIZE0 + bs); + /* Offset is the size of the previous block squared. */ + offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs); + if (n >= size) { + /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */ + od_raster_from_band(OD_LAYOUTS[bs], dst, stride, ty_type, src + offset); + } + } + dst[0] = src[0]; +} + +/** Perform a single stage of conversion from a coefficient block in + * raster order into coding scan order + * + * @param [in] layout scan order specification + * @param [out] dst destination vector + * @param [in] src source coefficient block + * @param [in] stride source block row stride + */ +static void od_band_from_raster_16(const band_layout *layout, int16_t *dst, + const int16_t *src, int stride) { + int i; + int len; + len = layout->band_offsets[layout->nb_bands]; + for (i = 0; i < len; i++) { + dst[i] = src[layout->dst_table[DCT_DCT][i][1]*stride + layout->dst_table[DCT_DCT][i][0]]; + } +} + +/** Converts a coefficient block in raster order into a vector in + * coding scan order with the PVQ partitions laid out one after + * another. This works in stages; the 4x4 conversion is applied to + * the coefficients nearest DC, then the 8x8 applied to the 8x8 block + * nearest DC that was not already coded by 4x4, then 16x16 following + * the same pattern.
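+ * + * (An illustrative worked example, not part of the original documentation: + * for an 8x8 input, n = 8, the stages fill dst[1..15] with the 15 AC + * coefficients of the top-left 4x4 sub-block in OD_LAYOUT4 order, then + * dst[16..63] with the three OD_LAYOUT8 bands, since the bs = 1 stage uses + * offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + 1) = 16; dst[0] receives DC last. + * A hypothetical call, where block and vec are illustrative locals: + * int16_t block[8*8]; // raster-order coefficients, row stride 8 + * int16_t vec[8*8]; // coding-order output + * od_raster_to_coding_order_16(vec, 8, block, 8);)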
+ * + * @param [out] dst destination vector + * @param [in] n block size (along one side) + * @param [in] src source coefficient block + * @param [in] stride source block row stride + */ +void od_raster_to_coding_order_16(int16_t *dst, int n, const int16_t *src, + int stride) { + int bs; + /* dst + 1 because DC is not included for 4x4 blocks. */ + od_band_from_raster_16(OD_LAYOUTS[0], dst + 1, src, stride); + for (bs = 1; bs < OD_NBSIZES; bs++) { + int size; + int offset; + /* Length of block size > 4. */ + size = 1 << (OD_LOG_BSIZE0 + bs); + /* Offset is the size of the previous block squared. */ + offset = 1 << 2*(OD_LOG_BSIZE0 - 1 + bs); + if (n >= size) { + /* 3 16x16 bands come after 3 8x8 bands, which come after 2 4x4 bands. */ + od_band_from_raster_16(OD_LAYOUTS[bs], dst + offset, src, stride); + } + } + dst[0] = src[0]; +} diff --git a/av1/common/partition.h b/av1/common/partition.h new file mode 100644 index 0000000000000000000000000000000000000000..c86cb81f320c278b468089b55ea08313b1af2bae --- /dev/null +++ b/av1/common/partition.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#if !defined(_partition_H) +# define _partition_H + +#include "av1/common/enums.h" +#include "odintrin.h" + +typedef unsigned char index_pair[2]; + +typedef struct { + const index_pair **const dst_table; + int size; + int nb_bands; + const int *const band_offsets; +} band_layout; + +extern const int *const OD_BAND_OFFSETS[OD_NBSIZES + 1]; + +void od_raster_to_coding_order(int16_t *dst, int n, TX_TYPE ty_type, + const int16_t *src, int stride); + +void od_coding_order_to_raster(int16_t *dst, int stride, TX_TYPE ty_type, + const int16_t *src, int n); + +void od_raster_to_coding_order_16(int16_t *dst, int n, const int16_t *src, + int stride); + +#endif diff --git a/av1/common/pvq.c b/av1/common/pvq.c new file mode 100644 index 0000000000000000000000000000000000000000..62f3632da0e6e32339d92f9c76413abf1663eca6 --- /dev/null +++ b/av1/common/pvq.c @@ -0,0 +1,954 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "odintrin.h" +#include "partition.h" +#include "pvq.h" +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Quantization matrices for 8x8. For other block sizes, we currently just do + resampling. */ +/* Flat quantization, i.e. optimize for PSNR.
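+   (An illustrative note, not in the original comment: every entry of the flat + matrix below is 16, i.e. 1.0 in Q4, so no frequency-dependent weighting is + applied.)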
*/ +const int OD_QM8_Q4_FLAT[] = { + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16 +}; +# if 0 +/* M1: MPEG2 matrix for inter (which has a dead zone). */ +const int OD_QM8_Q4[] = { + 16, 17, 18, 19, 20, 21, 22, 23, + 17, 18, 19, 20, 21, 22, 23, 24, + 18, 19, 20, 21, 22, 23, 24, 25, + 19, 20, 21, 22, 23, 24, 26, 27, + 20, 21, 22, 23, 25, 26, 27, 28, + 21, 22, 23, 24, 26, 27, 28, 30, + 22, 23, 24, 26, 27, 28, 30, 31, + 23, 24, 25, 27, 28, 30, 31, 33}; +# endif +# if 0 +/* M2: MPEG2 matrix for intra (no dead zone). */ +const int OD_QM8_Q4[] = { + 16, 16, 19, 22, 22, 26, 26, 27, + 16, 16, 22, 22, 26, 27, 27, 29, + 19, 22, 26, 26, 27, 29, 29, 35, + 22, 24, 27, 27, 29, 32, 34, 38, + 26, 27, 29, 29, 32, 35, 38, 46, + 27, 29, 34, 34, 35, 40, 46, 56, + 29, 34, 34, 37, 40, 48, 56, 69, + 34, 37, 38, 40, 48, 58, 69, 83 +}; +# endif +# if 0 +/* M3: Taken from dump_psnrhvs. */ +const int OD_QM8_Q4[] = { + 16, 16, 17, 20, 24, 29, 36, 42, + 16, 17, 17, 19, 22, 26, 31, 37, + 17, 17, 21, 23, 26, 30, 34, 40, + 20, 19, 23, 28, 31, 35, 39, 45, + 24, 22, 26, 31, 36, 41, 46, 51, + 29, 26, 30, 35, 41, 47, 52, 58, + 36, 31, 34, 39, 46, 52, 59, 66, + 42, 37, 40, 45, 51, 58, 66, 73 +}; +# endif +# if 1 +/* M4: a compromise equal to .5*(M3 + .5*(M2+transpose(M2))) */ +const int OD_QM8_Q4_HVS[] = { + 16, 16, 18, 21, 24, 28, 32, 36, + 16, 17, 20, 21, 24, 27, 31, 35, + 18, 20, 24, 25, 27, 31, 33, 38, + 21, 21, 25, 28, 30, 34, 37, 42, + 24, 24, 27, 30, 34, 38, 43, 49, + 28, 27, 31, 34, 38, 44, 50, 58, + 32, 31, 33, 37, 43, 50, 58, 68, + 36, 35, 38, 42, 49, 58, 68, 78 +}; +#endif + +/* Constants for the beta parameter, which controls how activity masking is + used. + beta = 1 / (1 - alpha), so when beta is 1, alpha is 0 and activity + masking is disabled. When beta is 1.5, activity masking is used. Note that + activity masking is neither used for 4x4 blocks nor for chroma. 
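+   For example (an illustrative note, not in the original comment): beta = 1.5 + corresponds to alpha = 1 - 1/beta = 1/3.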
*/ +#define OD_BETA(b) OD_QCONST32(b, OD_BETA_SHIFT) +static const od_val16 OD_PVQ_BETA4_LUMA[1] = {OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA8_LUMA[4] = {OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA16_LUMA[7] = {OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA32_LUMA[10] = {OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA64_LUMA[13] = {OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)}; + +static const od_val16 OD_PVQ_BETA4_LUMA_MASKING[1] = {OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA8_LUMA_MASKING[4] = {OD_BETA(1.5), + OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)}; +static const od_val16 OD_PVQ_BETA16_LUMA_MASKING[7] = {OD_BETA(1.5), + OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), + OD_BETA(1.5)}; +static const od_val16 OD_PVQ_BETA32_LUMA_MASKING[10] = {OD_BETA(1.5), + OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), + OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5)}; +static const od_val16 OD_PVQ_BETA64_LUMA_MASKING[13] = {OD_BETA(1.5), + OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), + OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), OD_BETA(1.5), + OD_BETA(1.5), OD_BETA(1.5)}; + +static const od_val16 OD_PVQ_BETA4_CHROMA[1] = {OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA8_CHROMA[4] = {OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA16_CHROMA[7] = {OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA32_CHROMA[10] = {OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.)}; +static const od_val16 OD_PVQ_BETA64_CHROMA[13] = {OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), + OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.), OD_BETA(1.)}; + +const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_NBSIZES + 1] = { + {{OD_PVQ_BETA4_LUMA, OD_PVQ_BETA8_LUMA, + OD_PVQ_BETA16_LUMA, OD_PVQ_BETA32_LUMA}, + {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA, + OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}, + {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA, + OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}}, + {{OD_PVQ_BETA4_LUMA_MASKING, OD_PVQ_BETA8_LUMA_MASKING, + OD_PVQ_BETA16_LUMA_MASKING, OD_PVQ_BETA32_LUMA_MASKING}, + {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA, + OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}, + {OD_PVQ_BETA4_CHROMA, OD_PVQ_BETA8_CHROMA, + OD_PVQ_BETA16_CHROMA, OD_PVQ_BETA32_CHROMA}} +}; + +void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe) { + od_pvq_codeword_ctx *ctx; + int i; + int pli; + int bs; + ctx = &state->pvq_codeword_ctx; + generic_model_init(&state->pvq_param_model[0]); + generic_model_init(&state->pvq_param_model[1]); + generic_model_init(&state->pvq_param_model[2]); + for (i = 0; i < 2*OD_NBSIZES; i++) { + ctx->pvq_adapt[4*i + OD_ADAPT_K_Q8] = 384; + ctx->pvq_adapt[4*i + OD_ADAPT_SUM_EX_Q8] = 256; + ctx->pvq_adapt[4*i + OD_ADAPT_COUNT_Q8] = 104; + ctx->pvq_adapt[4*i + OD_ADAPT_COUNT_EX_Q8] = 128; + } + ctx->pvq_k1_increment = 128; + OD_CDFS_INIT(ctx->pvq_k1_cdf, 
ctx->pvq_k1_increment); + for (pli = 0; pli < OD_NPLANES_MAX; pli++) { + for (bs = 0; bs < OD_NBSIZES; bs++) + for (i = 0; i < PVQ_MAX_PARTITIONS; i++) { + state->pvq_exg[pli][bs][i] = 2 << 16; + } + } + for (i = 0; i < OD_NBSIZES*PVQ_MAX_PARTITIONS; i++) { + state->pvq_ext[i] = is_keyframe ? 24576 : 2 << 16; + } + state->pvq_gaintheta_increment = 128; + OD_CDFS_INIT(state->pvq_gaintheta_cdf, state->pvq_gaintheta_increment >> 2); + state->pvq_skip_dir_increment = 128; + OD_CDFS_INIT(state->pvq_skip_dir_cdf, state->pvq_skip_dir_increment >> 2); + ctx->pvq_split_increment = 128; + OD_CDFS_INIT(ctx->pvq_split_cdf, ctx->pvq_split_increment >> 1); +} + +/* QMs are arranged from smallest to largest blocksizes, first for + blocks with decimation=0, followed by blocks with decimation=1.*/ +int od_qm_offset(int bs, int xydec) +{ + return xydec*OD_QM_STRIDE + OD_QM_OFFSET(bs); +} + +/* Initialize the quantization matrix. */ +// Note: when varying scan orders for the hybrid transform are used by PVQ, +// since AOM does not use magnitude compensation (i.e. simply x16 for all +// coeffs), we don't need separate qm and qm_inv for each transform type. +void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm) { + int i; + int j; + int16_t y[OD_BSIZE_MAX*OD_BSIZE_MAX]; + int16_t y_inv[OD_BSIZE_MAX*OD_BSIZE_MAX]; + int16_t *x1; + int16_t *x1_inv; + int off; + int bs; + int xydec; + for (bs = 0; bs < OD_NBSIZES; bs++) { + for (xydec = 0; xydec < 2; xydec++) { + off = od_qm_offset(bs, xydec); + x1 = x + off; + x1_inv = x_inv + off; + for (i = 0; i < 4 << bs; i++) { + for (j = 0; j < 4 << bs; j++) { + double mag; + mag = 1.0; + if (i == 0 && j == 0) { + mag = 1.0; + } + else { + mag /= 0.0625*qm[(i << 1 >> bs)*8 + (j << 1 >> bs)]; + OD_ASSERT(mag > 0.0); + } + /*Convert to fit in 16 bits.*/ + y[i*(4 << bs) + j] = (int16_t)OD_MINI(OD_QM_SCALE_MAX, + (int32_t)floor(.5 + mag*OD_QM_SCALE)); + y_inv[i*(4 << bs) + j] = (int16_t)floor(.5 + + OD_QM_SCALE*OD_QM_INV_SCALE/(double)y[i*(4 << bs) + j]); + } + } + od_raster_to_coding_order_16(x1, 4 << bs, y, 4 << bs); + od_raster_to_coding_order_16(x1_inv, 4 << bs, y_inv, 4 << bs); + } + } +} + +/* Maps each possible size (n) in the split k-tokenizer to a different value. + Possible values of n are: + 2, 3, 4, 7, 8, 14, 15, 16, 31, 32, 63, 64, 127, 128 + Since we don't care about the order (even in the bit-stream) the simplest + ordering (implemented here) is: + 14, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 */ +int od_pvq_size_ctx(int n) { + int logn; + int odd; + logn = OD_ILOG(n - 1); + odd = n & 1; + return 2*logn - 1 - odd - 7*(n == 14); +} + +/* Maps a length n to a context for the (k=1, n<=16) coder, with a special + case when n is the original length (orig_length=1) of the vector (i.e. we + haven't split it yet). For orig_length=0, we use the same mapping as + od_pvq_size_ctx() up to n=16. When orig_length=1, we map lengths + 7, 8, 14, 15 to contexts 8 to 11. */ +int od_pvq_k1_ctx(int n, int orig_length) { + if (orig_length) return 8 + 2*(n > 8) + (n & 1); + else return od_pvq_size_ctx(n); +} + +/* Indexing for the packed quantization matrices. */ +int od_qm_get_index(int bs, int band) { + /* The -band/3 term is due to the fact that we force corresponding horizontal + and vertical bands to have the same quantization.
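+      (A worked example, not in the original comment: for bs = 1 the expression + below maps bands 0, 1, 2, 3 to indices 2, 3, 4, 4, so one pair of bands + shares a single matrix entry.)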
*/ + OD_ASSERT(bs >= 0 && bs < OD_NBSIZES); + return bs*(bs + 1) + band - band/3; +} + +#if !defined(OD_FLOAT_PVQ) +/*See celt/mathops.c in Opus and tools/cos_search.c.*/ +static int16_t od_pvq_cos_pi_2(int16_t x) +{ + int16_t x2; + x2 = OD_MULT16_16_Q15(x, x); + return OD_MINI(32767, (1073758164 - x*x + x2*(-7654 + OD_MULT16_16_Q16(x2, + 16573 + OD_MULT16_16_Q16(-2529, x2)))) >> 15); +} +#endif + +/*Approximates cos(x) for -pi < x < pi. + Input is in OD_THETA_SCALE.*/ +od_val16 od_pvq_cos(od_val32 x) { +#if defined(OD_FLOAT_PVQ) + return cos(x); +#else + /*Wrap x around by masking, since cos is periodic.*/ + x = x & 0x0001ffff; + if (x > (1 << 16)) { + x = (1 << 17) - x; + } + if (x & 0x00007fff) { + if (x < (1 << 15)) { + return od_pvq_cos_pi_2((int16_t)x); + } + else { + return -od_pvq_cos_pi_2((int16_t)(65536 - x)); + } + } + else { + if (x & 0x0000ffff) { + return 0; + } + else if (x & 0x0001ffff) { + return -32767; + } + else { + return 32767; + } + } +#endif +} + +/*Approximates sin(x) for 0 <= x < pi. + Input is in OD_THETA_SCALE.*/ +od_val16 od_pvq_sin(od_val32 x) { +#if defined(OD_FLOAT_PVQ) + return sin(x); +#else + return od_pvq_cos(32768 - x); +#endif +} + +#if !defined(OD_FLOAT_PVQ) +/* Computes an upper-bound on the number of bits required to store the L2 norm + of a vector (excluding sign). */ +int od_vector_log_mag(const od_coeff *x, int n) { + int i; + int32_t sum; + sum = 0; + for (i = 0; i < n; i++) { + int16_t tmp; + tmp = x[i] >> 8; + sum += tmp*(int32_t)tmp; + } + /* We add one full bit (instead of rounding OD_ILOG() up) for safety because + the >> 8 above causes the sum to be slightly underestimated. */ + return 8 + 1 + OD_ILOG(n + sum)/2; +} +#endif + +/** Computes Householder reflection that aligns the reference r to the + * dimension in r with the greatest absolute value. The reflection + * vector is returned in r. + * + * @param [in,out] r reference vector to be reflected, reflection + * also returned in r + * @param [in] n number of dimensions in r + * @param [in] gr gain of reference vector + * @param [out] sign sign of reflection + * @return dimension number to which reflection aligns + **/ +int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign, + int shift) { + int m; + int i; + int s; + od_val16 maxr; + OD_UNUSED(shift); + /* Pick component with largest magnitude. Not strictly + * necessary, but it helps numerical stability */ + m = 0; + maxr = 0; + for (i = 0; i < n; i++) { + if (OD_ABS(r[i]) > maxr) { + maxr = OD_ABS(r[i]); + m = i; + } + } + s = r[m] > 0 ? 1 : -1; + /* This turns r into a Householder reflection vector that would reflect + * the original r[] to e_m */ + r[m] += OD_SHR_ROUND(gr*s, shift); + *sign = s; + return m; +} + +#if !defined(OD_FLOAT_PVQ) +#define OD_RCP_INSHIFT 15 +#define OD_RCP_OUTSHIFT 14 +static od_val16 od_rcp(od_val16 x) +{ + int i; + od_val16 n; + od_val16 r; + i = OD_ILOG(x) - 1; + /*n is Q15 with range [0,1).*/ + n = OD_VSHR_ROUND(x, i - OD_RCP_INSHIFT) - (1 << OD_RCP_INSHIFT); + /*Start with a linear approximation: + r = 1.8823529411764706-0.9411764705882353*n. 
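+    (An illustrative check, not in the original comment: round(1.88235*16384) + = 30840 and round(0.94118*16384) = 15420, matching the Q14 constants used + below.)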
+ The coefficients and the result are Q14 in the range [15420,30840].*/ + r = 30840 + OD_MULT16_16_Q15(-15420, n); + /*Perform two Newton iterations: + r -= r*((r*n)-1.Q15) + = r*((r*n)+(r-1.Q15)).*/ + r = r - OD_MULT16_16_Q15(r, (OD_MULT16_16_Q15(r, n) + r - 32768)); + /*We subtract an extra 1 in the second iteration to avoid overflow; it also + neatly compensates for truncation error in the rest of the process.*/ + r = r - (1 + OD_MULT16_16_Q15(r, OD_MULT16_16_Q15(r, n) + r - 32768)); + /*r is now the Q15 solution to 2/(n+1), with a maximum relative error + of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute + error of 1.24665/32768.*/ + return OD_VSHR_ROUND(r, i - OD_RCP_OUTSHIFT); +} +#endif + +/** Applies Householder reflection from compute_householder(). The + * reflection is its own inverse. + * + * @param [out] out reflected vector + * @param [in] x vector to be reflected + * @param [in] r reflection + * @param [in] n number of dimensions in x,r + */ +void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r, + int n) { + int i; + od_val32 proj; + od_val16 proj_1; + od_val32 l2r; +#if !defined(OD_FLOAT_PVQ) + od_val16 proj_norm; + od_val16 l2r_norm; + od_val16 rcp; + int proj_shift; + int l2r_shift; + int outshift; +#endif + /*FIXME: Can we get l2r and/or l2r_shift from an earlier computation?*/ + l2r = 0; + for (i = 0; i < n; i++) { + l2r += OD_MULT16_16(r[i], r[i]); + } + /* Apply Householder reflection */ + proj = 0; + for (i = 0; i < n; i++) { + proj += OD_MULT16_16(r[i], x[i]); + } +#if defined(OD_FLOAT_PVQ) + proj_1 = proj*2./(1e-100 + l2r); + for (i = 0; i < n; i++) { + out[i] = x[i] - r[i]*proj_1; + } +#else + /*l2r_norm is [0.5, 1.0[ in Q15.*/ + l2r_shift = (OD_ILOG(l2r) - 1) - 14; + l2r_norm = OD_VSHR_ROUND(l2r, l2r_shift); + rcp = od_rcp(l2r_norm); + proj_shift = (OD_ILOG(abs(proj)) - 1) - 14; + /*proj_norm is [0.5, 1.0[ in Q15.*/ + proj_norm = OD_VSHR_ROUND(proj, proj_shift); + proj_1 = OD_MULT16_16_Q15(proj_norm, rcp); + /*The proj*2. in the float code becomes -1 in the final outshift. + The sign of l2r_shift is positive since we're taking the reciprocal of + l2r_norm and this is a right shift.*/ + outshift = OD_MINI(30, OD_RCP_OUTSHIFT - proj_shift - 1 + l2r_shift); + if (outshift >= 0) { + for (i = 0; i < n; i++) { + int32_t tmp; + tmp = OD_MULT16_16(r[i], proj_1); + tmp = OD_SHR_ROUND(tmp, outshift); + out[i] = x[i] - tmp; + } + } + else { + /*FIXME: Can we make this case impossible? + Right now, if r[] is all zeros except for 1, 2, or 3 ones, and + if x[] is all zeros except for large values at the same position as the + ones in r[], then we can end up with a shift of -1.*/ + for (i = 0; i < n; i++) { + int32_t tmp; + tmp = OD_MULT16_16(r[i], proj_1); + tmp = OD_SHL(tmp, -outshift); + out[i] = x[i] - tmp; + } + } +#endif +} + +#if !defined(OD_FLOAT_PVQ) +#define OD_EXP2_INSHIFT 15 +#define OD_EXP2_FRACSHIFT 15 +#define OD_EXP2_OUTSHIFT 15 +static const int32_t OD_EXP2_C[5] = {32768, 22709, 7913, 1704, 443}; +/*Output is [1.0, 2.0) in Q(OD_EXP2_FRACSHIFT). 
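+  (An illustrative check, not in the original comment: a zero input gives + od_exp2_frac(0) = 0, so OD_EXP2_C[0] + frac below is 32768, exactly 1.0 + in Q15.)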
It does not include the integer offset, which is added in od_exp2 after the + final shift.*/ +static int32_t od_exp2_frac(int32_t x) +{ + return OD_MULT16_16_Q15(x, (OD_EXP2_C[1] + OD_MULT16_16_Q15(x, + (OD_EXP2_C[2] + OD_MULT16_16_Q15(x, (OD_EXP2_C[3] + + OD_MULT16_16_Q15(x, OD_EXP2_C[4]))))))); +} + +/** Base-2 exponential approximation (2^x) with Q15 input and output.*/ +static int32_t od_exp2(int32_t x) +{ + int integer; + int32_t frac; + integer = x >> OD_EXP2_INSHIFT; + if (integer > 14) + return 0x7f000000; + else if (integer < -15) + return 0; + frac = od_exp2_frac(x - OD_SHL(integer, OD_EXP2_INSHIFT)); + return OD_VSHR_ROUND(OD_EXP2_C[0] + frac, -integer) + 1; +} + +#define OD_LOG2_INSHIFT 15 +#define OD_LOG2_OUTSHIFT 15 +#define OD_LOG2_INSCALE_1 (1./(1 << OD_LOG2_INSHIFT)) +#define OD_LOG2_OUTSCALE (1 << OD_LOG2_OUTSHIFT) +static int16_t od_log2(int16_t x) +{ + return x + OD_MULT16_16_Q15(x, (14482 + OD_MULT16_16_Q15(x, (-23234 + + OD_MULT16_16_Q15(x, (13643 + OD_MULT16_16_Q15(x, (-6403 + + OD_MULT16_16_Q15(x, 1515))))))))); +} + +static int32_t od_pow(int32_t x, od_val16 beta) +{ + int16_t t; + int xshift; + int log2_x; + od_val32 logr; + /*FIXME: this conditional is to avoid doing log2(0).*/ + if (x == 0) + return 0; + log2_x = (OD_ILOG(x) - 1); + xshift = log2_x - OD_LOG2_INSHIFT; + /*t should be in range [0.0, 1.0[ in Q(OD_LOG2_INSHIFT).*/ + t = OD_VSHR(x, xshift) - (1 << OD_LOG2_INSHIFT); + /*log2(g/OD_COMPAND_SCALE) = log2(x) - OD_COMPAND_SHIFT in + Q(OD_LOG2_OUTSHIFT).*/ + logr = od_log2(t) + (log2_x - OD_COMPAND_SHIFT)*OD_LOG2_OUTSCALE; + logr = OD_MULT16_32_QBETA(beta, logr); + return od_exp2(logr); +} +#endif + +/** Gain companding: raises gain to the power 1/beta for activity masking. + * + * @param [in] g real (uncompanded) gain + * @param [in] q0 uncompanded quality parameter + * @param [in] beta activity masking beta param (exponent) + * @return g^(1/beta) + */ +static od_val32 od_gain_compand(od_val32 g, int q0, od_val16 beta) { +#if defined(OD_FLOAT_PVQ) + if (beta == 1) return OD_ROUND32(OD_CGAIN_SCALE*g/(double)q0); + else { + return OD_ROUND32(OD_CGAIN_SCALE*OD_COMPAND_SCALE*pow(g*OD_COMPAND_SCALE_1, + 1./beta)/(double)q0); + } +#else + if (beta == OD_BETA(1)) return (OD_CGAIN_SCALE*g + (q0 >> 1))/q0; + else { + int32_t expr; + /*FIXME: This is 1/beta in Q(BETA_SHIFT), should use od_rcp() instead.*/ + expr = od_pow(g, OD_ROUND16((1 << (2*OD_BETA_SHIFT))/(double)beta)); + expr <<= OD_CGAIN_SHIFT + OD_COMPAND_SHIFT - OD_EXP2_OUTSHIFT; + return (expr + (q0 >> 1))/q0; + } +#endif +} + +#if !defined(OD_FLOAT_PVQ) +#define OD_SQRT_INSHIFT 16 +#define OD_SQRT_OUTSHIFT 15 +static int16_t od_rsqrt_norm(int16_t x); + +static int16_t od_sqrt_norm(int32_t x) +{ + OD_ASSERT(x < 65536); + return OD_MINI(OD_SHR_ROUND(x*od_rsqrt_norm(x), OD_SQRT_OUTSHIFT), 32767); +} + +static int16_t od_sqrt(int32_t x, int *sqrt_shift) +{ + int k; + int s; + int32_t t; + if (x == 0) { + *sqrt_shift = 0; + return 0; + } + OD_ASSERT(x < (1 << 30)); + k = ((OD_ILOG(x) - 1) >> 1); + /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s). + Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/ + s = 2*k - (OD_SQRT_INSHIFT - 2); + t = OD_VSHR(x, s); + /*We want to express od_sqrt() in terms of od_sqrt_norm(), which is + defined as (2^OUTSHIFT)*sqrt(t*(2^-INSHIFT)) with t=x*(2^-s).
+ This simplifies to 2^(OUTSHIFT-(INSHIFT/2)-(s/2))*sqrt(x), so the caller + needs to shift right by OUTSHIFT - INSHIFT/2 - s/2.*/ + *sqrt_shift = OD_SQRT_OUTSHIFT - ((s + OD_SQRT_INSHIFT) >> 1); + return od_sqrt_norm(t); +} +#endif + +/** Gain expanding: raises gain to the power beta for activity masking. + * + * @param [in] cg companded gain + * @param [in] q0 uncompanded quality parameter + * @param [in] beta activity masking beta param (exponent) + * @return g^beta + */ +od_val32 od_gain_expand(od_val32 cg0, int q0, od_val16 beta) { + if (beta == OD_BETA(1)) { + /*The multiply fits into 28 bits because the expanded gain has a range from + 0 to 2^20.*/ + return OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT); + } + else if (beta == OD_BETA(1.5)) { +#if defined(OD_FLOAT_PVQ) + double cg; + cg = cg0*OD_CGAIN_SCALE_1; + cg *= q0*OD_COMPAND_SCALE_1; + return OD_ROUND32(OD_COMPAND_SCALE*cg*sqrt(cg)); +#else + int32_t irt; + int64_t tmp; + int sqrt_inshift; + int sqrt_outshift; + /*cg0 is in Q(OD_CGAIN_SHIFT) and we need to divide it by + 2^OD_COMPAND_SHIFT.*/ + irt = od_sqrt(cg0*q0, &sqrt_outshift); + sqrt_inshift = (OD_CGAIN_SHIFT + OD_COMPAND_SHIFT) >> 1; + /*tmp is in Q(OD_CGAIN_SHIFT + OD_COMPAND_SHIFT).*/ + tmp = cg0*q0*(int64_t)irt; + /*Expanded gain must be in Q(OD_COMPAND_SHIFT), thus OD_COMPAND_SHIFT is + not included here.*/ + return OD_VSHR_ROUND(tmp, OD_CGAIN_SHIFT + sqrt_outshift + sqrt_inshift); +#endif + } + else { +#if defined(OD_FLOAT_PVQ) + /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the multiply by + OD_COMPAND_SCALE.*/ + double cg; + cg = cg0*OD_CGAIN_SCALE_1; + return OD_ROUND32(OD_COMPAND_SCALE*pow(cg*q0*OD_COMPAND_SCALE_1, beta)); +#else + int32_t expr; + int32_t cg; + cg = OD_SHR_ROUND(cg0*q0, OD_CGAIN_SHIFT); + expr = od_pow(cg, beta); + /*Expanded gain must be in Q(OD_COMPAND_SHIFT), hence the subtraction by + OD_COMPAND_SHIFT.*/ + return OD_SHR_ROUND(expr, OD_EXP2_OUTSHIFT - OD_COMPAND_SHIFT); +#endif + } +} + +/** Computes the raw and quantized/companded gain of a given input + * vector + * + * @param [in] x vector of input data + * @param [in] n number of elements in vector x + * @param [in] q0 quantizer + * @param [out] g raw gain + * @param [in] beta activity masking beta param + * @param [in] bshift shift to be applied to raw gain + * @return quantized/companded gain + */ +od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g, + od_val16 beta, int bshift) { + int i; + od_val32 acc; +#if !defined(OD_FLOAT_PVQ) + od_val32 irt; + int sqrt_shift; +#else + OD_UNUSED(bshift); +#endif + acc = 0; + for (i = 0; i < n; i++) { + acc += x[i]*(od_val32)x[i]; + } +#if defined(OD_FLOAT_PVQ) + *g = sqrt(acc); +#else + irt = od_sqrt(acc, &sqrt_shift); + *g = OD_VSHR_ROUND(irt, sqrt_shift - bshift); +#endif + /* Normalize gain by quantization step size and apply companding + (if ACTIVITY != 1). 
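+     (A hedged illustration, not in the original comment: when beta == 1, + od_gain_compand reduces to a rounded g/q0 in Q(OD_CGAIN_SHIFT), i.e. no + companding; the companding exponent only takes effect when beta != 1.)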
*/ + return od_gain_compand(*g, q0, beta); +} + +static od_val16 od_beta_rcp(od_val16 beta){ + if (beta == OD_BETA(1.)) + return OD_BETA(1.); + else if (beta == OD_BETA(1.5)) + return OD_BETA(1./1.5); + else { + od_val16 rcp_beta; + /*Shift by 1 less, mapping beta to the range [.5, .75] and thus < 32768.*/ + rcp_beta = od_rcp(beta << (OD_RCP_INSHIFT - 1 - OD_BETA_SHIFT)); + return OD_SHR_ROUND(rcp_beta, OD_RCP_OUTSHIFT + 1 - OD_BETA_SHIFT); + } +} + +/** Compute theta quantization range from quantized/companded gain + * + * @param [in] qcg quantized companded gain value + * @param [in] beta activity masking beta param + * @return max theta value + */ +int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta){ + /* Set angular resolution (in radians) to match the encoded gain */ +#if defined(OD_FLOAT_PVQ) + int ts = (int)floor(.5 + qcg*OD_CGAIN_SCALE_1*M_PI/(2*beta)); +#else + int ts = OD_SHR_ROUND(qcg*OD_MULT16_16_QBETA(OD_QCONST32(M_PI/2, + OD_CGAIN_SHIFT), od_beta_rcp(beta)), OD_CGAIN_SHIFT*2); +#endif + /* Special case for low gains -- will need to be tuned anyway */ + if (qcg < OD_QCONST32(1.4, OD_CGAIN_SHIFT)) ts = 1; + return ts; +} + +/** Decode quantized theta value from coded value + * + * @param [in] t coded theta value + * @param [in] max_theta maximum theta value + * @return decoded theta value + */ +od_val32 od_pvq_compute_theta(int t, int max_theta) { + if (max_theta != 0) { +#if defined(OD_FLOAT_PVQ) + return OD_MINI(t, max_theta - 1)*.5*M_PI/max_theta; +#else + return (OD_MAX_THETA_SCALE*OD_MINI(t, max_theta - 1) + + (max_theta >> 1))/max_theta; +#endif + } + else return 0; +} + +#define OD_ITHETA_SHIFT 15 +/** Compute the number of pulses used for PVQ encoding a vector from + * available metrics (encode and decode side) + * + * @param [in] qcg quantized companded gain value + * @param [in] itheta quantized PVQ error angle theta + * @param [in] theta PVQ error angle theta + * @param [in] noref indicates presence or lack of reference + * (prediction) + * @param [in] n number of elements to be coded + * @param [in] beta activity masking beta param + * @param [in] nodesync do not use info that depends on the reference + * @return number of pulses to use for coding + */ +int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref, int n, + od_val16 beta, int nodesync) { + if (noref) { + if (qcg == 0) return 0; + if (n == 15 && qcg == OD_CGAIN_SCALE && beta > OD_BETA(1.25)) { + return 1; + } + else { +#if defined(OD_FLOAT_PVQ) + return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1 - .2)* + sqrt((n + 3)/2)/beta)); +#else + od_val32 rt; + int sqrt_shift; + rt = od_sqrt((n + 3) >> 1, &sqrt_shift); + /*FIXME: get rid of 64-bit mul.*/ + return OD_MAXI(1, OD_SHR_ROUND((int64_t)((qcg + - (int64_t)OD_QCONST32(.2, OD_CGAIN_SHIFT))*rt/(beta*OD_BETA_SCALE_1)), + OD_CGAIN_SHIFT + sqrt_shift)); +#endif + } + } + else { + if (itheta == 0) return 0; + /* Sets K according to gain and theta, based on the high-rate + PVQ distortion curves (see PVQ document). Low-rate will have to be + perceptually tuned anyway. We subtract 0.2 from the radius as an + approximation for the fact that the coefficients aren't identically + distributed within a band so at low gain the number of dimensions that + are likely to have a pulse is less than n.
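+       (A worked example, not in the original comment: in the nodesync branch + below, itheta = 2 and n = 14 give floor(.5 + (2 - .2)*sqrt((14 + 2)/2)) = + floor(.5 + 1.8*2.8284) = 5 pulses.)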
*/ + if (nodesync) { +#if defined(OD_FLOAT_PVQ) + return OD_MAXI(1, (int)floor(.5 + (itheta - .2)*sqrt((n + 2)/2))); +#else + od_val32 rt; + int sqrt_outshift; + rt = od_sqrt((n + 2)/2, &sqrt_outshift); + /*FIXME: get rid of 64-bit mul.*/ + return OD_MAXI(1, OD_VSHR_ROUND(((OD_SHL(itheta, OD_ITHETA_SHIFT) + - OD_QCONST32(.2, OD_ITHETA_SHIFT)))*(int64_t)rt, + sqrt_outshift + OD_ITHETA_SHIFT)); +#endif + } + else { + return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1* + od_pvq_sin(theta)*OD_TRIG_SCALE_1 - .2)*sqrt((n + + 2)/2)/(beta*OD_BETA_SCALE_1))); + } + } +} + +#if !defined(OD_FLOAT_PVQ) +#define OD_RSQRT_INSHIFT 16 +#define OD_RSQRT_OUTSHIFT 14 +/** Reciprocal sqrt approximation where the input is in the range [0.25,1) in + Q16 and the output is in the range (1.0, 2.0] in Q14. + Error is always within +/-1 of round(1/sqrt(t)).*/ +static int16_t od_rsqrt_norm(int16_t t) +{ + int16_t n; + int32_t r; + int32_t r2; + int32_t ry; + int32_t y; + int32_t ret; + /* Range of n is [-16384,32767] ([-0.5,1) in Q15).*/ + n = t - 32768; + OD_ASSERT(n >= -16384); + /*Get a rough initial guess for the root. + The optimal minimax quadratic approximation (using relative error) is + r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). + Coefficients here, and the final result r, are Q14.*/ + r = (23565 + OD_MULT16_16_Q15(n, (-13481 + OD_MULT16_16_Q15(n, 6711)))); + /*We want y = t*r*r-1 in Q15, but t is 32-bit Q16 and r is Q14. + We can compute the result from n and r using Q15 multiplies with some + adjustment, carefully done to avoid overflow.*/ + r2 = r*r; + y = (((r2 >> 15)*n + r2) >> 12) - 131077; + ry = r*y; + /*Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). + This yields the Q14 reciprocal square root of the Q16 t, with a maximum + relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a peak + absolute error of 2.26591/16384.*/ + ret = r + ((((ry >> 16)*(3*y) >> 3) - ry) >> 18); + OD_ASSERT(ret >= 16384 && ret < 32768); + return (int16_t)ret; +} + +static int16_t od_rsqrt(int32_t x, int *rsqrt_shift) +{ + int k; + int s; + int16_t t; + k = (OD_ILOG(x) - 1) >> 1; + /*t is x in the range [0.25, 1) in QINSHIFT, or x*2^(-s). + Shift by log2(x) - log2(0.25*(1 << INSHIFT)) to ensure 0.25 lower bound.*/ + s = 2*k - (OD_RSQRT_INSHIFT - 2); + t = OD_VSHR(x, s); + /*We want to express od_rsqrt() in terms of od_rsqrt_norm(), which is + defined as (2^OUTSHIFT)/sqrt(t*(2^-INSHIFT)) with t=x*(2^-s). + This simplifies to 2^(OUTSHIFT+(INSHIFT/2)+(s/2))/sqrt(x), so the caller + needs to shift right by OUTSHIFT + INSHIFT/2 + s/2.*/ + *rsqrt_shift = OD_RSQRT_OUTSHIFT + ((s + OD_RSQRT_INSHIFT) >> 1); + return od_rsqrt_norm(t); +} +#endif + +/** Synthesizes one partition of coefficient values from a PVQ-encoded + * vector. This 'partial' version is called by the encode loop where + * the Householder reflection has already been computed and there's no + * need to recompute it.
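+ * + * (An illustrative summary, not part of the original documentation: up to + * fixed-point scaling, the reference case below computes + * x = H_r(g*sin(theta)*y/||y||) with x[m] set to -s*g*cos(theta) before the + * reflection, where H_r is the Householder reflection defined by r; the + * noref case is simply x = g*y/||y||. Both are then scaled by the + * inverse QM.)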
+ * + * @param [out] xcoeff output coefficient partition (x in math doc) + * @param [in] ypulse PVQ-encoded values (y in the math doc); in + * the noref case, this vector has n entries, + * in the reference case it contains n-1 entries + * (the m-th entry is not included) + * @param [in] r reference vector (prediction) + * @param [in] n number of elements in this partition + * @param [in] noref indicates presence or lack of prediction + * @param [in] g decoded quantized vector gain + * @param [in] theta decoded theta (prediction error) + * @param [in] m alignment dimension of Householder reflection + * @param [in] s sign of Householder reflection + * @param [in] qm_inv inverse of the QM with magnitude compensation + */ +void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse, + const od_val16 *r16, int n, int noref, od_val32 g, od_val32 theta, int m, int s, + const int16_t *qm_inv) { + int i; + int yy; + od_val32 scale; + int nn; + int gshift; + int qshift; + OD_ASSERT(g != 0); + nn = n-(!noref); /* when noref==0, vector in is sized n-1 */ + yy = 0; + for (i = 0; i < nn; i++) + yy += ypulse[i]*(int32_t)ypulse[i]; + /* Shift required for the magnitude of the pre-qm synthesis to be guaranteed + to fit in 16 bits. In practice, the range will be 8192-16384 after scaling + most of the time. */ + gshift = OD_MAXI(0, OD_ILOG(g) - 14); + /*scale is g/sqrt(yy) in Q(16-gshift) so that x[]*scale has a norm that fits + in 16 bits.*/ + if (yy == 0) scale = 0; +#if defined(OD_FLOAT_PVQ) + else { + scale = g/sqrt(yy); + } + OD_UNUSED(gshift); + OD_UNUSED(qshift); +#else + else { + int rsqrt_shift; + int16_t rsqrt; + /*FIXME: should be < int64_t*/ + int64_t tmp; + rsqrt = od_rsqrt(yy, &rsqrt_shift); + tmp = rsqrt*(int64_t)g; + scale = OD_VSHR_ROUND(tmp, rsqrt_shift + gshift - 16); + } + /* Shift to apply after multiplying by the inverse QM, taking into account + gshift. */ + qshift = OD_QM_INV_SHIFT - gshift; +#endif + if (noref) { + for (i = 0; i < n; i++) { + od_val32 x; + /* This multiply doesn't round, so it introduces some bias. + It would be nice (but not critical) to fix this. */ + x = OD_MULT16_32_Q16(ypulse[i], scale); +#if defined(OD_FLOAT_PVQ) + xcoeff[i] = (od_coeff)floor(.5 + + x*(qm_inv[i]*OD_QM_INV_SCALE_1)); +#else + xcoeff[i] = OD_SHR_ROUND(x*qm_inv[i], qshift); +#endif + } + } + else{ + od_val16 x[MAXN]; + scale = OD_ROUND32(scale*OD_TRIG_SCALE_1*od_pvq_sin(theta)); + /* The following multiply doesn't round, but it's probably OK since + the Householder reflection is likely to undo most of the resulting + bias. */ + for (i = 0; i < m; i++) + x[i] = OD_MULT16_32_Q16(ypulse[i], scale); + x[m] = OD_ROUND16(-s*(OD_SHR_ROUND(g, gshift))*OD_TRIG_SCALE_1* + od_pvq_cos(theta)); + for (i = m; i < nn; i++) + x[i+1] = OD_MULT16_32_Q16(ypulse[i], scale); + od_apply_householder(x, x, r16, n); + for (i = 0; i < n; i++) { +#if defined(OD_FLOAT_PVQ) + xcoeff[i] = (od_coeff)floor(.5 + (x[i]*(qm_inv[i]*OD_QM_INV_SCALE_1))); +#else + xcoeff[i] = OD_SHR_ROUND(x[i]*qm_inv[i], qshift); +#endif + } + } +} diff --git a/av1/common/pvq.h b/av1/common/pvq.h new file mode 100644 index 0000000000000000000000000000000000000000..a5051b40abf818439755e6e8c4efb114f83dacc7 --- /dev/null +++ b/av1/common/pvq.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#if !defined(_pvq_H) +# define _pvq_H (1) +# include "generic_code.h" +# include "odintrin.h" + +extern const double *OD_BASIS_MAG[2][OD_NBSIZES + 1]; +extern const int OD_QM8_Q4_FLAT[]; +extern const int OD_QM8_Q4_HVS[]; + +extern const uint16_t EXP_CDF_TABLE[][16]; +extern const uint16_t LAPLACE_OFFSET[]; + +# define PVQ_MAX_PARTITIONS (1 + 3*(OD_NBSIZES-1)) + +# define OD_NOREF_ADAPT_SPEED (4) +/* Normalized lambda for PVQ quantizer. Since we normalize the gain by q, the + distortion is normalized by q^2 and lambda does not need the q^2 factor. + At high rate, this would be log(2)/6, but we're using a slightly more + aggressive value, closer to: + Li, Xiang, et al. "Laplace distribution based Lagrangian rate distortion + optimization for hybrid video coding." Circuits and Systems for Video + Technology, IEEE Transactions on 19.2 (2009): 193-205. + */ +# define OD_PVQ_LAMBDA (.1146) + +#define OD_PVQ_SKIP_ZERO 1 +#define OD_PVQ_SKIP_COPY 2 + +/* Maximum size for coding a PVQ band. */ +#define OD_MAX_PVQ_SIZE (1024) + +#if defined(OD_FLOAT_PVQ) +#define OD_QM_SHIFT (15) +#else +#define OD_QM_SHIFT (11) +#endif +#define OD_QM_SCALE (1 << OD_QM_SHIFT) +#if defined(OD_FLOAT_PVQ) +#define OD_QM_SCALE_1 (1./OD_QM_SCALE) +#endif +#define OD_QM_SCALE_MAX 32767 +#define OD_QM_INV_SHIFT (12) +#define OD_QM_INV_SCALE (1 << OD_QM_INV_SHIFT) +#if defined(OD_FLOAT_PVQ) +#define OD_QM_INV_SCALE_1 (1./OD_QM_INV_SCALE) +#endif +#define OD_QM_OFFSET(bs) ((((1 << 2*bs) - 1) << 2*OD_LOG_BSIZE0)/3) +#define OD_QM_STRIDE (OD_QM_OFFSET(OD_NBSIZES)) +#define OD_QM_BUFFER_SIZE (2*OD_QM_STRIDE) + +#if !defined(OD_FLOAT_PVQ) +#define OD_THETA_SHIFT (15) +#define OD_THETA_SCALE ((1 << OD_THETA_SHIFT)*2./M_PI) +#define OD_MAX_THETA_SCALE (1 << OD_THETA_SHIFT) +#define OD_TRIG_SCALE (32768) +#define OD_BETA_SHIFT (12) +#define OD_BETA_SCALE_1 (1./(1 << OD_BETA_SHIFT)) +/*Multiplies 16-bit a by 32-bit b and keeps bits [16:64-OD_BETA_SHIFT-1].*/ +#define OD_MULT16_32_QBETA(a, b) \ + ((int16_t)(a)*(int64_t)(int32_t)(b) >> OD_BETA_SHIFT) +# define OD_MULT16_16_QBETA(a, b) \ + ((((int16_t)(a))*((int32_t)(int16_t)(b))) >> OD_BETA_SHIFT) +#define OD_CGAIN_SHIFT (8) +#define OD_CGAIN_SCALE (1 << OD_CGAIN_SHIFT) +#else +#define OD_BETA_SCALE_1 (1.) +#define OD_THETA_SCALE (1) +#define OD_TRIG_SCALE (1) +#define OD_CGAIN_SCALE (1) +#endif +#define OD_THETA_SCALE_1 (1./OD_THETA_SCALE) +#define OD_TRIG_SCALE_1 (1./OD_TRIG_SCALE) +#define OD_CGAIN_SCALE_1 (1./OD_CGAIN_SCALE) +#define OD_CGAIN_SCALE_2 (OD_CGAIN_SCALE_1*OD_CGAIN_SCALE_1) + +/* Largest PVQ partition is half the coefficients of largest block size. 
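+   (Illustrative arithmetic, not in the original comment: with OD_NBSIZES = 4, + OD_BSIZE_MAX = 1 << (2 + 4 - 1) = 32, so MAXN = 32*32/2 = 512.)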
*/ +#define MAXN (OD_BSIZE_MAX*OD_BSIZE_MAX/2) + +#define OD_COMPAND_SHIFT (8 + OD_COEFF_SHIFT) +#define OD_COMPAND_SCALE (1 << OD_COMPAND_SHIFT) +#define OD_COMPAND_SCALE_1 (1./OD_COMPAND_SCALE) + +#define OD_QM_SIZE (OD_NBSIZES*(OD_NBSIZES + 1)) + +#define OD_FLAT_QM 0 +#define OD_HVS_QM 1 + +# define OD_NSB_ADAPT_CTXS (4) + +# define OD_ADAPT_K_Q8 0 +# define OD_ADAPT_SUM_EX_Q8 1 +# define OD_ADAPT_COUNT_Q8 2 +# define OD_ADAPT_COUNT_EX_Q8 3 + +# define OD_ADAPT_NO_VALUE (-2147483647-1) + +typedef struct od_pvq_adapt_ctx od_pvq_adapt_ctx; +typedef struct od_pvq_codeword_ctx od_pvq_codeword_ctx; + +struct od_pvq_codeword_ctx { + int pvq_adapt[2*OD_NBSIZES*OD_NSB_ADAPT_CTXS]; + int pvq_k1_increment; + /* CDFs are size 16 despite the fact that we're using less than that. */ + uint16_t pvq_k1_cdf[12][16]; + uint16_t pvq_split_cdf[22*7][8]; + int pvq_split_increment; +}; + +struct od_pvq_adapt_ctx { + od_pvq_codeword_ctx pvq_codeword_ctx; + generic_encoder pvq_param_model[3]; + int pvq_ext[OD_NBSIZES*PVQ_MAX_PARTITIONS]; + int pvq_exg[OD_NPLANES_MAX][OD_NBSIZES][PVQ_MAX_PARTITIONS]; + int pvq_gaintheta_increment; + uint16_t pvq_gaintheta_cdf[2*OD_NBSIZES*PVQ_MAX_PARTITIONS][16]; + int pvq_skip_dir_increment; + uint16_t pvq_skip_dir_cdf[2*(OD_NBSIZES-1)][7]; +}; + +void od_adapt_pvq_ctx_reset(od_pvq_adapt_ctx *state, int is_keyframe); +int od_pvq_size_ctx(int n); +int od_pvq_k1_ctx(int n, int orig_size); + +od_val16 od_pvq_sin(od_val32 x); +od_val16 od_pvq_cos(od_val32 x); +#if !defined(OD_FLOAT_PVQ) +int od_vector_log_mag(const od_coeff *x, int n); +#endif + +int od_qm_get_index(int bs, int band); + +extern const od_val16 *const OD_PVQ_BETA[2][OD_NPLANES_MAX][OD_NBSIZES + 1]; + +void od_init_qm(int16_t *x, int16_t *x_inv, const int *qm); +int od_compute_householder(od_val16 *r, int n, od_val32 gr, int *sign, + int shift); +void od_apply_householder(od_val16 *out, const od_val16 *x, const od_val16 *r, + int n); +void od_pvq_synthesis_partial(od_coeff *xcoeff, const od_coeff *ypulse, + const od_val16 *r, int n, + int noref, od_val32 g, + od_val32 theta, int m, int s, + const int16_t *qm_inv); +od_val32 od_gain_expand(od_val32 cg, int q0, od_val16 beta); +od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g, + od_val16 beta, int bshift); +int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta); +od_val32 od_pvq_compute_theta(int t, int max_theta); +int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref, + int n, od_val16 beta, int nodesync); + +int od_vector_is_null(const od_coeff *x, int len); +int od_qm_offset(int bs, int xydec); + +#endif diff --git a/av1/common/pvq_state.c b/av1/common/pvq_state.c new file mode 100644 index 0000000000000000000000000000000000000000..45d51843fc81f48f67629489e5cb225e365c4bcb --- /dev/null +++ b/av1/common/pvq_state.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include "av1/common/pvq_state.h" +#include "av1/common/odintrin.h" + +void od_adapt_ctx_reset(od_adapt_ctx *adapt, int is_keyframe) { + int i; + int pli; + od_adapt_pvq_ctx_reset(&adapt->pvq, is_keyframe); + adapt->skip_increment = 128; + OD_CDFS_INIT(adapt->skip_cdf, adapt->skip_increment >> 2); + for (pli = 0; pli < OD_NPLANES_MAX; pli++) { + generic_model_init(&adapt->model_dc[pli]); + for (i = 0; i < OD_NBSIZES; i++) { + adapt->ex_g[pli][i] = 8; + } + for (i = 0; i < 4; i++) { + int j; + for (j = 0; j < 3; j++) { + adapt->ex_dc[pli][i][j] = pli > 0 ? 8 : 32768; + } + } + } +} + +void od_init_skipped_coeffs(int16_t *d, int16_t *pred, int is_keyframe, int bo, + int n, int w) { + int i; + int j; + if (is_keyframe) { + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + /* skip DC */ + if (i || j) d[bo + i * w + j] = 0; + } + } + } else { + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + d[bo + i * w + j] = pred[i * n + j]; + } + } + } +} diff --git a/av1/common/pvq_state.h b/av1/common/pvq_state.h new file mode 100644 index 0000000000000000000000000000000000000000..6cf56fea7ac674870e61db7f870f06d80237fe68 --- /dev/null +++ b/av1/common/pvq_state.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#if !defined(_state_H) +# define _state_H (1) + +typedef struct od_state od_state; +typedef struct od_adapt_ctx od_adapt_ctx; + +# include "generic_code.h" +# include "odintrin.h" +# include "pvq.h" + +/*Adaptation speed of scalar Laplace encoding.*/ +# define OD_SCALAR_ADAPT_SPEED (4) + +struct od_adapt_ctx { + /* Support for PVQ encode/decode */ + od_pvq_adapt_ctx pvq; + + generic_encoder model_dc[OD_NPLANES_MAX]; + + int ex_dc[OD_NPLANES_MAX][OD_NBSIZES][3]; + int ex_g[OD_NPLANES_MAX][OD_NBSIZES]; + + /* Joint skip flag for DC and AC */ + uint16_t skip_cdf[OD_NBSIZES*2][4]; + int skip_increment; +}; + +struct od_state { + od_adapt_ctx adapt; + /* TODO(yushin): Enable this for activity masking, + when pvq_qm_q4 is available in AOM. */ + /* unsigned char pvq_qm_q4[OD_NPLANES_MAX][OD_QM_SIZE]; */ + + /* Quantization matrices and their inverses. */ + int16_t qm[OD_QM_BUFFER_SIZE]; + int16_t qm_inv[OD_QM_BUFFER_SIZE]; +}; + +void od_adapt_ctx_reset(od_adapt_ctx *state, int is_keyframe); +void od_init_skipped_coeffs(int16_t *d, int16_t *pred, int is_keyframe, + int bo, int n, int w); + +#endif diff --git a/av1/common/zigzag.h b/av1/common/zigzag.h new file mode 100644 index 0000000000000000000000000000000000000000..295ed233ed3426607ba7b31a2cfebde9dd30a4f5 --- /dev/null +++ b/av1/common/zigzag.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#if !defined(_zigzag_H) +# define _zigzag_H (1) + +extern const unsigned char OD_ZIGZAG4_DCT_DCT[15][2]; +extern const unsigned char OD_ZIGZAG4_ADST_DCT[15][2]; +extern const unsigned char OD_ZIGZAG4_DCT_ADST[15][2]; +extern const unsigned char OD_ZIGZAG4_ADST_ADST[15][2]; + +extern const unsigned char OD_ZIGZAG8_DCT_DCT[48][2]; +extern const unsigned char OD_ZIGZAG8_ADST_DCT[48][2]; +extern const unsigned char OD_ZIGZAG8_DCT_ADST[48][2]; +extern const unsigned char OD_ZIGZAG8_ADST_ADST[48][2]; + +extern const unsigned char OD_ZIGZAG16_DCT_DCT[192][2]; +extern const unsigned char OD_ZIGZAG16_ADST_DCT[192][2]; +extern const unsigned char OD_ZIGZAG16_DCT_ADST[192][2]; +extern const unsigned char OD_ZIGZAG16_ADST_ADST[192][2]; + +extern const unsigned char OD_ZIGZAG32_DCT_DCT[768][2]; +#endif diff --git a/av1/common/zigzag16.c b/av1/common/zigzag16.c new file mode 100644 index 0000000000000000000000000000000000000000..94c34870a6d47ee8b9b857840b201ee8b7a15a96 --- /dev/null +++ b/av1/common/zigzag16.c @@ -0,0 +1,208 @@ +/* This file is generated by gen_zigzag16.m */ + +/* clang-format off */ + +#include "odintrin.h" +OD_EXTERN const unsigned char OD_ZIGZAG16_DCT_DCT[192][2] = { + {8, 0}, {8, 1}, {8, 2}, {9, 0}, + {8, 3}, {9, 1}, {9, 2}, {10, 0}, + {9, 3}, {10, 1}, {10, 2}, {11, 0}, + {10, 3}, {11, 1}, {11, 2}, {11, 3}, + {12, 0}, {12, 1}, {13, 0}, {12, 2}, + {12, 3}, {13, 1}, {13, 2}, {14, 0}, + {13, 3}, {14, 1}, {15, 0}, {14, 2}, + {14, 3}, {15, 1}, {15, 2}, {15, 3}, + {0, 8}, {1, 8}, {0, 9}, {2, 8}, + {1, 9}, {3, 8}, {0, 10}, {2, 9}, + {1, 10}, {3, 9}, {0, 11}, {2, 10}, + {1, 11}, {3, 10}, {0, 12}, {2, 11}, + {1, 12}, {3, 11}, {0, 13}, {2, 12}, + {1, 13}, {0, 14}, {3, 12}, {2, 13}, + {1, 14}, {3, 13}, {0, 15}, {2, 14}, + {1, 15}, {3, 14}, {2, 15}, {3, 15}, + {4, 8}, {5, 8}, {4, 9}, {8, 4}, + {8, 5}, {6, 8}, {5, 9}, {4, 10}, + {9, 4}, {8, 6}, {7, 8}, {9, 5}, + {5, 10}, {8, 7}, {6, 9}, {4, 11}, + {10, 4}, {9, 6}, {7, 9}, {8, 8}, + {10, 5}, {6, 10}, {5, 11}, {9, 7}, + {8, 9}, {10, 6}, {7, 10}, {4, 12}, + {11, 4}, {9, 8}, {6, 11}, {10, 7}, + {11, 5}, {5, 12}, {8, 10}, {7, 11}, + {9, 9}, {4, 13}, {10, 8}, {11, 6}, + {11, 7}, {6, 12}, {8, 11}, {9, 10}, + {12, 4}, {5, 13}, {10, 9}, {12, 5}, + {7, 12}, {11, 8}, {4, 14}, {6, 13}, + {10, 10}, {9, 11}, {12, 6}, {13, 4}, + {11, 9}, {8, 12}, {5, 14}, {12, 7}, + {7, 13}, {4, 15}, {13, 5}, {10, 11}, + {11, 10}, {9, 12}, {13, 6}, {12, 8}, + {6, 14}, {8, 13}, {5, 15}, {13, 7}, + {14, 4}, {12, 9}, {7, 14}, {11, 11}, + {10, 12}, {9, 13}, {14, 5}, {6, 15}, + {13, 8}, {8, 14}, {12, 10}, {14, 6}, + {7, 15}, {13, 9}, {15, 4}, {10, 13}, + {11, 12}, {14, 7}, {9, 14}, {12, 11}, + {8, 15}, {15, 5}, {13, 10}, {14, 8}, + {11, 13}, {15, 6}, {9, 15}, {10, 14}, + {14, 9}, {15, 7}, {13, 11}, {12, 12}, + {10, 15}, {11, 14}, {15, 8}, {14, 10}, + {12, 13}, {13, 12}, {15, 9}, {11, 15}, + {14, 11}, {13, 13}, {15, 10}, {12, 14}, + {13, 14}, {15, 11}, {14, 12}, {12, 15}, + {14, 13}, {13, 15}, {15, 12}, {14, 14}, + {15, 13}, {14, 15}, {15, 14}, {15, 15} + }; + +OD_EXTERN const unsigned char OD_ZIGZAG16_ADST_DCT[192][2] = { + {8, 0}, {9, 0}, {10, 0}, {8, 1}, + {11, 0}, {9, 1}, {8, 2}, {12, 0}, + {10, 1}, {9, 2}, {8, 3}, {13, 0}, + {11, 1}, {10, 2}, {9, 3}, {14, 0}, + {12, 1}, {10, 3}, {15, 0}, {11, 2}, + {13, 1}, {11, 3}, {12, 2}, {14, 1}, + {12, 3}, {13, 2}, {15, 1}, {13, 3}, + 
{14, 2}, {14, 3}, {15, 2}, {15, 3}, + {0, 8}, {1, 8}, {2, 8}, {0, 9}, + {3, 8}, {1, 9}, {2, 9}, {0, 10}, + {3, 9}, {1, 10}, {2, 10}, {0, 11}, + {3, 10}, {1, 11}, {2, 11}, {0, 12}, + {3, 11}, {1, 12}, {2, 12}, {0, 13}, + {3, 12}, {1, 13}, {0, 14}, {2, 13}, + {0, 15}, {1, 14}, {3, 13}, {2, 14}, + {1, 15}, {3, 14}, {2, 15}, {3, 15}, + {8, 4}, {9, 4}, {8, 5}, {4, 8}, + {10, 4}, {9, 5}, {5, 8}, {8, 6}, + {4, 9}, {10, 5}, {9, 6}, {6, 8}, + {8, 7}, {11, 4}, {7, 8}, {5, 9}, + {9, 7}, {11, 5}, {10, 6}, {4, 10}, + {6, 9}, {8, 8}, {5, 10}, {7, 9}, + {12, 4}, {10, 7}, {9, 8}, {11, 6}, + {8, 9}, {4, 11}, {6, 10}, {7, 10}, + {12, 5}, {5, 11}, {10, 8}, {11, 7}, + {9, 9}, {4, 12}, {13, 4}, {8, 10}, + {6, 11}, {12, 6}, {5, 12}, {10, 9}, + {7, 11}, {9, 10}, {11, 8}, {13, 5}, + {8, 11}, {4, 13}, {6, 12}, {10, 10}, + {12, 7}, {11, 9}, {7, 12}, {14, 4}, + {5, 13}, {9, 11}, {13, 6}, {8, 12}, + {4, 14}, {12, 8}, {6, 13}, {11, 10}, + {10, 11}, {12, 9}, {5, 14}, {13, 7}, + {14, 5}, {9, 12}, {4, 15}, {7, 13}, + {8, 13}, {6, 14}, {13, 8}, {11, 11}, + {10, 12}, {15, 4}, {12, 10}, {14, 6}, + {13, 9}, {5, 15}, {9, 13}, {7, 14}, + {15, 5}, {6, 15}, {8, 14}, {14, 7}, + {11, 12}, {7, 15}, {9, 14}, {13, 10}, + {10, 13}, {14, 8}, {15, 6}, {14, 9}, + {12, 11}, {8, 15}, {15, 7}, {10, 14}, + {11, 13}, {9, 15}, {13, 11}, {12, 12}, + {15, 8}, {14, 10}, {15, 9}, {10, 15}, + {11, 14}, {13, 12}, {12, 13}, {15, 10}, + {14, 11}, {11, 15}, {13, 13}, {15, 11}, + {14, 12}, {12, 14}, {15, 12}, {13, 14}, + {12, 15}, {14, 13}, {13, 15}, {15, 13}, + {14, 14}, {15, 14}, {14, 15}, {15, 15} + }; + +OD_EXTERN const unsigned char OD_ZIGZAG16_DCT_ADST[192][2] = { + {8, 0}, {8, 1}, {8, 2}, {8, 3}, + {9, 0}, {9, 1}, {9, 2}, {9, 3}, + {10, 0}, {10, 1}, {10, 2}, {10, 3}, + {11, 0}, {11, 1}, {11, 2}, {11, 3}, + {12, 0}, {12, 1}, {12, 2}, {12, 3}, + {13, 0}, {13, 1}, {13, 2}, {13, 3}, + {14, 0}, {15, 0}, {14, 1}, {14, 2}, + {14, 3}, {15, 1}, {15, 2}, {15, 3}, + {0, 8}, {0, 9}, {0, 10}, {1, 8}, + {0, 11}, {1, 9}, {2, 8}, {0, 12}, + {1, 10}, {2, 9}, {0, 13}, {1, 11}, + {3, 8}, {2, 10}, {0, 14}, {1, 12}, + {3, 9}, {0, 15}, {2, 11}, {3, 10}, + {1, 13}, {2, 12}, {3, 11}, {1, 14}, + {2, 13}, {1, 15}, {3, 12}, {2, 14}, + {3, 13}, {2, 15}, {3, 14}, {3, 15}, + {4, 8}, {4, 9}, {5, 8}, {4, 10}, + {5, 9}, {4, 11}, {6, 8}, {5, 10}, + {8, 4}, {6, 9}, {4, 12}, {5, 11}, + {8, 5}, {6, 10}, {7, 8}, {8, 6}, + {4, 13}, {7, 9}, {5, 12}, {8, 7}, + {9, 4}, {6, 11}, {8, 8}, {7, 10}, + {5, 13}, {9, 5}, {4, 14}, {9, 6}, + {8, 9}, {6, 12}, {9, 7}, {7, 11}, + {4, 15}, {8, 10}, {9, 8}, {5, 14}, + {10, 4}, {6, 13}, {10, 5}, {9, 9}, + {7, 12}, {8, 11}, {10, 6}, {5, 15}, + {10, 7}, {6, 14}, {9, 10}, {7, 13}, + {8, 12}, {10, 8}, {9, 11}, {6, 15}, + {11, 4}, {11, 5}, {10, 9}, {8, 13}, + {7, 14}, {11, 6}, {9, 12}, {11, 7}, + {10, 10}, {7, 15}, {8, 14}, {12, 4}, + {11, 8}, {12, 5}, {9, 13}, {10, 11}, + {8, 15}, {11, 9}, {12, 6}, {12, 7}, + {10, 12}, {9, 14}, {11, 10}, {13, 4}, + {12, 8}, {9, 15}, {13, 5}, {11, 11}, + {12, 9}, {10, 13}, {13, 6}, {13, 7}, + {12, 10}, {14, 4}, {11, 12}, {13, 8}, + {10, 14}, {14, 5}, {12, 11}, {13, 9}, + {14, 6}, {10, 15}, {11, 13}, {15, 4}, + {14, 7}, {12, 12}, {13, 10}, {14, 8}, + {15, 5}, {13, 11}, {15, 6}, {11, 14}, + {14, 9}, {12, 13}, {11, 15}, {15, 7}, + {14, 10}, {15, 8}, {13, 12}, {12, 14}, + {15, 9}, {14, 11}, {13, 13}, {12, 15}, + {15, 10}, {14, 12}, {13, 14}, {15, 11}, + {13, 15}, {14, 13}, {14, 14}, {15, 12}, + {14, 15}, {15, 13}, {15, 14}, {15, 15} + }; + +OD_EXTERN const unsigned char OD_ZIGZAG16_ADST_ADST[192][2] = { + {8, 0}, {8, 1}, {8, 2}, 
{9, 0}, + {8, 3}, {9, 1}, {9, 2}, {10, 0}, + {9, 3}, {10, 1}, {10, 2}, {11, 0}, + {10, 3}, {11, 1}, {11, 2}, {11, 3}, + {12, 0}, {12, 1}, {13, 0}, {12, 2}, + {12, 3}, {13, 1}, {13, 2}, {14, 0}, + {13, 3}, {14, 1}, {15, 0}, {14, 2}, + {14, 3}, {15, 1}, {15, 2}, {15, 3}, + {0, 8}, {1, 8}, {0, 9}, {2, 8}, + {1, 9}, {3, 8}, {0, 10}, {2, 9}, + {1, 10}, {3, 9}, {0, 11}, {2, 10}, + {1, 11}, {3, 10}, {0, 12}, {2, 11}, + {1, 12}, {3, 11}, {0, 13}, {2, 12}, + {1, 13}, {0, 14}, {3, 12}, {2, 13}, + {1, 14}, {3, 13}, {0, 15}, {2, 14}, + {1, 15}, {3, 14}, {2, 15}, {3, 15}, + {4, 8}, {5, 8}, {4, 9}, {8, 4}, + {8, 5}, {6, 8}, {5, 9}, {4, 10}, + {9, 4}, {8, 6}, {7, 8}, {9, 5}, + {5, 10}, {8, 7}, {6, 9}, {4, 11}, + {10, 4}, {9, 6}, {7, 9}, {8, 8}, + {10, 5}, {6, 10}, {5, 11}, {9, 7}, + {8, 9}, {10, 6}, {7, 10}, {4, 12}, + {11, 4}, {9, 8}, {6, 11}, {10, 7}, + {11, 5}, {5, 12}, {8, 10}, {7, 11}, + {9, 9}, {4, 13}, {10, 8}, {11, 6}, + {11, 7}, {6, 12}, {8, 11}, {9, 10}, + {12, 4}, {5, 13}, {10, 9}, {12, 5}, + {7, 12}, {11, 8}, {4, 14}, {6, 13}, + {10, 10}, {9, 11}, {12, 6}, {13, 4}, + {11, 9}, {8, 12}, {5, 14}, {12, 7}, + {7, 13}, {4, 15}, {13, 5}, {10, 11}, + {11, 10}, {9, 12}, {13, 6}, {12, 8}, + {6, 14}, {8, 13}, {5, 15}, {13, 7}, + {14, 4}, {12, 9}, {7, 14}, {11, 11}, + {10, 12}, {9, 13}, {14, 5}, {6, 15}, + {13, 8}, {8, 14}, {12, 10}, {14, 6}, + {7, 15}, {13, 9}, {15, 4}, {10, 13}, + {11, 12}, {14, 7}, {9, 14}, {12, 11}, + {8, 15}, {15, 5}, {13, 10}, {14, 8}, + {11, 13}, {15, 6}, {9, 15}, {10, 14}, + {14, 9}, {15, 7}, {13, 11}, {12, 12}, + {10, 15}, {11, 14}, {15, 8}, {14, 10}, + {12, 13}, {13, 12}, {15, 9}, {11, 15}, + {14, 11}, {13, 13}, {15, 10}, {12, 14}, + {13, 14}, {15, 11}, {14, 12}, {12, 15}, + {14, 13}, {13, 15}, {15, 12}, {14, 14}, + {15, 13}, {14, 15}, {15, 14}, {15, 15} + }; diff --git a/av1/common/zigzag32.c b/av1/common/zigzag32.c new file mode 100644 index 0000000000000000000000000000000000000000..cb3b9bc632242f05e4da05c29941ed26da8256f9 --- /dev/null +++ b/av1/common/zigzag32.c @@ -0,0 +1,199 @@ +/* This file is generated by gen_zigzag32.m */ + +/* clang-format off */ + +#include "odintrin.h" +OD_EXTERN const unsigned char OD_ZIGZAG32_DCT_DCT[768][2] = { + { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 }, + { 16, 1 }, { 17, 1 }, { 20, 0 }, { 16, 2 }, + { 18, 1 }, { 21, 0 }, { 17, 2 }, { 16, 3 }, + { 19, 1 }, { 22, 0 }, { 18, 2 }, { 17, 3 }, + { 20, 1 }, { 16, 4 }, { 23, 0 }, { 19, 2 }, + { 24, 0 }, { 16, 5 }, { 21, 1 }, { 17, 4 }, + { 18, 3 }, { 20, 2 }, { 17, 5 }, { 16, 6 }, + { 19, 3 }, { 18, 4 }, { 25, 0 }, { 22, 1 }, + { 16, 7 }, { 21, 2 }, { 17, 6 }, { 20, 3 }, + { 26, 0 }, { 18, 5 }, { 19, 4 }, { 17, 7 }, + { 23, 1 }, { 22, 2 }, { 18, 6 }, { 27, 0 }, + { 19, 5 }, { 24, 1 }, { 21, 3 }, { 28, 0 }, + { 20, 4 }, { 18, 7 }, { 19, 6 }, { 23, 2 }, + { 29, 0 }, { 25, 1 }, { 21, 4 }, { 30, 0 }, + { 20, 5 }, { 22, 3 }, { 31, 0 }, { 19, 7 }, + { 24, 2 }, { 26, 1 }, { 20, 6 }, { 21, 5 }, + { 22, 4 }, { 23, 3 }, { 27, 1 }, { 25, 2 }, + { 20, 7 }, { 28, 1 }, { 24, 3 }, { 21, 6 }, + { 22, 5 }, { 23, 4 }, { 26, 2 }, { 21, 7 }, + { 29, 1 }, { 25, 3 }, { 30, 1 }, { 27, 2 }, + { 22, 6 }, { 23, 5 }, { 31, 1 }, { 24, 4 }, + { 26, 3 }, { 28, 2 }, { 22, 7 }, { 23, 6 }, + { 25, 4 }, { 24, 5 }, { 29, 2 }, { 30, 2 }, + { 27, 3 }, { 23, 7 }, { 31, 2 }, { 24, 6 }, + { 26, 4 }, { 25, 5 }, { 28, 3 }, { 24, 7 }, + { 27, 4 }, { 29, 3 }, { 25, 6 }, { 26, 5 }, + { 30, 3 }, { 31, 3 }, { 28, 4 }, { 27, 5 }, + { 25, 7 }, { 29, 4 }, { 26, 6 }, { 28, 5 }, + { 30, 4 }, { 26, 7 }, { 27, 6 }, { 31, 4 }, + { 29, 5 }, { 27, 7 }, { 
30, 5 }, { 28, 6 }, + { 31, 5 }, { 29, 6 }, { 28, 7 }, { 30, 6 }, + { 31, 6 }, { 29, 7 }, { 30, 7 }, { 31, 7 }, + { 0, 16 }, { 0, 17 }, { 1, 16 }, { 0, 18 }, + { 1, 17 }, { 0, 19 }, { 2, 16 }, { 1, 18 }, + { 0, 20 }, { 2, 17 }, { 3, 16 }, { 1, 19 }, + { 2, 18 }, { 0, 21 }, { 3, 17 }, { 4, 16 }, + { 1, 20 }, { 2, 19 }, { 0, 22 }, { 3, 18 }, + { 4, 17 }, { 5, 16 }, { 0, 23 }, { 3, 19 }, + { 2, 20 }, { 1, 21 }, { 4, 18 }, { 6, 16 }, + { 5, 17 }, { 3, 20 }, { 2, 21 }, { 1, 22 }, + { 0, 24 }, { 0, 25 }, { 4, 19 }, { 7, 16 }, + { 6, 17 }, { 5, 18 }, { 0, 26 }, { 3, 21 }, + { 2, 22 }, { 1, 23 }, { 4, 20 }, { 5, 19 }, + { 6, 18 }, { 1, 24 }, { 7, 17 }, { 0, 27 }, + { 2, 23 }, { 3, 22 }, { 4, 21 }, { 1, 25 }, + { 5, 20 }, { 7, 18 }, { 0, 28 }, { 6, 19 }, + { 2, 24 }, { 1, 26 }, { 0, 29 }, { 4, 22 }, + { 3, 23 }, { 2, 25 }, { 5, 21 }, { 0, 31 }, + { 7, 19 }, { 6, 20 }, { 0, 30 }, { 1, 27 }, + { 3, 24 }, { 2, 26 }, { 4, 23 }, { 5, 22 }, + { 7, 20 }, { 1, 28 }, { 6, 21 }, { 3, 25 }, + { 2, 27 }, { 1, 29 }, { 4, 24 }, { 2, 28 }, + { 1, 30 }, { 7, 21 }, { 5, 23 }, { 3, 26 }, + { 6, 22 }, { 1, 31 }, { 4, 25 }, { 7, 22 }, + { 3, 27 }, { 2, 29 }, { 2, 30 }, { 5, 24 }, + { 2, 31 }, { 6, 23 }, { 4, 26 }, { 3, 28 }, + { 5, 25 }, { 3, 29 }, { 6, 24 }, { 7, 23 }, + { 3, 30 }, { 4, 27 }, { 3, 31 }, { 5, 26 }, + { 6, 25 }, { 4, 28 }, { 7, 24 }, { 4, 29 }, + { 5, 27 }, { 4, 30 }, { 4, 31 }, { 6, 26 }, + { 5, 28 }, { 7, 25 }, { 6, 27 }, { 5, 29 }, + { 7, 26 }, { 5, 30 }, { 5, 31 }, { 6, 28 }, + { 7, 27 }, { 6, 29 }, { 6, 30 }, { 7, 28 }, + { 6, 31 }, { 7, 29 }, { 7, 30 }, { 7, 31 }, + { 8, 16 }, { 9, 16 }, { 8, 17 }, { 10, 16 }, + { 9, 17 }, { 16, 8 }, { 8, 18 }, { 16, 9 }, + { 10, 17 }, { 11, 16 }, { 17, 8 }, { 9, 18 }, + { 8, 19 }, { 16, 10 }, { 11, 17 }, { 12, 16 }, + { 10, 18 }, { 17, 9 }, { 9, 19 }, { 16, 11 }, + { 8, 20 }, { 18, 8 }, { 17, 10 }, { 10, 19 }, + { 12, 17 }, { 11, 18 }, { 9, 20 }, { 16, 12 }, + { 18, 9 }, { 8, 21 }, { 13, 16 }, { 17, 11 }, + { 19, 8 }, { 18, 10 }, { 13, 17 }, { 16, 13 }, + { 11, 19 }, { 12, 18 }, { 10, 20 }, { 17, 12 }, + { 9, 21 }, { 19, 9 }, { 8, 22 }, { 14, 16 }, + { 18, 11 }, { 11, 20 }, { 10, 21 }, { 20, 8 }, + { 13, 18 }, { 16, 14 }, { 12, 19 }, { 17, 13 }, + { 19, 10 }, { 14, 17 }, { 9, 22 }, { 18, 12 }, + { 8, 23 }, { 17, 14 }, { 20, 9 }, { 15, 16 }, + { 16, 15 }, { 13, 19 }, { 10, 22 }, { 19, 11 }, + { 11, 21 }, { 14, 18 }, { 12, 20 }, { 18, 13 }, + { 20, 10 }, { 21, 8 }, { 15, 17 }, { 9, 23 }, + { 19, 12 }, { 11, 22 }, { 8, 24 }, { 21, 9 }, + { 17, 15 }, { 16, 16 }, { 14, 19 }, { 18, 14 }, + { 12, 21 }, { 13, 20 }, { 20, 11 }, { 10, 23 }, + { 19, 13 }, { 15, 18 }, { 16, 17 }, { 21, 10 }, + { 22, 8 }, { 9, 24 }, { 8, 25 }, { 20, 12 }, + { 15, 19 }, { 11, 23 }, { 17, 16 }, { 18, 15 }, + { 14, 20 }, { 12, 22 }, { 10, 24 }, { 22, 9 }, + { 21, 11 }, { 19, 14 }, { 13, 21 }, { 16, 18 }, + { 9, 25 }, { 17, 17 }, { 8, 26 }, { 20, 13 }, + { 23, 8 }, { 12, 23 }, { 13, 22 }, { 22, 10 }, + { 19, 15 }, { 15, 20 }, { 16, 19 }, { 21, 12 }, + { 11, 24 }, { 14, 21 }, { 8, 27 }, { 18, 16 }, + { 10, 25 }, { 9, 26 }, { 22, 11 }, { 20, 14 }, + { 23, 9 }, { 18, 17 }, { 17, 18 }, { 17, 19 }, + { 19, 16 }, { 21, 13 }, { 10, 26 }, { 12, 24 }, + { 23, 10 }, { 24, 8 }, { 8, 28 }, { 16, 20 }, + { 9, 27 }, { 15, 21 }, { 22, 12 }, { 14, 22 }, + { 13, 23 }, { 20, 15 }, { 11, 25 }, { 24, 9 }, + { 18, 18 }, { 19, 17 }, { 23, 11 }, { 10, 27 }, + { 8, 29 }, { 12, 25 }, { 9, 28 }, { 8, 30 }, + { 21, 14 }, { 13, 24 }, { 11, 26 }, { 25, 8 }, + { 24, 10 }, { 20, 16 }, { 19, 18 }, { 14, 23 }, + { 22, 13 }, { 
8, 31 }, { 17, 20 }, { 9, 29 }, + { 23, 12 }, { 15, 22 }, { 25, 9 }, { 11, 27 }, + { 10, 28 }, { 20, 17 }, { 21, 15 }, { 18, 19 }, + { 16, 21 }, { 24, 11 }, { 9, 30 }, { 12, 26 }, + { 10, 29 }, { 22, 14 }, { 14, 24 }, { 9, 31 }, + { 26, 8 }, { 13, 25 }, { 25, 10 }, { 18, 20 }, + { 19, 19 }, { 11, 28 }, { 15, 23 }, { 20, 18 }, + { 10, 30 }, { 12, 27 }, { 17, 21 }, { 23, 13 }, + { 24, 12 }, { 21, 16 }, { 16, 22 }, { 26, 9 }, + { 27, 8 }, { 13, 26 }, { 22, 15 }, { 10, 31 }, + { 14, 25 }, { 12, 28 }, { 25, 11 }, { 21, 17 }, + { 26, 10 }, { 20, 19 }, { 11, 29 }, { 15, 24 }, + { 23, 14 }, { 27, 9 }, { 11, 30 }, { 13, 27 }, + { 19, 20 }, { 24, 13 }, { 28, 8 }, { 11, 31 }, + { 22, 16 }, { 17, 22 }, { 16, 23 }, { 25, 12 }, + { 18, 21 }, { 12, 29 }, { 21, 18 }, { 28, 9 }, + { 27, 10 }, { 26, 11 }, { 29, 8 }, { 14, 26 }, + { 15, 25 }, { 13, 28 }, { 12, 30 }, { 23, 15 }, + { 30, 8 }, { 16, 24 }, { 13, 29 }, { 25, 13 }, + { 24, 14 }, { 20, 20 }, { 31, 8 }, { 12, 31 }, + { 14, 27 }, { 28, 10 }, { 26, 12 }, { 22, 17 }, + { 21, 19 }, { 17, 23 }, { 18, 22 }, { 29, 9 }, + { 27, 11 }, { 19, 21 }, { 27, 12 }, { 30, 9 }, + { 31, 9 }, { 13, 30 }, { 24, 15 }, { 23, 16 }, + { 15, 26 }, { 14, 28 }, { 29, 10 }, { 28, 11 }, + { 26, 13 }, { 17, 24 }, { 13, 31 }, { 25, 14 }, + { 22, 18 }, { 16, 25 }, { 30, 10 }, { 14, 29 }, + { 15, 27 }, { 19, 22 }, { 21, 20 }, { 20, 21 }, + { 27, 13 }, { 29, 11 }, { 18, 23 }, { 23, 17 }, + { 16, 26 }, { 31, 10 }, { 24, 16 }, { 14, 30 }, + { 22, 19 }, { 14, 31 }, { 28, 12 }, { 26, 14 }, + { 30, 11 }, { 15, 28 }, { 25, 15 }, { 17, 25 }, + { 23, 18 }, { 18, 24 }, { 15, 30 }, { 29, 12 }, + { 31, 11 }, { 16, 27 }, { 24, 17 }, { 28, 13 }, + { 19, 23 }, { 15, 29 }, { 25, 16 }, { 17, 26 }, + { 27, 14 }, { 22, 20 }, { 15, 31 }, { 20, 22 }, + { 21, 21 }, { 16, 28 }, { 17, 27 }, { 30, 12 }, + { 26, 15 }, { 19, 24 }, { 18, 25 }, { 23, 19 }, + { 29, 13 }, { 31, 12 }, { 24, 18 }, { 26, 16 }, + { 25, 17 }, { 16, 29 }, { 28, 14 }, { 20, 23 }, + { 18, 26 }, { 21, 22 }, { 19, 25 }, { 22, 21 }, + { 27, 15 }, { 17, 28 }, { 16, 30 }, { 26, 17 }, + { 23, 20 }, { 16, 31 }, { 25, 18 }, { 27, 16 }, + { 20, 24 }, { 24, 19 }, { 31, 13 }, { 30, 13 }, + { 29, 14 }, { 18, 27 }, { 28, 15 }, { 17, 29 }, + { 19, 26 }, { 17, 30 }, { 21, 23 }, { 22, 22 }, + { 30, 14 }, { 20, 25 }, { 23, 21 }, { 17, 31 }, + { 18, 28 }, { 25, 19 }, { 24, 20 }, { 28, 16 }, + { 31, 14 }, { 26, 18 }, { 19, 27 }, { 29, 15 }, + { 27, 17 }, { 30, 15 }, { 21, 24 }, { 22, 23 }, + { 26, 19 }, { 23, 22 }, { 28, 17 }, { 29, 16 }, + { 18, 30 }, { 24, 21 }, { 25, 20 }, { 18, 31 }, + { 18, 29 }, { 20, 26 }, { 19, 28 }, { 27, 18 }, + { 31, 15 }, { 20, 27 }, { 30, 16 }, { 19, 29 }, + { 29, 17 }, { 31, 16 }, { 27, 19 }, { 21, 25 }, + { 28, 18 }, { 26, 20 }, { 22, 24 }, { 25, 21 }, + { 19, 30 }, { 24, 22 }, { 30, 17 }, { 21, 26 }, + { 23, 23 }, { 19, 31 }, { 20, 28 }, { 31, 17 }, + { 28, 19 }, { 27, 20 }, { 21, 27 }, { 29, 18 }, + { 30, 18 }, { 25, 22 }, { 26, 21 }, { 20, 29 }, + { 22, 25 }, { 24, 23 }, { 29, 19 }, { 23, 24 }, + { 20, 31 }, { 20, 30 }, { 28, 20 }, { 21, 28 }, + { 22, 26 }, { 31, 18 }, { 27, 21 }, { 30, 19 }, + { 22, 27 }, { 29, 20 }, { 23, 25 }, { 24, 24 }, + { 26, 22 }, { 21, 29 }, { 25, 23 }, { 31, 19 }, + { 21, 30 }, { 23, 26 }, { 28, 21 }, { 21, 31 }, + { 22, 28 }, { 30, 20 }, { 25, 24 }, { 27, 22 }, + { 29, 21 }, { 26, 23 }, { 24, 25 }, { 31, 20 }, + { 23, 27 }, { 22, 29 }, { 30, 21 }, { 28, 22 }, + { 24, 26 }, { 25, 25 }, { 27, 23 }, { 22, 30 }, + { 23, 28 }, { 22, 31 }, { 26, 24 }, { 31, 21 }, + { 24, 27 }, { 29, 22 }, { 27, 24 }, 
{ 30, 22 }, + { 25, 26 }, { 28, 23 }, { 23, 30 }, { 23, 29 }, + { 24, 28 }, { 25, 27 }, { 31, 22 }, { 23, 31 }, + { 26, 25 }, { 28, 24 }, { 29, 23 }, { 24, 29 }, + { 24, 30 }, { 27, 25 }, { 25, 28 }, { 26, 26 }, + { 30, 23 }, { 26, 27 }, { 31, 23 }, { 28, 25 }, + { 27, 26 }, { 25, 29 }, { 24, 31 }, { 29, 24 }, + { 30, 24 }, { 27, 27 }, { 29, 25 }, { 26, 28 }, + { 31, 24 }, { 25, 30 }, { 25, 31 }, { 28, 26 }, + { 27, 28 }, { 26, 29 }, { 30, 25 }, { 29, 26 }, + { 28, 27 }, { 26, 30 }, { 31, 25 }, { 27, 29 }, + { 26, 31 }, { 30, 26 }, { 28, 28 }, { 31, 26 }, + { 29, 27 }, { 27, 30 }, { 28, 29 }, { 27, 31 }, + { 30, 27 }, { 31, 27 }, { 28, 30 }, { 29, 28 }, + { 30, 28 }, { 29, 29 }, { 30, 29 }, { 31, 28 }, + { 28, 31 }, { 29, 30 }, { 29, 31 }, { 31, 29 }, + { 30, 30 }, { 30, 31 }, { 31, 30 }, { 31, 31 } +}; diff --git a/av1/common/zigzag4.c b/av1/common/zigzag4.c new file mode 100644 index 0000000000000000000000000000000000000000..7ccc16001f748a1e1ebcec8ca9b7b79301fc10ab --- /dev/null +++ b/av1/common/zigzag4.c @@ -0,0 +1,28 @@ +/* This file is generated by gen_zigzag4.m */ + +/* clang-format off */ + +#include "odintrin.h" +OD_EXTERN const unsigned char OD_ZIGZAG4_DCT_DCT[15][2] = { + {0, 1}, {1, 0}, {1, 1}, {0, 2}, + {2, 0}, {0, 3}, {1, 2}, {3, 0}, + {2, 1}, {1, 3}, {2, 2}, {3, 1}, + {2, 3}, {3, 2}, {3, 3} }; + +OD_EXTERN const unsigned char OD_ZIGZAG4_ADST_DCT[15][2] = { + {1, 0}, {0, 1}, {2, 0}, {1, 1}, + {3, 0}, {2, 1}, {0, 2}, {1, 2}, + {3, 1}, {0, 3}, {2, 2}, {1, 3}, + {3, 2}, {2, 3}, {3, 3} }; + +OD_EXTERN const unsigned char OD_ZIGZAG4_DCT_ADST[15][2] = { + {0, 1}, {0, 2}, {1, 0}, {0, 3}, + {1, 1}, {1, 2}, {2, 0}, {1, 3}, + {2, 1}, {2, 2}, {3, 0}, {3, 1}, + {2, 3}, {3, 2}, {3, 3} }; + +OD_EXTERN const unsigned char OD_ZIGZAG4_ADST_ADST[15][2] = { + {0, 1}, {1, 0}, {1, 1}, {0, 2}, + {2, 0}, {0, 3}, {1, 2}, {3, 0}, + {2, 1}, {1, 3}, {2, 2}, {3, 1}, + {2, 3}, {3, 2}, {3, 3} }; diff --git a/av1/common/zigzag8.c b/av1/common/zigzag8.c new file mode 100644 index 0000000000000000000000000000000000000000..ba39ac0684f26435e58594457aeb0ef6bdbd4243 --- /dev/null +++ b/av1/common/zigzag8.c @@ -0,0 +1,65 @@ +/* This file is generated by gen_zigzag8.m */ + +/* clang-format off */ + +#include "odintrin.h" + +OD_EXTERN const unsigned char OD_ZIGZAG8_DCT_DCT[48][2] = { + {4, 0}, {4, 1}, {5, 0}, {5, 1}, + {6, 0}, {7, 0}, {6, 1}, {7, 1}, + {0, 4}, {1, 4}, {0, 5}, {1, 5}, + {0, 6}, {1, 6}, {0, 7}, {1, 7}, + {2, 4}, {4, 2}, {3, 4}, {2, 5}, + {4, 3}, {5, 2}, {4, 4}, {3, 5}, + {5, 3}, {2, 6}, {4, 5}, {6, 2}, + {5, 4}, {3, 6}, {2, 7}, {6, 3}, + {5, 5}, {7, 2}, {4, 6}, {3, 7}, + {6, 4}, {7, 3}, {4, 7}, {5, 6}, + {6, 5}, {7, 4}, {5, 7}, {6, 6}, + {7, 5}, {6, 7}, {7, 6}, {7, 7} + }; + +OD_EXTERN const unsigned char OD_ZIGZAG8_ADST_DCT[48][2] = { + {4, 0}, {5, 0}, {4, 1}, {6, 0}, + {5, 1}, {7, 0}, {6, 1}, {7, 1}, + {0, 4}, {1, 4}, {0, 5}, {1, 5}, + {0, 6}, {1, 6}, {0, 7}, {1, 7}, + {4, 2}, {2, 4}, {5, 2}, {4, 3}, + {3, 4}, {2, 5}, {5, 3}, {4, 4}, + {6, 2}, {3, 5}, {5, 4}, {2, 6}, + {4, 5}, {6, 3}, {7, 2}, {3, 6}, + {2, 7}, {5, 5}, {6, 4}, {4, 6}, + {7, 3}, {3, 7}, {5, 6}, {6, 5}, + {4, 7}, {7, 4}, {5, 7}, {7, 5}, + {6, 6}, {7, 6}, {6, 7}, {7, 7} + }; + +OD_EXTERN const unsigned char OD_ZIGZAG8_DCT_ADST[48][2] = { + {4, 0}, {4, 1}, {5, 0}, {5, 1}, + {6, 0}, {6, 1}, {7, 0}, {7, 1}, + {0, 4}, {0, 5}, {1, 4}, {0, 6}, + {1, 5}, {0, 7}, {1, 6}, {1, 7}, + {2, 4}, {2, 5}, {3, 4}, {4, 2}, + {2, 6}, {4, 3}, {3, 5}, {4, 4}, + {2, 7}, {3, 6}, {5, 2}, {4, 5}, + {5, 3}, {3, 7}, {5, 4}, {4, 6}, + {6, 2}, {5, 5}, {4, 7}, {6, 
3}, + {6, 4}, {5, 6}, {7, 2}, {6, 5}, + {7, 3}, {5, 7}, {7, 4}, {6, 6}, + {7, 5}, {6, 7}, {7, 6}, {7, 7} + }; + +OD_EXTERN const unsigned char OD_ZIGZAG8_ADST_ADST[48][2] = { + {4, 0}, {4, 1}, {5, 0}, {5, 1}, + {6, 0}, {7, 0}, {6, 1}, {7, 1}, + {0, 4}, {1, 4}, {0, 5}, {1, 5}, + {0, 6}, {1, 6}, {0, 7}, {1, 7}, + {2, 4}, {4, 2}, {3, 4}, {2, 5}, + {4, 3}, {5, 2}, {4, 4}, {3, 5}, + {5, 3}, {2, 6}, {4, 5}, {6, 2}, + {5, 4}, {3, 6}, {2, 7}, {6, 3}, + {5, 5}, {7, 2}, {4, 6}, {3, 7}, + {6, 4}, {7, 3}, {4, 7}, {5, 6}, + {6, 5}, {7, 4}, {5, 7}, {6, 6}, + {7, 5}, {6, 7}, {7, 6}, {7, 7} + }; diff --git a/av1/decoder/decint.h b/av1/decoder/decint.h new file mode 100644 index 0000000000000000000000000000000000000000..99dbc434ace1584cfca44a06abdafca21e288c58 --- /dev/null +++ b/av1/decoder/decint.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#if !defined(_decint_H) +# define _decint_H (1) +# include "av1/common/pvq_state.h" +# include "aom_dsp/entdec.h" + +typedef struct daala_dec_ctx daala_dec_ctx; + +typedef struct daala_dec_ctx od_dec_ctx; + + +struct daala_dec_ctx { + /* Stores context-adaptive CDFs for PVQ. */ + od_state state; + /* Daala entropy decoder. */ + od_ec_dec *ec; + /* Mode of quantization matrice : FLAT (0) or HVS (1) */ + int qm; +}; + +#endif diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index d1d0ae7a8446eba22702b99f706003f7a15c9130..1af7a1a74c387afccaa29ce7497ba2fd894236fa 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c @@ -60,6 +60,16 @@ #define MAX_AV1_HEADER_SIZE 80 #define ACCT_STR __func__ +#if CONFIG_PVQ +#include "av1/decoder/pvq_decoder.h" +#include "av1/encoder/encodemb.h" + +#include "aom_dsp/entdec.h" +#include "av1/common/partition.h" +#include "av1/decoder/decint.h" +#include "av1/encoder/hybrid_fwd_txfm.h" +#endif + static struct aom_read_bit_buffer *init_read_bit_buffer( AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data, const uint8_t *data_end, uint8_t clear_data[MAX_AV1_HEADER_SIZE]); @@ -327,6 +337,141 @@ static void inverse_transform_block(MACROBLOCKD *xd, int plane, } } +#if CONFIG_PVQ +static int av1_pvq_decode_helper(od_dec_ctx *dec, int16_t *ref_coeff, + int16_t *dqcoeff, int16_t *quant, int pli, + int bs, TX_TYPE tx_type, int xdec, + int ac_dc_coded) { + unsigned int flags; // used for daala's stream analyzer. + int off; + const int is_keyframe = 0; + const int has_dc_skip = 1; + int quant_shift = bs == TX_32X32 ? 
1 : 0; + // DC quantizer for PVQ + int pvq_dc_quant; + int lossless = (quant[0] == 0); + const int blk_size = tx_size_1d[bs]; + int eob = 0; + int i; + // TODO(yushin) : To enable activity masking, + // int use_activity_masking = dec->use_activity_masking; + int use_activity_masking = 0; + + DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_BSIZE_MAX * OD_BSIZE_MAX]); + DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_BSIZE_MAX * OD_BSIZE_MAX]); + + od_coeff ref_int32[OD_BSIZE_MAX * OD_BSIZE_MAX]; + od_coeff out_int32[OD_BSIZE_MAX * OD_BSIZE_MAX]; + + od_raster_to_coding_order(ref_coeff_pvq, blk_size, tx_type, ref_coeff, + blk_size); + + if (lossless) + pvq_dc_quant = 1; + else { + // TODO(yushin): Enable this for activity masking, + // when pvq_qm_q4 is available in AOM. + // pvq_dc_quant = OD_MAXI(1, quant* + // dec->state.pvq_qm_q4[pli][od_qm_get_index(bs, 0)] >> 4); + pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift); + } + + off = od_qm_offset(bs, xdec); + + // copy int16 inputs to int32 + for (i = 0; i < blk_size * blk_size; i++) ref_int32[i] = ref_coeff_pvq[i]; + + od_pvq_decode(dec, ref_int32, out_int32, (int)quant[1] >> quant_shift, pli, + bs, OD_PVQ_BETA[use_activity_masking][pli][bs], + OD_ROBUST_STREAM, + is_keyframe, &flags, ac_dc_coded, dec->state.qm + off, + dec->state.qm_inv + off); + + // copy int32 result back to int16 + for (i = 0; i < blk_size * blk_size; i++) dqcoeff_pvq[i] = out_int32[i]; + + if (!has_dc_skip || dqcoeff_pvq[0]) { + dqcoeff_pvq[0] = + has_dc_skip + generic_decode(dec->ec, &dec->state.adapt.model_dc[pli], + -1, &dec->state.adapt.ex_dc[pli][bs][0], 2, + "dc:mag"); + if (dqcoeff_pvq[0]) + dqcoeff_pvq[0] *= od_ec_dec_bits(dec->ec, 1, "dc:sign") ? -1 : 1; + } + dqcoeff_pvq[0] = dqcoeff_pvq[0] * pvq_dc_quant + ref_coeff_pvq[0]; + + od_coding_order_to_raster(dqcoeff, blk_size, tx_type, dqcoeff_pvq, blk_size); + + eob = blk_size * blk_size; + + return eob; +} + +static int av1_pvq_decode_helper2( + MACROBLOCKD *const xd, MB_MODE_INFO *const mbmi, int plane, int row, + int col, TX_SIZE tx_size, TX_TYPE tx_type ) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + // transform block size in pixels + int tx_blk_size = tx_size_1d[tx_size]; + int i, j; + tran_low_t *pvq_ref_coeff = pd->pvq_ref_coeff; + const int diff_stride = tx_blk_size; + int16_t *pred = pd->pred; + tran_low_t *const dqcoeff = pd->dqcoeff; + int ac_dc_coded; // bit0: DC coded, bit1 : AC coded + uint8_t *dst; + int eob; + + eob = 0; + dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; + + // decode ac/dc coded flag. bit0: DC coded, bit1 : AC coded + // NOTE : we don't use 5 symbols for luma here in aom codebase, + // since block partition is taken care of by aom. 
+ // So, only AC/DC skip info is coded + ac_dc_coded = od_decode_cdf_adapt( + xd->daala_dec.ec, + xd->daala_dec.state.adapt.skip_cdf[2 * tx_size + (plane != 0)], 4, + xd->daala_dec.state.adapt.skip_increment, "skip"); + + if (ac_dc_coded) { + int xdec = pd->subsampling_x; + int seg_id = mbmi->segment_id; + int16_t *quant; + FWD_TXFM_PARAM fwd_txfm_param; + + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) { + pred[diff_stride * j + i] = dst[pd->dst.stride * j + i]; + } + + fwd_txfm_param.tx_type = tx_type; + fwd_txfm_param.tx_size = tx_size; + fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL; + fwd_txfm_param.rd_transform = 0; + fwd_txfm_param.lossless = xd->lossless[seg_id]; + + fwd_txfm(pred, pvq_ref_coeff, diff_stride, &fwd_txfm_param); + + quant = &pd->seg_dequant[seg_id][0]; // aom's quantizer + + eob = av1_pvq_decode_helper(&xd->daala_dec, pvq_ref_coeff, dqcoeff, quant, + plane, tx_size, tx_type, xdec, ac_dc_coded); + + // Since av1 does not have separate inverse transform + // but also contains adding to predicted image, + // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0; + + inverse_transform_block(xd, plane, tx_type, tx_size, dst, + pd->dst.stride, eob); + } + + return eob; +} +#endif + static void predict_and_reconstruct_intra_block( AV1_COMMON *cm, MACROBLOCKD *const xd, aom_reader *r, MB_MODE_INFO *const mbmi, int plane, int row, int col, TX_SIZE tx_size) { @@ -335,6 +480,10 @@ static void predict_and_reconstruct_intra_block( PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; uint8_t *dst; int block_idx = (row << 1) + col; +#if CONFIG_PVQ + (void)cm; + (void)r; +#endif dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; if (mbmi->sb_type < BLOCK_8X8) @@ -345,6 +494,7 @@ static void predict_and_reconstruct_intra_block( if (!mbmi->skip) { TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx); +#if !CONFIG_PVQ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type); const int eob = av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size, r, mbmi->segment_id); @@ -353,6 +503,9 @@ static void predict_and_reconstruct_intra_block( #endif inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride, eob); +#else + av1_pvq_decode_helper2(xd, mbmi, plane, row, col, tx_size, tx_type); +#endif } } @@ -364,6 +517,13 @@ static int reconstruct_inter_block(AV1_COMMON *cm, MACROBLOCKD *const xd, PLANE_TYPE plane_type = (plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; int block_idx = (row << 1) + col; TX_TYPE tx_type = get_tx_type(plane_type, xd, block_idx); +#if CONFIG_PVQ + int eob; + (void)cm; + (void)r; +#endif + +#if !CONFIG_PVQ const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type); const int eob = av1_decode_block_tokens(xd, plane, scan_order, col, row, tx_size, r, mbmi->segment_id); @@ -373,6 +533,9 @@ static int reconstruct_inter_block(AV1_COMMON *cm, MACROBLOCKD *const xd, inverse_transform_block(xd, plane, tx_type, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], pd->dst.stride, eob); +#else + eob = av1_pvq_decode_helper2(xd, mbmi, plane, row, col, tx_size, tx_type); +#endif return eob; } @@ -623,6 +786,11 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd, partition = read_partition(cm, xd, mi_row, mi_col, r, has_rows, has_cols, n8x8_l2); subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition); + +#if CONFIG_PVQ + assert(partition < PARTITION_TYPES); + assert(subsize < BLOCK_SIZES); +#endif if (!hbs) { // calculate bmode block dimensions (log 2) xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); @@ -728,6 +896,7 @@ static void setup_token_decoder(const uint8_t *data, const uint8_t *data_end, "Failed to allocate bool decoder %d", 1); } +#if !CONFIG_PVQ static void read_coef_probs_common(av1_coeff_probs_model *coef_probs, aom_reader *r) { int i, j, k, l, m; @@ -752,6 +921,7 @@ static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, aom_reader *r) { for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) read_coef_probs_common(fc->coef_probs[tx_size], r); } +#endif static void setup_segmentation(AV1_COMMON *const cm, struct aom_read_bit_buffer *rb) { @@ -1262,6 +1432,18 @@ static void get_tile_buffers(AV1Decoder *pbi, const uint8_t *data, #endif } +#if CONFIG_PVQ +static void daala_dec_init(daala_dec_ctx *daala_dec, od_ec_dec *ec) { + daala_dec->ec = ec; + od_adapt_ctx_reset(&daala_dec->state.adapt, 0); + + daala_dec->qm = OD_FLAT_QM; + + od_init_qm(daala_dec->state.qm, daala_dec->state.qm_inv, + daala_dec->qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT); +} +#endif + static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { AV1_COMMON *const cm = &pbi->common; @@ -1331,6 +1513,9 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data, ? 
&cm->counts : NULL; av1_zero(tile_data->dqcoeff); +#if CONFIG_PVQ + av1_zero(tile_data->pvq_ref_coeff); +#endif av1_tile_init(&tile_data->xd.tile, tile_data->cm, tile_row, tile_col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, @@ -1342,7 +1527,14 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data, tile_data->bit_reader.accounting = NULL; } #endif - av1_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); + av1_init_macroblockd(cm, &tile_data->xd, +#if CONFIG_PVQ + tile_data->pvq_ref_coeff, +#endif + tile_data->dqcoeff); +#if CONFIG_PVQ + daala_dec_init(&tile_data->xd.daala_dec, &tile_data->bit_reader.ec); +#endif #if CONFIG_PALETTE tile_data->xd.plane[0].color_index_map = tile_data->color_index_map[0]; tile_data->xd.plane[1].color_index_map = tile_data->color_index_map[1]; @@ -1598,7 +1790,14 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data, setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); - av1_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); + av1_init_macroblockd(cm, &tile_data->xd, +#if CONFIG_PVQ + tile_data->pvq_ref_coeff, +#endif + tile_data->dqcoeff); +#if CONFIG_PVQ + daala_dec_init(&tile_data->xd.daala_dec, &tile_data->bit_reader.ec); +#endif #if CONFIG_PALETTE tile_data->xd.plane[0].color_index_map = tile_data->color_index_map[0]; tile_data->xd.plane[1].color_index_map = tile_data->color_index_map[1]; @@ -2044,7 +2243,9 @@ static int read_compressed_header(AV1Decoder *pbi, const uint8_t *data, if (cm->tx_mode == TX_MODE_SELECT) read_tx_mode_probs(&fc->tx_probs, &r); +#if !CONFIG_PVQ read_coef_probs(fc, cm->tx_mode, &r); +#endif for (k = 0; k < SKIP_CONTEXTS; ++k) av1_diff_update_prob(&r, &fc->skip_probs[k], ACCT_STR); diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c index fa2d0618dbb37735d4224ec05be103df19025c25..1b12476899194f97c6fc124c2d0e10b4cc898c7b 100644 --- a/av1/decoder/decoder.c +++ b/av1/decoder/decoder.c @@ -32,7 +32,10 @@ #include "av1/decoder/decodeframe.h" #include "av1/decoder/decoder.h" + +#if !CONFIG_PVQ #include "av1/decoder/detokenize.h" +#endif static void initialize_dec(void) { static volatile int init_done = 0; diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h index d19909e21ec29ba8a0620661c15e5904b07840a4..8d28a10ecd8cde11d7e39aee3e5ab9b86f2ad55d 100644 --- a/av1/decoder/decoder.h +++ b/av1/decoder/decoder.h @@ -26,6 +26,12 @@ #include "av1/common/accounting.h" #endif +#if CONFIG_PVQ +#include "aom_dsp/entdec.h" +#include "av1/decoder/decint.h" +#include "av1/encoder/encodemb.h" +#endif + #ifdef __cplusplus extern "C" { #endif @@ -37,6 +43,10 @@ typedef struct TileData { DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); +#if CONFIG_PVQ + /* forward transformed predicted image, a reference for PVQ */ + DECLARE_ALIGNED(16, tran_low_t, pvq_ref_coeff[OD_BSIZE_MAX * OD_BSIZE_MAX]); +#endif #if CONFIG_PALETTE DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]); #endif // CONFIG_PALETTE @@ -49,6 +59,10 @@ typedef struct TileWorkerData { DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. 
So planes must be decoded serially */
 DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+#if CONFIG_PVQ
+ /* forward transformed predicted image, a reference for PVQ */
+ DECLARE_ALIGNED(16, tran_low_t, pvq_ref_coeff[OD_BSIZE_MAX * OD_BSIZE_MAX]);
+#endif
 #if CONFIG_PALETTE
 DECLARE_ALIGNED(16, uint8_t, color_index_map[2][64 * 64]);
 #endif  // CONFIG_PALETTE
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 1de004851a99214c15149192bb0cec1b481610f8..68d87cbdc741c02304db3083e418f18cbe210a79 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -9,9 +9,12 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

+#if !CONFIG_PVQ
 #include "aom_mem/aom_mem.h"
 #include "aom_ports/mem.h"
+#endif

+#if !CONFIG_PVQ
 #include "av1/common/blockd.h"
 #include "av1/common/common.h"
 #include "av1/common/entropy.h"
@@ -319,3 +322,4 @@ int av1_decode_block_tokens(MACROBLOCKD *xd, int plane, const SCAN_ORDER *sc,
   av1_set_contexts(xd, pd, tx_size, eob > 0, x, y);
   return eob;
 }
+#endif
diff --git a/av1/decoder/detokenize.h b/av1/decoder/detokenize.h
index c2868d6fb8624a75498ac394ac3fff73ee4affdf..569580cf2a24540e26dfb620199868dd773b7d13 100644
--- a/av1/decoder/detokenize.h
+++ b/av1/decoder/detokenize.h
@@ -9,6 +9,7 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */

+#if !CONFIG_PVQ
 #ifndef AV1_DECODER_DETOKENIZE_H_
 #define AV1_DECODER_DETOKENIZE_H_

@@ -31,5 +32,5 @@ int av1_decode_block_tokens(MACROBLOCKD *xd, int plane, const SCAN_ORDER *sc,
 #ifdef __cplusplus
 }  // extern "C"
 #endif
-
 #endif  // AV1_DECODER_DETOKENIZE_H_
+#endif
diff --git a/av1/decoder/generic_decoder.c b/av1/decoder/generic_decoder.c
new file mode 100644
index 0000000000000000000000000000000000000000..86187fa9d3baeae5f41bf66f72823093e4793a72
--- /dev/null
+++ b/av1/decoder/generic_decoder.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+
+#include "aom_dsp/entdec.h"
+#include "av1/common/generic_code.h"
+#include "av1/common/odintrin.h"
+#include "pvq_decoder.h"
+
+/** Decodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] ec range decoder
+ * @param [in,out] cdf CDF of the variable (Q15)
+ * @param [in] n number of values possible
+ * @param [in,out] count number of symbols decoded with that cdf so far
+ * @param [in] rate adaptation rate shift (smaller is faster)
+ * @return decoded variable
+ */
+int od_decode_cdf_adapt_q15_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int *count, int rate OD_ACC_STR) {
+  int val;
+  int i;
+  if (*count == 0) {
+    int ft;
+    ft = cdf[n - 1];
+    for (i = 0; i < n; i++) {
+      cdf[i] = cdf[i]*32768/ft;
+    }
+  }
+  val = od_ec_decode_cdf_q15(ec, cdf, n);
+  od_cdf_adapt_q15(val, cdf, n, count, rate);
+  return val;
+}
+
+/** Decodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts
+ * the cdf accordingly.
+ *
+ * @param [in,out] ec range decoder
+ * @param [in] cdf CDF of the variable (Q15)
+ * @param [in] n number of values possible
+ * @param [in] increment adaptation speed (Q15)
+ *
+ * @retval decoded variable
+ */
+int od_decode_cdf_adapt_(od_ec_dec *ec, uint16_t *cdf, int n,
+ int increment OD_ACC_STR) {
+  int i;
+  int val;
+  val = od_ec_decode_cdf_unscaled(ec, cdf, n);
+  if (cdf[n-1] + increment > 32767) {
+    for (i = 0; i < n; i++) {
+      /* Second term ensures that the pdf is non-null */
+      cdf[i] = (cdf[i] >> 1) + i + 1;
+    }
+  }
+  for (i = val; i < n; i++) cdf[i] += increment;
+  return val;
+}
+
+/** Decodes a random variable using a "generic" model, assuming that the
+ * distribution is one-sided (zero and up), has a single mode, and decays
+ * exponentially past the model.
+ *
+ * @param [in,out] dec range decoder
+ * @param [in,out] model generic probability model
+ * @param [in] max maximum possible value of the decoded variable
+ * (-1 when unbounded)
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @param [in] integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ *
+ * @retval decoded variable x
+ */
+int generic_decode_(od_ec_dec *dec, generic_encoder *model, int max,
+ int *ex_q16, int integration OD_ACC_STR) {
+  int lg_q1;
+  int shift;
+  int id;
+  uint16_t *cdf;
+  int xs;
+  int lsb;
+  int x;
+  int ms;
+  lsb = 0;
+  if (max == 0) return 0;
+  lg_q1 = log_ex(*ex_q16);
+  /* If expectation is too large, shift x to ensure that
+     all we have past xs=15 is the exponentially decaying tail
+     of the distribution. */
+  shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+  /* Choose the cdf to use: we have two per "octave" of ExQ16. */
+  id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+  cdf = model->cdf[id];
+  ms = (max + (1 << shift >> 1)) >> shift;
+  if (max == -1) xs = od_ec_decode_cdf_unscaled(dec, cdf, 16);
+  else xs = od_ec_decode_cdf_unscaled(dec, cdf, OD_MINI(ms + 1, 16));
+  if (xs == 15) {
+    int e;
+    unsigned decay;
+    /* Estimate decay based on the assumption that the distribution is close
+       to Laplacian for large values. We should probably have an adaptive
+       estimate instead. Note: The 2* is a kludge that's not fully understood
+       yet. */
+    OD_ASSERT(*ex_q16 < INT_MAX >> 1);
+    e = ((2* *ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
+    decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
+    xs += laplace_decode_special(dec, decay, (max == -1) ? -1 : ms - 15, acc_str);
+  }
+  if (shift != 0) {
+    int special;
+    /* Because of the rounding, there's only half the number of possibilities
+       for xs=0 */
+    special = xs == 0;
+    if (shift - special > 0) lsb = od_ec_dec_bits(dec, shift - special, acc_str);
+    lsb -= !special << (shift - 1);
+  }
+  x = (xs << shift) + lsb;
+  generic_model_update(model, ex_q16, x, xs, id, integration);
+  OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+   "dec: %d %d %d %d %d %x", *ex_q16, x, shift, id, xs, dec->rng));
+  return x;
+}
diff --git a/av1/decoder/laplace_decoder.c b/av1/decoder/laplace_decoder.c
new file mode 100644
index 0000000000000000000000000000000000000000..4c3def5fdd4c0ac53f60f57ee76e3aa8cd39f900
--- /dev/null
+++ b/av1/decoder/laplace_decoder.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */ +/* clang-format off */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#include "aom_dsp/entdec.h" +#include "av1/common/pvq.h" +#include "pvq_decoder.h" + +#if OD_ACCOUNTING +# define od_decode_pvq_split(ec, adapt, sum, ctx, str) od_decode_pvq_split_(ec, adapt, sum, ctx, str) +#else +# define od_decode_pvq_split(ec, adapt, sum, ctx, str) od_decode_pvq_split_(ec, adapt, sum, ctx) +#endif + +static int od_decode_pvq_split_(od_ec_dec *ec, od_pvq_codeword_ctx *adapt, + int sum, int ctx OD_ACC_STR) { + int shift; + int count; + int msbs; + int fctx; + count = 0; + if (sum == 0) return 0; + shift = OD_MAXI(0, OD_ILOG(sum) - 3); + fctx = 7*ctx + (sum >> shift) - 1; + msbs = od_decode_cdf_adapt(ec, adapt->pvq_split_cdf[fctx], + (sum >> shift) + 1, adapt->pvq_split_increment, acc_str); + if (shift) count = od_ec_dec_bits(ec, shift, acc_str); + count += msbs << shift; + if (count > sum) { + count = sum; + ec->error = 1; + } + return count; +} + +void od_decode_band_pvq_splits(od_ec_dec *ec, od_pvq_codeword_ctx *adapt, + od_coeff *y, int n, int k, int level) { + int mid; + int count_right; + if (n == 1) { + y[0] = k; + } + else if (k == 0) { + OD_CLEAR(y, n); + } + else if (k == 1 && n <= 16) { + int cdf_id; + int pos; + cdf_id = od_pvq_k1_ctx(n, level == 0); + OD_CLEAR(y, n); + pos = od_decode_cdf_adapt(ec, adapt->pvq_k1_cdf[cdf_id], n, + adapt->pvq_k1_increment, "pvq:k1"); + y[pos] = 1; + } + else { + mid = n >> 1; + count_right = od_decode_pvq_split(ec, adapt, k, od_pvq_size_ctx(n), + "pvq:split"); + od_decode_band_pvq_splits(ec, adapt, y, mid, k - count_right, level + 1); + od_decode_band_pvq_splits(ec, adapt, y + mid, n - mid, count_right, + level + 1); + } +} + +/** Decodes the tail of a Laplace-distributed variable, i.e. it doesn't + * do anything special for the zero case. + * + * @param [dec] range decoder + * @param [decay] decay factor of the distribution, i.e. pdf ~= decay^x + * @param [max] maximum possible value of x (used to truncate the pdf) + * + * @retval decoded variable x + */ +int od_laplace_decode_special_(od_ec_dec *dec, unsigned decay, int max OD_ACC_STR) { + int pos; + int shift; + int xs; + int ms; + int sym; + const uint16_t *cdf; + shift = 0; + if (max == 0) return 0; + /* We don't want a large decay value because that would require too many + symbols. However, it's OK if the max is below 15. */ + while (((max >> shift) >= 15 || max == -1) && decay > 235) { + decay = (decay*decay + 128) >> 8; + shift++; + } + decay = OD_MINI(decay, 254); + decay = OD_MAXI(decay, 2); + ms = max >> shift; + cdf = EXP_CDF_TABLE[(decay + 1) >> 1]; + OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d\n", decay)); + xs = 0; + do { + sym = OD_MINI(xs, 15); + { + int i; + OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d %d", xs, shift, sym, max)); + for (i = 0; i < 16; i++) { + OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "%d ", cdf[i])); + } + OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n")); + } + if (ms > 0 && ms < 15) { + /* Simple way of truncating the pdf when we have a bound. */ + sym = od_ec_decode_cdf_unscaled(dec, cdf, ms + 1); + } + else sym = od_ec_decode_cdf_q15(dec, cdf, 16); + xs += sym; + ms -= 15; + } + while (sym >= 15 && ms != 0); + if (shift) pos = (xs << shift) + od_ec_dec_bits(dec, shift, acc_str); + else pos = xs; + OD_ASSERT(pos >> shift <= max >> shift || max == -1); + if (max != -1 && pos > max) { + pos = max; + dec->error = 1; + } + OD_ASSERT(pos <= max || max == -1); + return pos; +} + +/** Decodes a Laplace-distributed variable for use in PVQ. 
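+ * (Note added for exposition, not part of the original patch: the
+ * magnitude is modeled as approximately geometric, with survival
+ * probability decay/256 per step; the code below derives
+ * decay = 256*ex_q8/(ex_q8 + 256) from the running expectation.  For
+ * example, ex_q8 = 256, i.e. E|x| = 1, gives decay = 128, so each
+ * successive magnitude is about half as likely.  Expectations too
+ * large for the 16-entry CDFs are handled by decoding a shifted value
+ * and reading the low bits as raw bits.)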
+ * + * @param [in,out] dec range decoder + * @param [in] ExQ8 expectation of the absolute value of x + * @param [in] K maximum value of |x| + * + * @retval decoded variable (including sign) + */ +int od_laplace_decode_(od_ec_dec *dec, unsigned ex_q8, int k OD_ACC_STR) { + int j; + int shift; + uint16_t cdf[16]; + int sym; + int lsb; + int decay; + int offset; + lsb = 0; + /* Shift down x if expectation is too high. */ + shift = OD_ILOG(ex_q8) - 11; + if (shift < 0) shift = 0; + /* Apply the shift with rounding to Ex, K and xs. */ + ex_q8 = (ex_q8 + (1 << shift >> 1)) >> shift; + k = (k + (1 << shift >> 1)) >> shift; + decay = OD_MINI(254, OD_DIVU(256*ex_q8, (ex_q8 + 256))); + offset = LAPLACE_OFFSET[(decay + 1) >> 1]; + for (j = 0; j < 16; j++) { + cdf[j] = EXP_CDF_TABLE[(decay + 1) >> 1][j] - offset; + } + /* Simple way of truncating the pdf when we have a bound */ + if (k == 0) sym = 0; + else sym = od_ec_decode_cdf_unscaled(dec, cdf, OD_MINI(k + 1, 16)); + if (shift) { + int special; + /* Because of the rounding, there's only half the number of possibilities + for xs=0 */ + special = (sym == 0); + if (shift - special > 0) lsb = od_ec_dec_bits(dec, shift - special, acc_str); + lsb -= (!special << (shift - 1)); + } + /* Handle the exponentially-decaying tail of the distribution */ + if (sym == 15) sym += laplace_decode_special(dec, decay, k - 15, acc_str); + return (sym << shift) + lsb; +} + +#if OD_ACCOUNTING +# define laplace_decode_vector_delta(dec, y, n, k, curr, means, str) laplace_decode_vector_delta_(dec, y, n, k, curr, means, str) +#else +# define laplace_decode_vector_delta(dec, y, n, k, curr, means, str) laplace_decode_vector_delta_(dec, y, n, k, curr, means) +#endif + +static void laplace_decode_vector_delta_(od_ec_dec *dec, od_coeff *y, int n, int k, + int32_t *curr, const int32_t *means + OD_ACC_STR) { + int i; + int prev; + int sum_ex; + int sum_c; + int coef; + int pos; + int k0; + int sign; + int first; + int k_left; + prev = 0; + sum_ex = 0; + sum_c = 0; + coef = 256*means[OD_ADAPT_COUNT_Q8]/ + (1 + means[OD_ADAPT_COUNT_EX_Q8]); + pos = 0; + sign = 0; + first = 1; + k_left = k; + for (i = 0; i < n; i++) y[i] = 0; + k0 = k_left; + coef = OD_MAXI(coef, 1); + for (i = 0; i < k0; i++) { + int count; + if (first) { + int decay; + int ex = coef*(n - prev)/k_left; + if (ex > 65280) decay = 255; + else { + decay = OD_MINI(255, + (int)((256*ex/(ex + 256) + (ex>>5)*ex/((n + 1)*(n - 1)*(n - 1))))); + } + /*Update mean position.*/ + count = laplace_decode_special(dec, decay, n - 1, acc_str); + first = 0; + } + else count = laplace_decode(dec, coef*(n - prev)/k_left, n - prev - 1, acc_str); + sum_ex += 256*(n - prev); + sum_c += count*k_left; + pos += count; + OD_ASSERT(pos < n); + if (y[pos] == 0) + sign = od_ec_dec_bits(dec, 1, acc_str); + y[pos] += sign ? -1 : 1; + prev = pos; + k_left--; + if (k_left == 0) break; + } + if (k > 0) { + curr[OD_ADAPT_COUNT_Q8] = 256*sum_c; + curr[OD_ADAPT_COUNT_EX_Q8] = sum_ex; + } + else { + curr[OD_ADAPT_COUNT_Q8] = -1; + curr[OD_ADAPT_COUNT_EX_Q8] = 0; + } + curr[OD_ADAPT_K_Q8] = 0; + curr[OD_ADAPT_SUM_EX_Q8] = 0; +} + +/** Decodes a vector of integers assumed to come from rounding a sequence of + * Laplace-distributed real values in decreasing order of variance. + * + * @param [in,out] dec range decoder + * @param [in] y decoded vector + * @param [in] N dimension of the vector + * @param [in] K sum of the absolute value of components of y + * @param [out] curr Adaptation context output, may alias means. 
+ * @param [in] means Adaptation context input. + */ +void od_laplace_decode_vector_(od_ec_dec *dec, od_coeff *y, int n, int k, + int32_t *curr, const int32_t *means OD_ACC_STR) { + int i; + int sum_ex; + int kn; + int exp_q8; + int mean_k_q8; + int mean_sum_ex_q8; + int ran_delta; + ran_delta = 0; + if (k <= 1) { + laplace_decode_vector_delta(dec, y, n, k, curr, means, acc_str); + return; + } + if (k == 0) { + curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE; + curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE; + curr[OD_ADAPT_K_Q8] = 0; + curr[OD_ADAPT_SUM_EX_Q8] = 0; + for (i = 0; i < n; i++) y[i] = 0; + return; + } + sum_ex = 0; + kn = k; + /* Estimates the factor relating pulses_left and positions_left to E(|x|).*/ + mean_k_q8 = means[OD_ADAPT_K_Q8]; + mean_sum_ex_q8 = means[OD_ADAPT_SUM_EX_Q8]; + if (mean_k_q8 < 1 << 23) exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8); + else exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8)); + for (i = 0; i < n; i++) { + int ex; + int x; + if (kn == 0) break; + if (kn <= 1 && i != n - 1) { + laplace_decode_vector_delta(dec, y + i, n - i, kn, curr, means, acc_str); + ran_delta = 1; + i = n; + break; + } + /* Expected value of x (round-to-nearest) is + expQ8*pulses_left/positions_left. */ + ex = (2*exp_q8*kn + (n - i))/(2*(n - i)); + if (ex > kn*256) ex = kn*256; + sum_ex += (2*256*kn + (n - i))/(2*(n - i)); + /* No need to encode the magnitude for the last bin. */ + if (i != n - 1) x = laplace_decode(dec, ex, kn, acc_str); + else x = kn; + if (x != 0) { + if (od_ec_dec_bits(dec, 1, acc_str)) x = -x; + } + y[i] = x; + kn -= abs(x); + } + /* Adapting the estimates for expQ8. */ + if (!ran_delta) { + curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE; + curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE; + } + curr[OD_ADAPT_K_Q8] = k - kn; + curr[OD_ADAPT_SUM_EX_Q8] = sum_ex; + for (; i < n; i++) y[i] = 0; +} diff --git a/av1/decoder/pvq_decoder.c b/av1/decoder/pvq_decoder.c new file mode 100644 index 0000000000000000000000000000000000000000..2340605ee8dc40e6730c30336afd4d3c04873f5b --- /dev/null +++ b/av1/decoder/pvq_decoder.c @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include "./aom_config.h" +#include "aom_dsp/entcode.h" +#include "aom_dsp/entdec.h" +#include "av1/common/odintrin.h" +#include "av1/common/partition.h" +#include "av1/common/pvq_state.h" +#include "av1/decoder/decint.h" +#include "av1/decoder/pvq_decoder.h" + +static void od_decode_pvq_codeword(od_ec_dec *ec, od_pvq_codeword_ctx *ctx, + od_coeff *y, int n, int k) { + int i; + od_decode_band_pvq_splits(ec, ctx, y, n, k, 0); + for (i = 0; i < n; i++) { + if (y[i] && od_ec_dec_bits(ec, 1, "pvq:sign")) y[i] = -y[i]; + } +} + +/** Inverse of neg_interleave; decodes the interleaved gain. 
+ *
+ * @param [in] x quantized/interleaved gain to decode
+ * @param [in] ref quantized gain of the reference
+ * @return original quantized gain value
+ */
+static int neg_deinterleave(int x, int ref) {
+  if (x < 2*ref-1) {
+    if (x & 1) return ref - 1 - (x >> 1);
+    else return ref + (x >> 1);
+  }
+  else return x+1;
+}
+
+/** Synthesizes one partition of coefficient values from a PVQ-encoded
+ * vector.
+ *
+ * @param [out] xcoeff output coefficient partition (x in math doc)
+ * @param [in] ypulse PVQ-encoded values (y in math doc); in the noref
+ * case, this vector has n entries, in the
+ * reference case it contains n-1 entries
+ * (the m-th entry is not included)
+ * @param [in] r16 reference vector (prediction)
+ * @param [in] n number of elements in this partition
+ * @param [in] gr gain of the reference vector (prediction)
+ * @param [in] noref indicates presence or lack of prediction
+ * @param [in] g decoded quantized vector gain
+ * @param [in] theta decoded theta (prediction error)
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ */
+static void pvq_synthesis(od_coeff *xcoeff, od_coeff *ypulse, od_val16 *r16,
+ int n, od_val32 gr, int noref, od_val32 g, od_val32 theta, const int16_t *qm_inv,
+ int shift) {
+  int s;
+  int m;
+  /* Sign of the Householder reflection vector */
+  s = 0;
+  /* Direction of the Householder reflection vector */
+  m = noref ? 0 : od_compute_householder(r16, n, gr, &s, shift);
+  od_pvq_synthesis_partial(xcoeff, ypulse, r16, n, noref, g, theta, m, s,
+   qm_inv);
+}
+
+typedef struct {
+  od_coeff *ref;
+  int nb_coeffs;
+  int allow_flip;
+} cfl_ctx;
+
+/** Decodes a single vector of integers (e.g., a partition within a
+ * coefficient block) encoded using PVQ
+ *
+ * @param [in,out] ec range decoder
+ * @param [in] q0 scale/quantizer
+ * @param [in] n number of coefficients in partition
+ * @param [in,out] model entropy decoder state
+ * @param [in,out] adapt adaptation context
+ * @param [in,out] exg ExQ16 expectation of decoded gain value
+ * @param [in,out] ext ExQ16 expectation of decoded theta value
+ * @param [in] ref 'reference' (prediction) vector
+ * @param [out] out decoded partition
+ * @param [out] noref boolean indicating absence of reference
+ * @param [in] beta per-band activity masking beta param
+ * @param [in] robust stream is robust to error in the reference
+ * @param [in] is_keyframe whether we're decoding a keyframe
+ * @param [in] pli plane index
+ * @param [in] cdf_ctx selects which cdf context to use
+ * @param [in,out] skip_rest whether to skip further bands in each direction
+ * @param [in] band index of the band being decoded
+ * @param [out] skip skip flag with range [0,1]
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ */
+static void pvq_decode_partition(od_ec_dec *ec,
+                                 int q0,
+                                 int n,
+                                 generic_encoder model[3],
+                                 od_adapt_ctx *adapt,
+                                 int *exg,
+                                 int *ext,
+                                 od_coeff *ref,
+                                 od_coeff *out,
+                                 int *noref,
+                                 od_val16 beta,
+                                 int robust,
+                                 int is_keyframe,
+                                 int pli,
+                                 int cdf_ctx,
+                                 cfl_ctx *cfl,
+                                 int has_skip,
+                                 int *skip_rest,
+                                 int band,
+                                 int *skip,
+                                 const int16_t *qm,
+                                 const int16_t *qm_inv) {
+  int k;
+  od_val32 qcg;
+  int max_theta;
+  int itheta;
+  od_val32 theta;
+  od_val32 gr;
+  od_val32 gain_offset;
+  od_coeff y[MAXN];
+  int qg;
+  int nodesync;
+  int id;
+  int i;
+  od_val16 ref16[MAXN];
+  int rshift;
+  theta = 0;
+  gr = 0;
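+  /* Note added for exposition (not part of the original patch): the code
+     below first consults the per-direction skip_rest flags; if this band
+     is not skipped, it jointly decodes a small (gain, theta, noref)
+     triple from pvq_gaintheta_cdf.  After the keyframe/inter adjustments,
+     an id of 8 or more is an escape that sets all three skip_rest flags;
+     the low bit of id is the coarse gain qg and (id >> 1) - 1 is itheta,
+     with itheta == -1 meaning no reference is used.  A nonzero coarse
+     gain is refined further down with generic_decode(). */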
gain_offset = 0; + /* We always use the robust bitstream for keyframes to avoid having + PVQ and entropy decoding depending on each other, hurting parallelism. */ + nodesync = robust || is_keyframe; + /* Skip is per-direction. For band=0, we can use any of the flags. */ + if (skip_rest[(band + 2) % 3]) { + qg = 0; + if (is_keyframe) { + itheta = -1; + *noref = 1; + } + else { + itheta = 0; + *noref = 0; + } + } + else { + /* Jointly decode gain, itheta and noref for small values. Then we handle + larger gain. We need to wait for itheta because in the !nodesync case + it depends on max_theta, which depends on the gain. */ + id = od_decode_cdf_adapt(ec, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0], + 8 + 7*has_skip, adapt->pvq.pvq_gaintheta_increment, + "pvq:gaintheta"); + if (!is_keyframe && id >= 10) id++; + if (is_keyframe && id >= 8) id++; + if (id >= 8) { + id -= 8; + skip_rest[0] = skip_rest[1] = skip_rest[2] = 1; + } + qg = id & 1; + itheta = (id >> 1) - 1; + *noref = (itheta == -1); + } + /* The CfL flip bit is only decoded on the first band that has noref=0. */ + if (cfl->allow_flip && !*noref) { + int flip; + flip = od_ec_dec_bits(ec, 1, "cfl:flip"); + if (flip) { + for (i = 0; i < cfl->nb_coeffs; i++) cfl->ref[i] = -cfl->ref[i]; + } + cfl->allow_flip = 0; + } + if (qg > 0) { + int tmp; + tmp = *exg; + qg = 1 + generic_decode(ec, &model[!*noref], -1, &tmp, 2, "pvq:gain"); + OD_IIR_DIADIC(*exg, qg << 16, 2); + } + *skip = 0; +#if defined(OD_FLOAT_PVQ) + rshift = 0; +#else + /* Shift needed to make the reference fit in 15 bits, so that the Householder + vector can fit in 16 bits. */ + rshift = OD_MAXI(0, od_vector_log_mag(ref, n) - 14); +#endif + for (i = 0; i < n; i++) { +#if defined(OD_FLOAT_PVQ) + ref16[i] = ref[i]*(double)qm[i]*OD_QM_SCALE_1; +#else + ref16[i] = OD_SHR_ROUND(ref[i]*qm[i], OD_QM_SHIFT + rshift); +#endif + } + if(!*noref){ + /* we have a reference; compute its gain */ + od_val32 cgr; + int icgr; + int cfl_enabled; + cfl_enabled = pli != 0 && is_keyframe && !OD_DISABLE_CFL; + cgr = od_pvq_compute_gain(ref16, n, q0, &gr, beta, rshift); + if (cfl_enabled) cgr = OD_CGAIN_SCALE; +#if defined(OD_FLOAT_PVQ) + icgr = (int)floor(.5 + cgr); +#else + icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT); +#endif + /* quantized gain is interleave encoded when there's a reference; + deinterleave it now */ + if (is_keyframe) qg = neg_deinterleave(qg, icgr); + else { + qg = neg_deinterleave(qg, icgr + 1) - 1; + if (qg == 0) *skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY); + } + if (qg == icgr && itheta == 0 && !cfl_enabled) *skip = OD_PVQ_SKIP_COPY; + gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT); + qcg = OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset; + /* read and decode first-stage PVQ error theta */ + max_theta = od_pvq_compute_max_theta(qcg, beta); + if (itheta > 1 && (nodesync || max_theta > 3)) { + int tmp; + tmp = *ext; + itheta = 2 + generic_decode(ec, &model[2], nodesync ? 
-1 : max_theta - 3, + &tmp, 2, "pvq:theta"); + OD_IIR_DIADIC(*ext, itheta << 16, 2); + } + theta = od_pvq_compute_theta(itheta, max_theta); + } + else{ + itheta = 0; + if (!is_keyframe) qg++; + qcg = OD_SHL(qg, OD_CGAIN_SHIFT); + if (qg == 0) *skip = OD_PVQ_SKIP_ZERO; + } + + k = od_pvq_compute_k(qcg, itheta, theta, *noref, n, beta, nodesync); + if (k != 0) { + /* when noref==0, y is actually size n-1 */ + od_decode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, y, n - !*noref, + k); + } + else { + OD_CLEAR(y, n); + } + if (*skip) { + if (*skip == OD_PVQ_SKIP_COPY) OD_COPY(out, ref, n); + else OD_CLEAR(out, n); + } + else { + od_val32 g; + g = od_gain_expand(qcg, q0, beta); + pvq_synthesis(out, y, ref16, n, gr, *noref, g, theta, qm_inv, rshift); + } + *skip = !!*skip; +} + +/** Decodes a coefficient block (except for DC) encoded using PVQ + * + * @param [in,out] dec daala decoder context + * @param [in] ref 'reference' (prediction) vector + * @param [out] out decoded partition + * @param [in] q0 quantizer + * @param [in] pli plane index + * @param [in] bs log of the block size minus two + * @param [in] beta per-band activity masking beta param + * @param [in] robust stream is robust to error in the reference + * @param [in] is_keyframe whether we're encoding a keyframe + * @param [out] flags bitmask of the per band skip and noref flags + * @param [in] block_skip skip flag for the block (range 0-3) + * @param [in] qm QM with magnitude compensation + * @param [in] qm_inv Inverse of QM with magnitude compensation + */ +void od_pvq_decode(daala_dec_ctx *dec, + od_coeff *ref, + od_coeff *out, + int q0, + int pli, + int bs, + const od_val16 *beta, + int robust, + int is_keyframe, + unsigned int *flags, + int block_skip, + const int16_t *qm, + const int16_t *qm_inv){ + + int noref[PVQ_MAX_PARTITIONS]; + int skip[PVQ_MAX_PARTITIONS]; + int *exg; + int *ext; + int nb_bands; + int i; + const int *off; + int size[PVQ_MAX_PARTITIONS]; + generic_encoder *model; + int skip_rest[3] = {0}; + cfl_ctx cfl; + /* const unsigned char *pvq_qm; */ + /*Default to skip=1 and noref=0 for all bands.*/ + for (i = 0; i < PVQ_MAX_PARTITIONS; i++) { + noref[i] = 0; + skip[i] = 1; + } + /* TODO(yushin): Enable this for activity masking, + when pvq_qm_q4 is available in AOM. */ + /*pvq_qm = &dec->state.pvq_qm_q4[pli][0];*/ + exg = &dec->state.adapt.pvq.pvq_exg[pli][bs][0]; + ext = dec->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS; + model = dec->state.adapt.pvq.pvq_param_model; + nb_bands = OD_BAND_OFFSETS[bs][0]; + off = &OD_BAND_OFFSETS[bs][1]; + OD_ASSERT(block_skip < 4); + out[0] = block_skip & 1; + if (!(block_skip >> 1)) { + if (is_keyframe) for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = 0; + else for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = ref[i]; + } + else { + for (i = 0; i < nb_bands; i++) size[i] = off[i+1] - off[i]; + cfl.ref = ref; + cfl.nb_coeffs = off[nb_bands]; + cfl.allow_flip = pli != 0 && is_keyframe; + for (i = 0; i < nb_bands; i++) { + int q; + /* TODO(yushin): Enable this for activity masking, + when pvq_qm_q4 is available in AOM. 
*/ + /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/ + q = OD_MAXI(1, q0); + pvq_decode_partition(dec->ec, q, size[i], + model, &dec->state.adapt, exg + i, ext + i, ref + off[i], out + off[i], + &noref[i], beta[i], robust, is_keyframe, pli, + (pli != 0)*OD_NBSIZES*PVQ_MAX_PARTITIONS + bs*PVQ_MAX_PARTITIONS + i, + &cfl, i == 0 && (i < nb_bands - 1), skip_rest, i, &skip[i], + qm + off[i], qm_inv + off[i]); + if (i == 0 && !skip_rest[0] && bs > 0) { + int skip_dir; + int j; + skip_dir = od_decode_cdf_adapt(dec->ec, + &dec->state.adapt.pvq.pvq_skip_dir_cdf[(pli != 0) + 2*(bs - 1)][0], 7, + dec->state.adapt.pvq.pvq_skip_dir_increment, "pvq:skiprest"); + for (j = 0; j < 3; j++) skip_rest[j] = !!(skip_dir & (1 << j)); + } + } + } + *flags = 0; + for (i = nb_bands - 1; i >= 0; i--) { + *flags <<= 1; + *flags |= noref[i]&1; + *flags <<= 1; + *flags |= skip[i]&1; + } +} diff --git a/av1/decoder/pvq_decoder.h b/av1/decoder/pvq_decoder.h new file mode 100644 index 0000000000000000000000000000000000000000..d749040a6090ab5606403c68b0c9518b3d7dda65 --- /dev/null +++ b/av1/decoder/pvq_decoder.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#if !defined(_pvq_decoder_H) +# define _pvq_decoder_H (1) +# include "aom_dsp/entdec.h" +# include "av1/common/pvq.h" +# include "av1/decoder/decint.h" + +void od_decode_band_pvq_splits(od_ec_dec *ec, od_pvq_codeword_ctx *adapt, + od_coeff *y, int n, int k, int level); + +#if OD_ACCOUNTING +# define laplace_decode_special(dec, decay, max, str) od_laplace_decode_special_(dec, decay, max, str) +# define laplace_decode(dec, ex_q8, k, str) od_laplace_decode_(dec, ex_q8, k, str) +#define laplace_decode_vector(dec, y, n, k, curr, means, str) od_laplace_decode_vector_(dec, y, n, k, curr, means, str) +#else +# define laplace_decode_special(dec, decay, max, str) od_laplace_decode_special_(dec, decay, max) +# define laplace_decode(dec, ex_q8, k, str) od_laplace_decode_(dec, ex_q8, k) +#define laplace_decode_vector(dec, y, n, k, curr, means, str) od_laplace_decode_vector_(dec, y, n, k, curr, means) +#endif + +int od_laplace_decode_special_(od_ec_dec *dec, unsigned decay, int max OD_ACC_STR); +int od_laplace_decode_(od_ec_dec *dec, unsigned ex_q8, int k OD_ACC_STR); +void od_laplace_decode_vector_(od_ec_dec *dec, od_coeff *y, int n, int k, + int32_t *curr, const int32_t *means + OD_ACC_STR); + + +void od_pvq_decode(daala_dec_ctx *dec, od_coeff *ref, od_coeff *out, int q0, + int pli, int bs, const od_val16 *beta, int robust, int is_keyframe, + unsigned int *flags, int block_skip, const int16_t *qm, + const int16_t *qm_inv); + +#endif diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c index 156c4f4a88c6c4127c3bc5f039e6bff8f2c06f63..95e86c6c715fde7051a3f036370b9338151f1ad1 100644 --- a/av1/encoder/bitstream.c +++ b/av1/encoder/bitstream.c @@ -46,6 +46,9 @@ #include "av1/encoder/segmentation.h" #include "av1/encoder/subexp.h" #include "av1/encoder/tokenize.h" +#if CONFIG_PVQ +#include "av1/encoder/pvq_encoder.h" +#endif 
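Regarding the *flags output of od_pvq_decode() above: the final loop packs two bits per band, band 0 in the least-significant positions. A minimal sketch of reading the mask back (the helper name is illustrative, not part of the patch):

/* Sketch: unpack the bitmask written at the end of od_pvq_decode().
   Band i stores its skip flag at bit 2*i and its noref flag at bit 2*i+1. */
static void pvq_unpack_flags_sketch(unsigned int flags, int nb_bands,
                                    int *noref, int *skip) {
  int i;
  for (i = 0; i < nb_bands; i++) {
    skip[i] = (flags >> (2*i)) & 1;
    noref[i] = (flags >> (2*i + 1)) & 1;
  }
}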
static struct av1_token intra_mode_encodings[INTRA_MODES]; static struct av1_token switchable_interp_encodings[SWITCHABLE_FILTERS]; @@ -451,6 +454,7 @@ static void pack_palette_tokens(aom_writer *w, TOKENEXTRA **tp, int n, } #endif // CONFIG_PALETTE +#if !CONFIG_PVQ static void pack_mb_tokens(aom_writer *w, TOKENEXTRA **tp, const TOKENEXTRA *const stop, aom_bit_depth_t bit_depth, const TX_SIZE tx) { @@ -551,6 +555,7 @@ static void pack_mb_tokens(aom_writer *w, TOKENEXTRA **tp, *tp = p; } +#endif static void write_segment_id(aom_writer *w, const struct segmentation *seg, struct segmentation_probs *segp, int segment_id) { @@ -1054,6 +1059,20 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd, } } +#if CONFIG_PVQ +PVQ_INFO *get_pvq_block(PVQ_QUEUE *pvq_q) { + PVQ_INFO *pvq; + + assert(pvq_q->curr_pos <= pvq_q->last_pos); + assert(pvq_q->curr_pos < pvq_q->buf_len); + + pvq = pvq_q->buf + pvq_q->curr_pos; + ++pvq_q->curr_pos; + + return pvq; +} +#endif + static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, aom_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, int mi_row, @@ -1062,7 +1081,14 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; MODE_INFO *m; int plane; +#if CONFIG_PVQ + MB_MODE_INFO *mbmi; + BLOCK_SIZE bsize; + od_adapt_ctx *adapt; + (void)tok; + (void)tok_end; +#endif xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); m = xd->mi[0]; @@ -1071,6 +1097,12 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type], mi_col, num_8x8_blocks_wide_lookup[m->mbmi.sb_type], cm->mi_rows, cm->mi_cols); +#if CONFIG_PVQ + mbmi = &m->mbmi; + bsize = mbmi->sb_type; + adapt = &cpi->td.mb.daala_enc.state.adapt; +#endif + if (frame_is_intra_only(cm)) { write_mb_modes_kf(cm, xd, xd->mi, w); } else { @@ -1092,6 +1124,7 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, } #endif // CONFIG_PALETTE +#if !CONFIG_PVQ if (!m->mbmi.skip) { assert(*tok < tok_end); for (plane = 0; plane < MAX_MB_PLANE; ++plane) { @@ -1102,6 +1135,104 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, (*tok)++; } } +#else + // PVQ writes its tokens (i.e. symbols) here. + if (!m->mbmi.skip) { + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + PVQ_INFO *pvq; + TX_SIZE tx_size = + plane ? get_uv_tx_size(&m->mbmi, &xd->plane[plane]) : m->mbmi.tx_size; + int idx, idy; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + int num_4x4_w; + int num_4x4_h; + int max_blocks_wide; + int max_blocks_high; + int step = (1 << tx_size); + const int step_xy = 1 << (tx_size << 1); + int block = 0; + + if (tx_size == TX_4X4 && bsize <= BLOCK_8X8) { + num_4x4_w = 2 >> xd->plane[plane].subsampling_x; + num_4x4_h = 2 >> xd->plane[plane].subsampling_y; + } else { + num_4x4_w = + num_4x4_blocks_wide_lookup[bsize] >> xd->plane[plane].subsampling_x; + num_4x4_h = + num_4x4_blocks_high_lookup[bsize] >> xd->plane[plane].subsampling_y; + } + // TODO: Do we need below for 4x4,4x8,8x4 cases as well? + max_blocks_wide = + num_4x4_w + (xd->mb_to_right_edge >= 0 + ? 0 + : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + max_blocks_high = + num_4x4_h + (xd->mb_to_bottom_edge >= 0 + ? 0 + : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + // TODO(yushin) Try to use av1_foreach_transformed_block_in_plane(). 
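get_pvq_block() above drains the per-tile PVQ_INFO queue in exactly the order the encode pass filled it. For orientation, the producer side amounts to the following sketch (a hypothetical helper; the actual store happens in the PVQ encode path, and buffer growth is handled by the caller):

/* Sketch: append one block's PVQ info to the queue that get_pvq_block()
   later replays while writing the bitstream. Name is illustrative. */
static void put_pvq_block_sketch(PVQ_QUEUE *pvq_q, const PVQ_INFO *info) {
  assert(pvq_q->curr_pos < pvq_q->buf_len); /* caller grows buf when full */
  pvq_q->buf[pvq_q->curr_pos++] = *info;
  pvq_q->last_pos = pvq_q->curr_pos; /* bitstream pass replays [0, last_pos) */
}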
+ // Logic like the mb_to_right_edge/mb_to_bottom_edge stuff should + // really be centralized in one place. + + for (idy = 0; idy < max_blocks_high; idy += step) { + for (idx = 0; idx < max_blocks_wide; idx += step) { + const int is_keyframe = 0; + const int encode_flip = 0; + const int flip = 0; + const int robust = 1; + int i; + const int has_dc_skip = 1; + int *exg = &adapt->pvq.pvq_exg[plane][tx_size][0]; + int *ext = adapt->pvq.pvq_ext + tx_size * PVQ_MAX_PARTITIONS; + generic_encoder *model = adapt->pvq.pvq_param_model; + + pvq = get_pvq_block(cpi->td.mb.pvq_q); + + // encode block skip info + od_encode_cdf_adapt(&w->ec, pvq->ac_dc_coded, + adapt->skip_cdf[2 * tx_size + (plane != 0)], 4, + adapt->skip_increment); + + // AC coeffs coded? + if (pvq->ac_dc_coded & 0x02) { + assert(pvq->bs <= tx_size); + for (i = 0; i < pvq->nb_bands; i++) { + if (i == 0 || (!pvq->skip_rest && + !(pvq->skip_dir & (1 << ((i - 1) % 3))))) { + pvq_encode_partition( + &w->ec, pvq->qg[i], pvq->theta[i], pvq->max_theta[i], + pvq->y + pvq->off[i], pvq->size[i], pvq->k[i], model, adapt, + exg + i, ext + i, robust || is_keyframe, + (plane != 0) * OD_NBSIZES * PVQ_MAX_PARTITIONS + + pvq->bs * PVQ_MAX_PARTITIONS + i, + is_keyframe, i == 0 && (i < pvq->nb_bands - 1), + pvq->skip_rest, encode_flip, flip); + } + if (i == 0 && !pvq->skip_rest && pvq->bs > 0) { + od_encode_cdf_adapt( + &w->ec, pvq->skip_dir, + &adapt->pvq + .pvq_skip_dir_cdf[(plane != 0) + 2 * (pvq->bs - 1)][0], + 7, adapt->pvq.pvq_skip_dir_increment); + } + } + } + // Encode residue of DC coeff, if exist. + if (!has_dc_skip || (pvq->ac_dc_coded & 1)) { // DC coded? + generic_encode(&w->ec, &adapt->model_dc[plane], + abs(pvq->dq_dc_residue) - has_dc_skip, -1, + &adapt->ex_dc[plane][pvq->bs][0], 2); + } + if ((pvq->ac_dc_coded & 1)) { // DC coded? 
+ od_ec_enc_bits(&w->ec, pvq->dq_dc_residue < 0, 1); + } + block += step_xy; + } + } // for (idy = 0; + } // for (plane = + } // if (!m->mbmi.skip) +#endif } static void write_partition(const AV1_COMMON *const cm, @@ -1233,6 +1364,9 @@ static void write_modes(AV1_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; int mi_row, mi_col; +#if CONFIG_PVQ + assert(cpi->td.mb.pvq_q->curr_pos == 0); +#endif #if CONFIG_DELTA_Q if (cpi->common.delta_q_present_flag) { xd->prev_qindex = cpi->common.base_qindex; @@ -1246,8 +1380,16 @@ static void write_modes(AV1_COMP *cpi, const TileInfo *const tile, mi_col += MAX_MIB_SIZE) write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64); } +#if CONFIG_PVQ + // Check that the number of PVQ blocks encoded and written to the bitstream + // are the same + assert(cpi->td.mb.pvq_q->curr_pos == cpi->td.mb.pvq_q->last_pos); + // Reset curr_pos in case we repack the bitstream + cpi->td.mb.pvq_q->curr_pos = 0; +#endif } +#if !CONFIG_PVQ static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size, av1_coeff_stats *coef_branch_ct, av1_coeff_probs_model *coef_probs) { @@ -1441,6 +1583,7 @@ static void update_coef_probs(AV1_COMP *cpi, aom_writer *w) { } } } +#endif static void encode_loopfilter(struct loopfilter *lf, struct aom_write_bit_buffer *wb) { @@ -1796,6 +1939,9 @@ static size_t encode_tiles(AV1_COMP *cpi, uint8_t *data_ptr, for (tile_col = 0; tile_col < tile_cols; tile_col++) { const int tile_idx = tile_row * tile_cols + tile_col; unsigned int tile_size; +#if CONFIG_PVQ + TileDataEnc *this_tile = &cpi->tile_data[tile_idx]; +#endif TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; #if !CONFIG_TILE_GROUPS const int is_last_tile = tile_idx == tile_rows * tile_cols - 1; @@ -1837,11 +1983,19 @@ static size_t encode_tiles(AV1_COMP *cpi, uint8_t *data_ptr, #else aom_start_encode(&residual_bc, data_ptr + total_size + 4 * !is_last_tile); +#if CONFIG_PVQ + // NOTE: This will not work with CONFIG_ANS turned on. + od_adapt_ctx_reset(&cpi->td.mb.daala_enc.state.adapt, 0); + cpi->td.mb.pvq_q = &this_tile->pvq_q; +#endif write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &residual_bc, &tok, tok_end); assert(tok == tok_end); aom_stop_encode(&residual_bc); tile_size = residual_bc.pos - 1; +#endif +#if CONFIG_PVQ + cpi->td.mb.pvq_q = NULL; #endif assert(tile_size > 0); if (!is_last_tile) { @@ -2165,7 +2319,9 @@ static size_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { update_txfm_probs(cm, header_bc, counts); +#if !CONFIG_PVQ update_coef_probs(cpi, header_bc); +#endif update_skip_probs(cm, header_bc, counts); #if CONFIG_DELTA_Q diff --git a/av1/encoder/block.h b/av1/encoder/block.h index 7e319c4c5a3ad2e7c3cc9f7a4a3e9d5b4a0aef96..211ae58493f961351a1195194bcbc45df1900ac8 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h @@ -14,6 +14,9 @@ #include "av1/common/entropymv.h" #include "av1/common/entropy.h" +#if CONFIG_PVQ +#include "av1/encoder/encint.h" +#endif #if CONFIG_REF_MV #include "av1/common/mvref_common.h" #endif @@ -22,6 +25,12 @@ extern "C" { #endif +#if CONFIG_PVQ +// Maximum possible # of tx blocks in luma plane, which is currently 256, +// since there can be 16x16 of 4x4 tx. 
+#define MAX_PVQ_BLOCKS_IN_SB (MAX_SB_SQUARE >> 2 * OD_LOG_BSIZE0)
+#endif
+
 typedef struct {
   unsigned int sse;
   int sum;
@@ -30,6 +39,9 @@ typedef struct {
 struct macroblock_plane {
   DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
+#if CONFIG_PVQ
+  DECLARE_ALIGNED(16, int16_t, src_int16[MAX_SB_SQUARE]);
+#endif
   tran_low_t *qcoeff;
   tran_low_t *coeff;
   uint16_t *eobs;
@@ -151,6 +163,25 @@ struct macroblock {
   // Used to store sub partition's choices.
   MV pred_mv[MAX_REF_FRAMES];
+#if CONFIG_PVQ
+  int rate;
+  // 1 if neither AC nor DC is coded. Only used during RDO.
+  int pvq_skip[MAX_MB_PLANE];
+  PVQ_QUEUE *pvq_q;
+
+  // Storage for PVQ tx block encodings in a superblock.
+  // There can be at most 16x16 of 4x4 blocks (for each YUV plane) encoded
+  // by PVQ. 256 is the max # of 4x4 blocks in a SB (64x64), which comes from:
+  // 1) PVQ is applied to each transformed block
+  // 2) 4x4 is the smallest tx size in AV1
+  // 3) AV1 allows using a smaller tx size than the block (i.e. partition) size
+  // TODO(yushin): The memory usage could be improved a lot, since this has
+  // storage for 10 bands and 128 coefficients for every 4x4 block.
+  PVQ_INFO pvq[MAX_PVQ_BLOCKS_IN_SB][MAX_MB_PLANE];
+  daala_enc_ctx daala_enc;
+  int pvq_speed;
+  int pvq_coded;  // Indicates whether pvq_info needs to be stored to tokenize
+#endif
 };
 
 #ifdef __cplusplus
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index 9b21a1da0d28cc6b48ebf22ff19db0ca9e9f1b06..b7b5cbe6b56cbfea8808715b66dbe24b7fb691a3 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -30,6 +30,10 @@ static void alloc_mode_context(AV1_COMMON *cm, int num_4x4_blk,
         aom_memalign(32, num_pix * sizeof(*ctx->qcoeff[i])));
     CHECK_MEM_ERROR(cm, ctx->dqcoeff[i],
                     aom_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i])));
+#if CONFIG_PVQ
+    CHECK_MEM_ERROR(cm, ctx->pvq_ref_coeff[i],
+                    aom_memalign(32, num_pix * sizeof(*ctx->pvq_ref_coeff[i])));
+#endif
     CHECK_MEM_ERROR(cm, ctx->eobs[i],
                     aom_memalign(32, num_blk * sizeof(*ctx->eobs[i])));
   }
@@ -54,6 +58,10 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
     ctx->qcoeff[i] = 0;
     aom_free(ctx->dqcoeff[i]);
     ctx->dqcoeff[i] = 0;
+#if CONFIG_PVQ
+    aom_free(ctx->pvq_ref_coeff[i]);
+    ctx->pvq_ref_coeff[i] = 0;
+#endif
     aom_free(ctx->eobs[i]);
     ctx->eobs[i] = 0;
   }
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index c482e131a284f0449f55e09bcbbd126b8efa319c..4f1c647d2c29aaa7ea55c57bbf665a6870a384ad 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -33,6 +33,9 @@ typedef struct {
   tran_low_t *coeff[MAX_MB_PLANE];
   tran_low_t *qcoeff[MAX_MB_PLANE];
   tran_low_t *dqcoeff[MAX_MB_PLANE];
+#if CONFIG_PVQ
+  tran_low_t *pvq_ref_coeff[MAX_MB_PLANE];
+#endif
   uint16_t *eobs[MAX_MB_PLANE];
 
   int num_4x4_blk;
diff --git a/av1/encoder/daala_compat_enc.c b/av1/encoder/daala_compat_enc.c
new file mode 100644
index 0000000000000000000000000000000000000000..c23b26d21de4b753291c79fc7e1cc3379abc317b
--- /dev/null
+++ b/av1/encoder/daala_compat_enc.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software.
If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +#include "encint.h" + +void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf) { + od_ec_enc_checkpoint(&rbuf->ec, &enc->ec); + OD_COPY(&rbuf->adapt, &enc->state.adapt, 1); +} + +void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf) { + od_ec_enc_rollback(&enc->ec, &rbuf->ec); + OD_COPY(&enc->state.adapt, &rbuf->adapt, 1); +} diff --git a/av1/encoder/encint.h b/av1/encoder/encint.h new file mode 100644 index 0000000000000000000000000000000000000000..1e3516cc2f3fd2f000f838034902ecb31fc8c5cf --- /dev/null +++ b/av1/encoder/encint.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ +/* clang-format off */ + +#if !defined(_encint_H) +# define _encint_H (1) + +typedef struct daala_enc_ctx od_enc_ctx; +typedef struct od_params_ctx od_params_ctx; +typedef struct od_rollback_buffer od_rollback_buffer; + +# include "aom_dsp/entenc.h" +# include "av1/common/odintrin.h" +# include "av1/common/pvq_state.h" + +struct daala_enc_ctx{ + /* Stores context-adaptive CDFs for PVQ. */ + od_state state; + /* Daala entropy encoder. */ + od_ec_enc ec; + int use_activity_masking; + /* Mode of quantization matrice : FLAT (0) or HVS (1) */ + int qm; + /*Normalized PVQ lambda for use where we've already performed + quantization.*/ + double pvq_norm_lambda; + double pvq_norm_lambda_dc; +}; + +// from daalaenc.h +/**The encoder context.*/ +typedef struct daala_enc_ctx daala_enc_ctx; + +/** Holds important encoder information so we can roll back decisions */ +struct od_rollback_buffer { + od_ec_enc ec; + od_adapt_ctx adapt; +}; + +void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf); +void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf); + +#endif diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index 839f96102e0583d6e8c77b3f43b90c6621c66541..c3855048cfe899602560206ab7e780b29f6ef55c 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c @@ -47,6 +47,10 @@ #include "av1/encoder/segmentation.h" #include "av1/encoder/tokenize.h" +#if CONFIG_PVQ +#include "av1/encoder/pvq_encoder.h" +#endif + static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -941,6 +945,9 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td, p[i].coeff = ctx->coeff[i]; p[i].qcoeff = ctx->qcoeff[i]; pd[i].dqcoeff = ctx->dqcoeff[i]; +#if CONFIG_PVQ + pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i]; +#endif p[i].eobs = ctx->eobs[i]; } @@ -1015,6 +1022,11 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, // Use the lower precision, but faster, 32x32 fdct for mode selection. 
x->use_lp32x32fdct = 1; +#if CONFIG_PVQ + x->pvq_speed = 1; + x->pvq_coded = 0; +#endif + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0]->mbmi; mbmi->sb_type = bsize; @@ -1023,6 +1035,9 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, p[i].coeff = ctx->coeff[i]; p[i].qcoeff = ctx->qcoeff[i]; pd[i].dqcoeff = ctx->dqcoeff[i]; +#if CONFIG_PVQ + pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i]; +#endif p[i].eobs = ctx->eobs[i]; } @@ -1431,6 +1446,9 @@ static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], +#if CONFIG_PVQ + od_rollback_buffer *rdo_buf, +#endif BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; int p; @@ -1453,12 +1471,18 @@ static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, sizeof(*xd->above_seg_context) * mi_width); memcpy(xd->left_seg_context + (mi_row & MAX_MIB_MASK), sl, sizeof(xd->left_seg_context[0]) * mi_height); +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, rdo_buf); +#endif } static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], +#if CONFIG_PVQ + od_rollback_buffer *rdo_buf, +#endif BLOCK_SIZE bsize) { const MACROBLOCKD *const xd = &x->e_mbd; int p; @@ -1483,6 +1507,9 @@ static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, sizeof(*xd->above_seg_context) * mi_width); memcpy(sl, xd->left_seg_context + (mi_row & MAX_MIB_MASK), sizeof(xd->left_seg_context[0]) * mi_height); +#if CONFIG_PVQ + od_encode_checkpoint(&x->daala_enc, rdo_buf); +#endif } static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile, @@ -1661,6 +1688,9 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; int do_partition_search = 1; PICK_MODE_CONTEXT *ctx_none = &pc_tree->none; +#if CONFIG_PVQ + od_rollback_buffer pre_rdo_buf; +#endif if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; @@ -1675,7 +1705,11 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, subsize = get_subsize(bsize, partition); pc_tree->partitioning = partition; +#if !CONFIG_PVQ save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + save_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) { set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); @@ -1715,7 +1749,11 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); } +#if !CONFIG_PVQ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif mi_8x8[0]->mbmi.sb_type = bs_type; pc_tree->partitioning = partition; } @@ -1819,7 +1857,11 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rdc.rate = 0; chosen_rdc.dist = 0; +#if !CONFIG_PVQ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif pc_tree->partitioning = PARTITION_SPLIT; // Split partition. 
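The save_context()/restore_context() changes above thread an od_rollback_buffer through the RDO recursion so trial encodes do not pollute the adaptive entropy-coder state. The pattern, reduced to a minimal sketch (the function below is illustrative; the real call sites are the save/restore pairs in this patch):

/* Sketch: checkpoint the Daala entropy coder and adaptation state, run one
   RDO trial, then roll back so the next candidate starts from clean state. */
static void rdo_trial_sketch(MACROBLOCK *x) {
  od_rollback_buffer buf;
  od_encode_checkpoint(&x->daala_enc, &buf);  /* snapshot ec + adapt state */
  /* ... trial-encode one candidate partition, measure its RD cost ... */
  od_encode_rollback(&x->daala_enc, &buf);    /* discard trial side effects */
}

Only the partitioning finally chosen is re-encoded for real, which is why each save_context() added above is paired with a restore_context() on every search path.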
@@ -1829,18 +1871,27 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, RD_COST tmp_rdc; ENTROPY_CONTEXT l2[16 * MAX_MB_PLANE], a2[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl2[8], sa2[8]; - +#if CONFIG_PVQ + od_rollback_buffer buf; +#endif if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; +#if !CONFIG_PVQ save_context(x, mi_row, mi_col, a2, l2, sa2, sl2, bsize); +#else + save_context(x, mi_row, mi_col, a2, l2, sa2, sl2, &buf, bsize); +#endif pc_tree->split[i]->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, split_subsize, &pc_tree->split[i]->none, INT64_MAX); +#if !CONFIG_PVQ restore_context(x, mi_row, mi_col, a2, l2, sa2, sl2, bsize); - +#else + restore_context(x, mi_row, mi_col, a2, l2, sa2, sl2, &buf, bsize); +#endif if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { av1_rd_cost_reset(&chosen_rdc); break; @@ -1877,7 +1928,11 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, chosen_rdc = none_rdc; } +#if !CONFIG_PVQ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif // We must have chosen a partitioning and encoding or we'll fail later on. // No other opportunities for success. @@ -2169,6 +2224,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, !force_vert_split && yss <= xss && bsize_at_least_8x8; int partition_vert_allowed = !force_horz_split && xss <= yss && bsize_at_least_8x8; + +#if CONFIG_PVQ + od_rollback_buffer pre_rdo_buf; +#endif + (void)*tp_orig; assert(num_8x8_blocks_wide_lookup[bsize] == @@ -2209,8 +2269,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, partition_horz_allowed &= force_horz_split; partition_vert_allowed &= force_vert_split; } - +#if !CONFIG_PVQ save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + save_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { @@ -2355,7 +2418,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #endif } } +#if !CONFIG_PVQ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif } // store estimated motion vector @@ -2418,7 +2485,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, // gives better rd cost do_rectangular_split &= !partition_none_allowed; } +#if !CONFIG_PVQ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif } // PARTITION_HORZ @@ -2466,8 +2537,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, pc_tree->partitioning = PARTITION_HORZ; } } +#if !CONFIG_PVQ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif } + // PARTITION_VERT if (partition_vert_allowed && (do_rectangular_split || av1_active_v_edge(cpi, mi_col, mi_step))) { @@ -2513,7 +2589,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, pc_tree->partitioning = PARTITION_VERT; } } +#if !CONFIG_PVQ restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); +#else + restore_context(x, mi_row, mi_col, a, l, sa, sl, &pre_rdo_buf, bsize); +#endif } // TODO(jbb): This code added so that we avoid static analysis @@ -2531,7 +2611,9 @@ static void 
rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, } if (bsize == BLOCK_64X64) { +#if !CONFIG_PVQ assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip)); +#endif assert(best_rdc.rate < INT_MAX); assert(best_rdc.dist < INT64_MAX); } else { @@ -2745,6 +2827,13 @@ void av1_init_tile_data(AV1_COMP *cpi) { tile_data->mode_map[i][j] = j; } } +#if CONFIG_PVQ + // This will be dynamically increased as more pvq block is encoded. + tile_data->pvq_q.buf_len = 1000; + CHECK_MEM_ERROR(cm, tile_data->pvq_q.buf, + aom_malloc(tile_data->pvq_q.buf_len * sizeof(PVQ_INFO))); + tile_data->pvq_q.curr_pos = 0; +#endif } } @@ -2757,6 +2846,9 @@ void av1_init_tile_data(AV1_COMP *cpi) { cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; pre_tok = cpi->tile_tok[tile_row][tile_col]; tile_tok = allocated_tokens(*tile_info); +#if CONFIG_PVQ + cpi->tile_data[tile_row * tile_cols + tile_col].pvq_q.curr_pos = 0; +#endif } } } @@ -2769,11 +2861,43 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, const TileInfo *const tile_info = &this_tile->tile_info; TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; int mi_row; +#if CONFIG_PVQ + od_adapt_ctx *adapt; +#endif // Set up pointers to per thread motion search counters. td->mb.m_search_count_ptr = &td->rd_counts.m_search_count; td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count; +#if CONFIG_PVQ + td->mb.pvq_q = &this_tile->pvq_q; + + // TODO(yushin) + // If activity masking is enabled, change below to OD_HVS_QM + td->mb.daala_enc.qm = OD_FLAT_QM; // Hard coded. Enc/dec required to sync. + { + // FIXME: Multiple segments support + int segment_id = 0; + int rdmult = set_segment_rdmult(cpi, &td->mb, segment_id); + int qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex); + int64_t q_ac = av1_ac_quant(qindex, 0, cpi->common.bit_depth); + int64_t q_dc = av1_dc_quant(qindex, 0, cpi->common.bit_depth); + /* td->mb.daala_enc.pvq_norm_lambda = OD_PVQ_LAMBDA; */ + td->mb.daala_enc.pvq_norm_lambda = + (double)rdmult * (64 / 16) / (q_ac * q_ac * (1 << RDDIV_BITS)); + td->mb.daala_enc.pvq_norm_lambda_dc = + (double)rdmult * (64 / 16) / (q_dc * q_dc * (1 << RDDIV_BITS)); + // printf("%f\n", td->mb.daala_enc.pvq_norm_lambda); + } + od_init_qm(td->mb.daala_enc.state.qm, td->mb.daala_enc.state.qm_inv, + td->mb.daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT); + od_ec_enc_init(&td->mb.daala_enc.ec, 65025); + + adapt = &td->mb.daala_enc.state.adapt; + od_ec_enc_reset(&td->mb.daala_enc.ec); + od_adapt_ctx_reset(adapt, 0); +#endif + for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; mi_row += MAX_MIB_SIZE) { encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); @@ -2782,6 +2906,16 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]); assert(tok - cpi->tile_tok[tile_row][tile_col] <= allocated_tokens(*tile_info)); +#if CONFIG_PVQ + od_ec_enc_clear(&td->mb.daala_enc.ec); + + td->mb.pvq_q->last_pos = td->mb.pvq_q->curr_pos; + // rewind current position so that bitstream can be written + // from the 1st pvq block + td->mb.pvq_q->curr_pos = 0; + + td->mb.pvq_q = NULL; +#endif } static void encode_tiles(AV1_COMP *cpi) { @@ -3065,6 +3199,11 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; +#if CONFIG_PVQ + x->pvq_speed = 0; + x->pvq_coded = output_enabled ? 
1 : 0; +#endif + if (!is_inter_block(mbmi)) { int plane; mbmi->skip = 1; diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index 4c938c054d47c470eaf46c8229a50f95988793dc..1b87589546ec75fff323ea4126da60499164c9db 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c @@ -27,6 +27,12 @@ #include "av1/encoder/rd.h" #include "av1/encoder/tokenize.h" +#if CONFIG_PVQ +#include "av1/encoder/encint.h" +#include "av1/common/partition.h" +#include "av1/encoder/pvq_encoder.h" +#endif + struct optimize_ctx { ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; @@ -63,6 +69,7 @@ typedef struct av1_token_state { short qc; } av1_token_state; +#if !CONFIG_PVQ // TODO(jimbankoski): experiment to find optimal RD numbers. static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 }; @@ -328,6 +335,7 @@ static int optimize_b(const AV1_COMMON *const cm, MACROBLOCK *mb, int plane, mb->plane[plane].eobs[block] = final_eob; return final_eob; } +#endif // TODO(sarahparker) refactor fwd quant functions to use fwd_txfm fns in // hybrid_fwd_txfm.c @@ -335,8 +343,13 @@ void av1_xform_quant_fp(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; +#if !CONFIG_PVQ const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; +#else + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; +#endif PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; TX_TYPE tx_type = get_tx_type(plane_type, xd, block); const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type); @@ -345,12 +358,13 @@ void av1_xform_quant_fp(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; -#if CONFIG_AOM_QM int seg_id = xd->mi[0]->mbmi.segment_id; +#if CONFIG_AOM_QM int is_intra = !is_inter_block(&xd->mi[0]->mbmi); const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size]; const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size]; #endif +#if !CONFIG_PVQ const int16_t *src_diff; (void)cm; @@ -364,6 +378,40 @@ void av1_xform_quant_fp(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, */ src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; +#else + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block); + uint8_t *src, *dst; + int16_t *src_int16, *pred; + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + int tx_blk_size; + int i, j; + int skip = 1; + PVQ_INFO *pvq_info = NULL; + + (void)scan_order; + (void)qcoeff; + + if (x->pvq_coded) { + assert(block < MAX_PVQ_BLOCKS_IN_SB); + pvq_info = &x->pvq[block][plane]; + } + dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; + src = &p->src.buf[4 * (blk_row * src_stride + blk_col)]; + src_int16 = &p->src_int16[4 * (blk_row * diff_stride + blk_col)]; + pred = &pd->pred[4 * (blk_row * diff_stride + blk_col)]; + // transform block size in pixels + tx_blk_size = tx_size_1d[tx_size]; + + // copy uint8 orig and predicted block to int16 buffer + // in order to use existing VP10 transform functions + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) { + src_int16[diff_stride * j + i] = src[src_stride * j + i]; + 
pred[diff_stride * j + i] = dst[dst_stride * j + i]; + } +#endif #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -402,7 +450,7 @@ void av1_xform_quant_fp(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, #endif break; case TX_4X4: - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { + if (xd->lossless[seg_id]) { av1_highbd_fwht4x4(src_diff, coeff, diff_stride); } else { aom_highbd_fdct4x4(src_diff, coeff, diff_stride); @@ -422,6 +470,7 @@ void av1_xform_quant_fp(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, } #endif // CONFIG_AOM_HIGHBITDEPTH +#if !CONFIG_PVQ switch (tx_size) { case TX_32X32: fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); @@ -456,7 +505,7 @@ void av1_xform_quant_fp(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, #endif break; case TX_4X4: - if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { + if (xd->lossless[seg_id]) { av1_fwht4x4(src_diff, coeff, diff_stride); } else { aom_fdct4x4(src_diff, coeff, diff_stride); @@ -472,14 +521,68 @@ void av1_xform_quant_fp(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, break; default: assert(0); break; } +#else // #if !CONFIG_PVQ + switch (tx_size) { + case TX_32X32: + // NOTE: Using x->use_lp32x32fdct == 1 will makes enc and dec mismatched, + // because decoder always uses x->use_lp32x32fdct == 0, + // forward transform of predicted image. + fdct32x32(0, pred, ref_coeff, diff_stride); + // forward transform of original image. + fdct32x32(0, src_int16, coeff, diff_stride); + break; + case TX_16X16: + aom_fdct16x16(pred, ref_coeff, diff_stride); + aom_fdct16x16(src_int16, coeff, diff_stride); + break; + case TX_8X8: + aom_fdct8x8(pred, ref_coeff, diff_stride); + aom_fdct8x8(src_int16, coeff, diff_stride); + break; + case TX_4X4: + if (xd->lossless[seg_id]) { + av1_fwht4x4(pred, ref_coeff, diff_stride); + av1_fwht4x4(src_int16, coeff, diff_stride); + } else { + aom_fdct4x4(pred, ref_coeff, diff_stride); + aom_fdct4x4(src_int16, coeff, diff_stride); + } + break; + default: assert(0); break; + } + + // PVQ for inter mode block + if (!x->skip_block) + skip = av1_pvq_encode_helper(&x->daala_enc, + coeff, // target original vector + ref_coeff, // reference vector + dqcoeff, // de-quantized vector + eob, // End of Block marker + pd->dequant, // aom's quantizers + plane, // image plane + tx_size, // block size in log_2 - 2 + tx_type, + &x->rate, // rate measured + x->pvq_speed, + pvq_info); // PVQ info for a block + + x->pvq_skip[plane] = skip; + + if (!skip) mbmi->skip = 0; +#endif // #if !CONFIG_PVQ } void av1_xform_quant(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; +#if !CONFIG_PVQ const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; +#else + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; +#endif PLANE_TYPE plane_type = (plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV; TX_TYPE tx_type = get_tx_type(plane_type, xd, block); const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type); @@ -489,22 +592,60 @@ void av1_xform_quant(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; int seg_id = xd->mi[0]->mbmi.segment_id; + FWD_TXFM_PARAM fwd_txfm_param; + #if CONFIG_AOM_QM int is_intra = !is_inter_block(&xd->mi[0]->mbmi); const qm_val_t *qmatrix = pd->seg_qmatrix[seg_id][is_intra][tx_size]; const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size]; #endif + +#if !CONFIG_PVQ const int16_t *src_diff; - FWD_TXFM_PARAM fwd_txfm_param; + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; +#else + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block); + uint8_t *src, *dst; + int16_t *src_int16, *pred; + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + int tx_blk_size; + int i, j; + int skip = 1; + PVQ_INFO *pvq_info = NULL; + + (void)scan_order; + (void)qcoeff; + + if (x->pvq_coded) { + assert(block < MAX_PVQ_BLOCKS_IN_SB); + pvq_info = &x->pvq[block][plane]; + } + dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; + src = &p->src.buf[4 * (blk_row * src_stride + blk_col)]; + src_int16 = &p->src_int16[4 * (blk_row * diff_stride + blk_col)]; + pred = &pd->pred[4 * (blk_row * diff_stride + blk_col)]; + + // transform block size in pixels + tx_blk_size = tx_size_1d[tx_size]; + + // copy uint8 orig and predicted block to int16 buffer + // in order to use existing VP10 transform functions + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) { + src_int16[diff_stride * j + i] = src[src_stride * j + i]; + pred[diff_stride * j + i] = dst[dst_stride * j + i]; + } +#endif + fwd_txfm_param.tx_type = tx_type; fwd_txfm_param.tx_size = tx_size; fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL; fwd_txfm_param.rd_transform = x->use_lp32x32fdct; fwd_txfm_param.lossless = xd->lossless[seg_id]; - src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; - #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); @@ -555,6 +696,7 @@ void av1_xform_quant(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, } #endif // CONFIG_AOM_HIGHBITDEPTH +#if !CONFIG_PVQ fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); switch (tx_size) { case TX_32X32: @@ -599,6 +741,31 @@ void av1_xform_quant(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, break; default: assert(0); break; } +#else // #if !CONFIG_PVQ + fwd_txfm_param.rd_transform = 0; + + fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param); + fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param); + + // PVQ for inter mode block + if (!x->skip_block) + skip = av1_pvq_encode_helper(&x->daala_enc, + coeff, // target original vector + ref_coeff, // reference vector + dqcoeff, // de-quantized vector + eob, // End of Block marker + pd->dequant, // aom's quantizers + plane, // image plane + tx_size, // block size in log_2 - 2 + tx_type, + &x->rate, // rate measured + x->pvq_speed, + pvq_info); // PVQ info for a block + + x->pvq_skip[plane] = skip; + + if (!skip) mbmi->skip = 0; +#endif // #if !CONFIG_PVQ } static void encode_block(int plane, int block, int blk_row, int blk_col, @@ -614,6 +781,10 @@ static void encode_block(int plane, int block, int blk_row, int 
blk_col, uint8_t *dst; ENTROPY_CONTEXT *a, *l; TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block); +#if CONFIG_PVQ + int tx_blk_size; + int i, j; +#endif dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; a = &ctx->ta[plane][blk_col]; l = &ctx->tl[plane][blk_row]; @@ -626,6 +797,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, tx_size); } +#if !CONFIG_PVQ if (x->optimize) { const int combined_ctx = combine_entropy_contexts(*a, *l); *a = *l = optimize_b(cm, x, plane, block, tx_size, combined_ctx) > 0; @@ -636,6 +808,24 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, if (p->eobs[block]) *(args->skip) = 0; if (p->eobs[block] == 0) return; +#else + *a = *l = !x->pvq_skip[plane]; + + if (!x->pvq_skip[plane]) *(args->skip) = 0; + + if (x->pvq_skip[plane]) return; + + // transform block size in pixels + tx_blk_size = tx_size_1d[tx_size]; + + // Since av1 does not have separate function which does inverse transform + // but av1_inv_txfm_add_*x*() also does addition of predicted image to + // inverse transformed image, + // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0; +#endif + #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (tx_size) { @@ -665,7 +855,6 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, return; } #endif // CONFIG_AOM_HIGHBITDEPTH - switch (tx_size) { case TX_32X32: av1_inv_txfm_add_32x32(dqcoeff, dst, pd->dst.stride, p->eobs[block], @@ -710,7 +899,28 @@ static void encode_block_pass1(int plane, int block, int blk_row, int blk_col, av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size); +#if !CONFIG_PVQ if (p->eobs[block] > 0) { +#else + if (!x->pvq_skip[plane]) { +#endif + +#if CONFIG_PVQ + { + int tx_blk_size; + int i, j; + // transform block size in pixels + tx_blk_size = tx_size_1d[tx_size]; + + // Since av1 does not have separate function which does inverse transform + // but av1_inv_txfm_add_*x*() also does addition of predicted image to + // inverse transformed image, + // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. set dst as zeros + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) dst[j * pd->dst.stride + i] = 0; + } +#endif + #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->lossless[0]) { @@ -750,8 +960,9 @@ void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) { if (x->skip) return; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { +#if !CONFIG_PVQ av1_subtract_plane(x, bsize, plane); - +#endif if (x->optimize) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? 
get_uv_tx_size(mbmi, pd) : mbmi->tx_size; @@ -785,7 +996,6 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, const int bhl = b_height_log2_lookup[plane_bsize]; const int diff_stride = 4 * (1 << bwl); uint8_t *src, *dst; - int16_t *src_diff; uint16_t *eob = &p->eobs[block]; int seg_id = xd->mi[0]->mbmi.segment_id; #if CONFIG_AOM_QM @@ -795,10 +1005,31 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, #endif const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - + FWD_TXFM_PARAM fwd_txfm_param; + int16_t *src_diff; int tx1d_size = tx_size_1d[tx_size]; - FWD_TXFM_PARAM fwd_txfm_param; +#if CONFIG_PVQ + tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block); + int16_t *src_int16; + int tx_blk_size; + int i, j; + int16_t *pred = &pd->pred[4 * (blk_row * diff_stride + blk_col)]; + int skip = 1; + PVQ_INFO *pvq_info = NULL; + + (void)scan_order; + (void)qcoeff; + + if (x->pvq_coded) { + assert(block < MAX_PVQ_BLOCKS_IN_SB); + pvq_info = &x->pvq[block][plane]; + } + src_int16 = &p->src_int16[4 * (blk_row * diff_stride + blk_col)]; +#endif + + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; + fwd_txfm_param.tx_type = tx_type; fwd_txfm_param.tx_size = tx_size; fwd_txfm_param.fwd_txfm_opt = FWD_TXFM_OPT_NORMAL; @@ -807,8 +1038,6 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; src = &p->src.buf[4 * (blk_row * src_stride + blk_col)]; - src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; - mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; av1_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane); @@ -884,6 +1113,8 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, aom_subtract_block(tx1d_size, tx1d_size, src_diff, diff_stride, src, src_stride, dst, dst_stride); + +#if !CONFIG_PVQ fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); switch (tx_size) { case TX_32X32: @@ -939,7 +1170,78 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, break; default: assert(0); break; } +#else // #if !CONFIG_PVQ + // transform block size in pixels + tx_blk_size = tx_size_1d[tx_size]; + + // copy uint8 orig and predicted block to int16 buffer + // in order to use existing VP10 transform functions + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) { + src_int16[diff_stride * j + i] = src[src_stride * j + i]; + pred[diff_stride * j + i] = dst[dst_stride * j + i]; + } + + fwd_txfm_param.rd_transform = 0; + + fwd_txfm(src_int16, coeff, diff_stride, &fwd_txfm_param); + fwd_txfm(pred, ref_coeff, diff_stride, &fwd_txfm_param); + + // PVQ for intra mode block + if (!x->skip_block) + skip = av1_pvq_encode_helper(&x->daala_enc, + coeff, // target original vector + ref_coeff, // reference vector + dqcoeff, // de-quantized vector + eob, // End of Block marker + pd->dequant, // aom's quantizers + plane, // image plane + tx_size, // block size in log_2 - 2 + tx_type, + &x->rate, // rate measured + x->pvq_speed, + pvq_info); // PVQ info for a block + + x->pvq_skip[plane] = skip; + + if (!skip) mbmi->skip = 0; + + // Since av1 does not have separate function which does inverse transform + // but av1_inv_txfm_add_*x*() also does addition of predicted image to + // inverse transformed image, + // pass blank dummy image to av1_inv_txfm_add_*x*(), i.e. 
set dst as zeros + + if (!skip) { + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0; + + switch (tx_size) { + case TX_32X32: + av1_inv_txfm_add_32x32(dqcoeff, dst, dst_stride, *eob, tx_type); + break; + case TX_16X16: + av1_inv_txfm_add_16x16(dqcoeff, dst, dst_stride, *eob, tx_type); + break; + case TX_8X8: + av1_inv_txfm_add_8x8(dqcoeff, dst, dst_stride, *eob, tx_type); + break; + case TX_4X4: + // this is like av1_short_idct4x4 but has a special case around eob<=1 + // which is significant (not just an optimization) for the lossless + // case. + av1_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, *eob, tx_type, + xd->lossless[seg_id]); + break; + default: assert(0); break; + } + } +#endif // #if !CONFIG_PVQ + +#if !CONFIG_PVQ if (*eob) *(args->skip) = 0; +#else +// Note : *(args->skip) == mbmi->skip +#endif } void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x, @@ -950,3 +1252,141 @@ void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x, av1_foreach_transformed_block_in_plane(xd, bsize, plane, av1_encode_block_intra, &arg); } + +#if CONFIG_PVQ +int av1_pvq_encode_helper(daala_enc_ctx *daala_enc, tran_low_t *const coeff, + tran_low_t *ref_coeff, tran_low_t *const dqcoeff, + uint16_t *eob, const int16_t *quant, int plane, + int tx_size, TX_TYPE tx_type, int *rate, int speed, + PVQ_INFO *pvq_info) { + const int tx_blk_size = tx_size_1d[tx_size]; + int skip; + // TODO(yushin): Enable this later, when pvq_qm_q4 is available in AOM. + // int pvq_dc_quant = OD_MAXI(1, + // quant * daala_enc->state.pvq_qm_q4[plane][od_qm_get_index(tx_size, 0)] >> + // 4); + int quant_shift = tx_size == TX_32X32 ? 1 : 0; + // DC quantizer for PVQ + int pvq_dc_quant = OD_MAXI(1, quant[0] >> quant_shift); + int tell; + int has_dc_skip = 1; + int i; + int off = od_qm_offset(tx_size, plane ? 1 : 0); +#if PVQ_CHROMA_RD + double save_pvq_lambda; +#endif + DECLARE_ALIGNED(16, int16_t, coeff_pvq[OD_BSIZE_MAX * OD_BSIZE_MAX]); + DECLARE_ALIGNED(16, int16_t, ref_coeff_pvq[OD_BSIZE_MAX * OD_BSIZE_MAX]); + DECLARE_ALIGNED(16, int16_t, dqcoeff_pvq[OD_BSIZE_MAX * OD_BSIZE_MAX]); + + DECLARE_ALIGNED(16, int32_t, in_int32[OD_BSIZE_MAX * OD_BSIZE_MAX]); + DECLARE_ALIGNED(16, int32_t, ref_int32[OD_BSIZE_MAX * OD_BSIZE_MAX]); + DECLARE_ALIGNED(16, int32_t, out_int32[OD_BSIZE_MAX * OD_BSIZE_MAX]); + + *eob = 0; + + tell = od_ec_enc_tell_frac(&daala_enc->ec); + + // Change coefficient ordering for pvq encoding. + od_raster_to_coding_order(coeff_pvq, tx_blk_size, tx_type, coeff, + tx_blk_size); + od_raster_to_coding_order(ref_coeff_pvq, tx_blk_size, tx_type, ref_coeff, + tx_blk_size); + + // copy int16 inputs to int32 + for (i = 0; i < tx_blk_size * tx_blk_size; i++) { + ref_int32[i] = ref_coeff_pvq[i]; + in_int32[i] = coeff_pvq[i]; + } + +#if PVQ_CHROMA_RD + if (plane != 0) { + save_pvq_lambda = daala_enc->pvq_norm_lambda; + daala_enc->pvq_norm_lambda *= 0.8; + } +#endif + if (abs(in_int32[0] - ref_int32[0]) < pvq_dc_quant * 141 / 256) { /* 0.55 */ + out_int32[0] = 0; + } else { + out_int32[0] = OD_DIV_R0(in_int32[0] - ref_int32[0], pvq_dc_quant); + } + + skip = od_pvq_encode( + daala_enc, ref_int32, in_int32, out_int32, + (int)quant[0] >> quant_shift, // scale/quantizer + (int)quant[1] >> quant_shift, // scale/quantizer + // TODO(yushin): Instead of 0, + // use daala_enc->use_activity_masking for activity masking. 
+ plane, tx_size, OD_PVQ_BETA[0][plane][tx_size], + OD_ROBUST_STREAM, + 0, // is_keyframe, + 0, 0, 0, // q_scaling, bx, by, + daala_enc->state.qm + off, daala_enc->state.qm_inv + off, + speed, // speed + pvq_info); + + if (skip && pvq_info) assert(pvq_info->ac_dc_coded == 0); + + if (!skip && pvq_info) assert(pvq_info->ac_dc_coded > 0); + + // Encode residue of DC coeff, if required. + if (!has_dc_skip || out_int32[0]) { + generic_encode(&daala_enc->ec, &daala_enc->state.adapt.model_dc[plane], + abs(out_int32[0]) - has_dc_skip, -1, + &daala_enc->state.adapt.ex_dc[plane][tx_size][0], 2); + } + if (out_int32[0]) { + od_ec_enc_bits(&daala_enc->ec, out_int32[0] < 0, 1); + skip = 0; + } + + // need to save quantized residue of DC coeff + // so that final pvq bitstream writing can know whether DC is coded. + if (pvq_info) pvq_info->dq_dc_residue = out_int32[0]; + + out_int32[0] = out_int32[0] * pvq_dc_quant; + out_int32[0] += ref_int32[0]; + + // copy int32 result back to int16 + for (i = 0; i < tx_blk_size * tx_blk_size; i++) dqcoeff_pvq[i] = out_int32[i]; + + // Back to original coefficient order + od_coding_order_to_raster(dqcoeff, tx_blk_size, tx_type, dqcoeff_pvq, + tx_blk_size); + + *eob = tx_blk_size * tx_blk_size; + + *rate = (od_ec_enc_tell_frac(&daala_enc->ec) - tell) + << (AV1_PROB_COST_SHIFT - OD_BITRES); + assert(*rate >= 0); +#if PVQ_CHROMA_RD + if (plane != 0) daala_enc->pvq_norm_lambda = save_pvq_lambda; +#endif + return skip; +} + +void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, + int *max_theta, int *k, od_coeff *y, int nb_bands, + const int *off, int *size, int skip_rest, + int skip_dir, + int bs) { // block size in log_2 -2 + int i; + const int tx_blk_size = tx_size_1d[bs]; + + for (i = 0; i < nb_bands; i++) { + pvq_info->qg[i] = qg[i]; + pvq_info->theta[i] = theta[i]; + pvq_info->max_theta[i] = max_theta[i]; + pvq_info->k[i] = k[i]; + pvq_info->off[i] = off[i]; + pvq_info->size[i] = size[i]; + } + + memcpy(pvq_info->y, y, tx_blk_size * tx_blk_size * sizeof(od_coeff)); + + pvq_info->nb_bands = nb_bands; + pvq_info->skip_rest = skip_rest; + pvq_info->skip_dir = skip_dir; + pvq_info->bs = bs; +} +#endif diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h index 2576b1acae176e89e2c4ac5ca9f8a4b030fe1888..c5f2ac1eab6dc7300ea718e22a907f79f07ccf7c 100644 --- a/av1/encoder/encodemb.h +++ b/av1/encoder/encodemb.h @@ -43,6 +43,19 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int plane); +#if CONFIG_PVQ +int av1_pvq_encode_helper(daala_enc_ctx *daala_enc, tran_low_t *const coeff, + tran_low_t *ref_coeff, tran_low_t *const dqcoeff, + uint16_t *eob, const int16_t *quant, int plane, + int tx_size, TX_TYPE tx_type, int *rate, int speed, + PVQ_INFO *pvq_info); + +void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, + int *max_theta, int *k, od_coeff *y, int nb_bands, + const int *off, int *size, int skip_rest, + int skip_dir, int bs); +#endif + #ifdef __cplusplus } // extern "C" #endif diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 0b6410a344ee1c8d5d2db6882a5ba9f073915850..13f54c978199c336a40728d61379d80f0f4fa006 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c @@ -373,6 +373,20 @@ static void dealloc_compressor_data(AV1_COMP *cpi) { aom_free(cpi->mbmi_ext_base); cpi->mbmi_ext_base = NULL; +#if CONFIG_PVQ + if (cpi->oxcf.pass != 1) { + const int tile_cols = 1 << cm->log2_tile_cols; + const int 
tile_rows = 1 << cm->log2_tile_rows; + int tile_col, tile_row; + + for (tile_row = 0; tile_row < tile_rows; ++tile_row) + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileDataEnc *tile_data = + &cpi->tile_data[tile_row * tile_cols + tile_col]; + aom_free(tile_data->pvq_q.buf); + } + } +#endif aom_free(cpi->tile_data); cpi->tile_data = NULL; @@ -727,7 +741,11 @@ static void update_frame_size(AV1_COMP *cpi) { av1_set_mb_mi(cm, cm->width, cm->height); av1_init_context_buffers(cm); - av1_init_macroblockd(cm, xd, NULL); + av1_init_macroblockd(cm, xd, +#if CONFIG_PVQ + NULL, +#endif + NULL); memset(cpi->mbmi_ext_base, 0, cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base)); diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index d8a4b5f8e4a66b281e0f4620f8ada5a50aad9eb7..daa90b3a16d29e013713a1140d90f30dbd310130 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h @@ -265,6 +265,9 @@ typedef struct TileDataEnc { TileInfo tile_info; int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; int mode_map[BLOCK_SIZES][MAX_MODES]; +#if CONFIG_PVQ + PVQ_QUEUE pvq_q; +#endif } TileDataEnc; typedef struct RD_COUNTS { diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c index 4d0acee9286904f9a72dbaf009546eb83b0868bc..1dfd0a660a966d8e9f0c7d0093695e33a9b4e52e 100644 --- a/av1/encoder/firstpass.c +++ b/av1/encoder/firstpass.c @@ -486,6 +486,9 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { double intra_factor; double brightness_factor; BufferPool *const pool = cm->buffer_pool; +#if CONFIG_PVQ + PVQ_QUEUE pvq_q; +#endif // First pass code requires valid last and new frame buffers. assert(new_yv12 != NULL); @@ -520,10 +523,43 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { av1_frame_init_quantizer(cpi); +#if CONFIG_PVQ + // For pass 1 of 2-pass encoding, init here for PVQ for now. + { + od_adapt_ctx *adapt; + + pvq_q.buf_len = 5000; + CHECK_MEM_ERROR(cm, pvq_q.buf, aom_malloc(pvq_q.buf_len * sizeof(PVQ_INFO))); + pvq_q.curr_pos = 0; + x->pvq_coded = 0; + + x->pvq_q = &pvq_q; + + // TODO(yushin): Since this init step is also called in 2nd pass, + // or 1-pass encoding, consider factoring out it as a function. + // TODO(yushin) + // If activity masking is enabled, change below to OD_HVS_QM + x->daala_enc.qm = OD_FLAT_QM; // Hard coded. Enc/dec required to sync. + x->daala_enc.pvq_norm_lambda = OD_PVQ_LAMBDA; + x->daala_enc.pvq_norm_lambda_dc = OD_PVQ_LAMBDA; + + od_init_qm(x->daala_enc.state.qm, x->daala_enc.state.qm_inv, + x->daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT); + od_ec_enc_init(&x->daala_enc.ec, 65025); + + adapt = &x->daala_enc.state.adapt; + od_ec_enc_reset(&x->daala_enc.ec); + od_adapt_ctx_reset(adapt, 0); + } +#endif + for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff[i]; p[i].qcoeff = ctx->qcoeff[i]; pd[i].dqcoeff = ctx->dqcoeff[i]; +#if CONFIG_PVQ + pd[i].pvq_ref_coeff = ctx->pvq_ref_coeff[i]; +#endif p[i].eobs = ctx->eobs[i]; } @@ -912,6 +948,16 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { aom_clear_system_state(); } +#if CONFIG_PVQ + od_ec_enc_clear(&x->daala_enc.ec); + + x->pvq_q->last_pos = x->pvq_q->curr_pos; + x->pvq_q->curr_pos = 0; + x->pvq_q = NULL; + + aom_free(pvq_q.buf); +#endif + // Clamp the image start to rows/2. This number of rows is discarded top // and bottom as dead data so rows / 2 means the frame is blank. 
if ((image_data_start_row > cm->mb_rows / 2) || diff --git a/av1/encoder/generic_encoder.c b/av1/encoder/generic_encoder.c new file mode 100644 index 0000000000000000000000000000000000000000..466ede3f1e4033c311c804f33df23707ff004463 --- /dev/null +++ b/av1/encoder/generic_encoder.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#include "aom_dsp/entdec.h" +#include "aom_dsp/entenc.h" +#include "av1/common/generic_code.h" +#include "av1/common/odintrin.h" +#include "pvq_encoder.h" + +/** Encodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts + * the cdf accordingly. + * + * @param [in,out] enc range encoder + * @param [in] val variable being encoded + * @param [in,out] cdf CDF of the variable (Q15) + * @param [in] n number of values possible + * @param [in,out] count number of symbols encoded with that cdf so far + * @param [in] rate adaptation rate shift (smaller is faster) + */ +void od_encode_cdf_adapt_q15(od_ec_enc *ec, int val, uint16_t *cdf, int n, + int *count, int rate) { + int i; + if (*count == 0) { + /* On the first call, we normalize the cdf to (32768 - n). This should + eventually be moved to the state init, but for now it makes it much + easier to experiment and convert symbols to the Q15 adaptation.*/ + int ft; + ft = cdf[n - 1]; + for (i = 0; i < n; i++) { + cdf[i] = cdf[i]*32768/ft; + } + } + od_ec_encode_cdf_q15(ec, val, cdf, n); + od_cdf_adapt_q15(val, cdf, n, count, rate); +} + +/** Encodes a value from 0 to N-1 (with N up to 16) based on a cdf and adapts + * the cdf accordingly. + * + * @param [in,out] enc range encoder + * @param [in] val variable being encoded + * @param [in] cdf CDF of the variable (Q15) + * @param [in] n number of values possible + * @param [in] increment adaptation speed (Q15) + */ +void od_encode_cdf_adapt(od_ec_enc *ec, int val, uint16_t *cdf, int n, + int increment) { + int i; + od_ec_encode_cdf_unscaled(ec, val, cdf, n); + if (cdf[n-1] + increment > 32767) { + for (i = 0; i < n; i++) { + /* Second term ensures that the pdf is non-null */ + cdf[i] = (cdf[i] >> 1) + i + 1; + } + } + for (i = val; i < n; i++) cdf[i] += increment; +} + +/** Encodes a random variable using a "generic" model, assuming that the + * distribution is one-sided (zero and up), has a single mode, and decays + * exponentially past the model. 
+ *
+ * @param [in,out] enc   range encoder
+ * @param [in,out] model generic probability model
+ * @param [in]     x     variable being encoded
+ * @param [in]     max   largest value possible
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @param [in]     integration integration period of ExQ16 (leaky average over
+ * 1<<integration samples)
+ */
+void generic_encode(od_ec_enc *enc, generic_encoder *model, int x, int max,
+ int *ex_q16, int integration) {
+  int lg_q1;
+  int shift;
+  int id;
+  uint16_t *cdf;
+  int xs;
+  int ms;
+  if (max == 0) return;
+  lg_q1 = log_ex(*ex_q16);
+  /* If expectation is too large, shift x to ensure that
+     all we have past xs=15 is the exponentially decaying tail
+     of the distribution */
+  shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+  /* Choose the cdf to use: we have two per "octave" of ExQ16 */
+  id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+  cdf = model->cdf[id];
+  xs = (x + (1 << shift >> 1)) >> shift;
+  ms = (max + (1 << shift >> 1)) >> shift;
+  OD_ASSERT(max == -1 || xs <= ms);
+  if (max == -1) od_ec_encode_cdf_unscaled(enc, OD_MINI(15, xs), cdf, 16);
+  else {
+    od_ec_encode_cdf_unscaled(enc, OD_MINI(15, xs), cdf, OD_MINI(ms + 1, 16));
+  }
+  if (xs >= 15) {
+    int e;
+    unsigned decay;
+    /* Estimate decay based on the assumption that the distribution is close
+       to Laplacian for large values. We should probably have an adaptive
+       estimate instead. Note: The 2* is a kludge that's not fully understood
+       yet. */
+    OD_ASSERT(*ex_q16 < INT_MAX >> 1);
+    e = ((2**ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
+    decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
+    /* Encode the tail of the distribution assuming exponential decay. */
+    od_laplace_encode_special(enc, xs - 15, decay, (max == -1) ? -1 : ms - 15);
+  }
+  if (shift != 0) {
+    int special;
+    /* Because of the rounding, there's only half the number of possibilities
+       for xs=0. */
+    special = xs == 0;
+    if (shift - special > 0) {
+      od_ec_enc_bits(enc, x - (xs << shift) + (!special << (shift - 1)),
+       shift - special);
+    }
+  }
+  generic_model_update(model, ex_q16, x, xs, id, integration);
+  OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
+   "enc: %d %d %d %d %d %x", *ex_q16, x, shift, id, xs, enc->rng));
+}
+
+/** Estimates the cost of encoding a value with generic_encode().
+ *
+ * @param [in,out] model generic probability model
+ * @param [in]     x     variable being encoded
+ * @param [in]     max   largest value possible
+ * @param [in,out] ExQ16 expectation of x (adapted)
+ * @return number of bits (approximation)
+ */
+double generic_encode_cost(generic_encoder *model, int x, int max,
+ int *ex_q16) {
+  int lg_q1;
+  int shift;
+  int id;
+  uint16_t *cdf;
+  int xs;
+  int ms;
+  int extra;
+  if (max == 0) return 0;
+  lg_q1 = log_ex(*ex_q16);
+  /* If expectation is too large, shift x to ensure that
+     all we have past xs=15 is the exponentially decaying tail
+     of the distribution */
+  shift = OD_MAXI(0, (lg_q1 - 5) >> 1);
+  /* Choose the cdf to use: we have two per "octave" of ExQ16 */
+  id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
+  cdf = model->cdf[id];
+  xs = (x + (1 << shift >> 1)) >> shift;
+  ms = (max + (1 << shift >> 1)) >> shift;
+  OD_ASSERT(max == -1 || xs <= ms);
+  extra = 0;
+  if (shift) extra = shift - (xs == 0);
+  xs = OD_MINI(15, xs);
+  /* Shortcut: assume it's going to cost 2 bits for the Laplace coder. */
+  if (xs == 15) extra += 2;
+  if (max == -1) {
+    return extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ? 0 : cdf[xs - 1]))/
+     cdf[15]);
+  }
+  else {
+    return extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ?
0 : cdf[xs - 1]))/ + cdf[OD_MINI(ms, 15)]); + } +} + +/*Estimates the cost of encoding a value with a given CDF.*/ +double od_encode_cdf_cost(int val, uint16_t *cdf, int n) { + int total_prob; + int prev_prob; + double val_prob; + OD_ASSERT(n > 0); + total_prob = cdf[n - 1]; + if (val == 0) { + prev_prob = 0; + } + else { + prev_prob = cdf[val - 1]; + } + val_prob = (cdf[val] - prev_prob) / (double)total_prob; + return -OD_LOG2(val_prob); +} diff --git a/av1/encoder/laplace_encoder.c b/av1/encoder/laplace_encoder.c new file mode 100644 index 0000000000000000000000000000000000000000..07dcacad7c0de2b15f8b3689800c15d67dfcca39 --- /dev/null +++ b/av1/encoder/laplace_encoder.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#include "aom_dsp/entdec.h" +#include "aom_dsp/entenc.h" +#include "av1/common/odintrin.h" +#include "av1/common/pvq.h" +#include "pvq_encoder.h" + +static void od_encode_pvq_split(od_ec_enc *ec, od_pvq_codeword_ctx *adapt, + int count, int sum, int ctx) { + int shift; + int rest; + int fctx; + if (sum == 0) return; + shift = OD_MAXI(0, OD_ILOG(sum) - 3); + if (shift) { + rest = count & ((1 << shift) - 1); + count >>= shift; + sum >>= shift; + } + fctx = 7*ctx + sum - 1; + od_encode_cdf_adapt(ec, count, adapt->pvq_split_cdf[fctx], + sum + 1, adapt->pvq_split_increment); + if (shift) od_ec_enc_bits(ec, rest, shift); +} + +void od_encode_band_pvq_splits(od_ec_enc *ec, od_pvq_codeword_ctx *adapt, + const int *y, int n, int k, int level) { + int mid; + int i; + int count_right; + if (n <= 1 || k == 0) return; + if (k == 1 && n <= 16) { + int cdf_id; + int pos; + cdf_id = od_pvq_k1_ctx(n, level == 0); + for (pos = 0; !y[pos]; pos++); + OD_ASSERT(pos < n); + od_encode_cdf_adapt(ec, pos, adapt->pvq_k1_cdf[cdf_id], n, + adapt->pvq_k1_increment); + } + else { + mid = n >> 1; + count_right = k; + for (i = 0; i < mid; i++) count_right -= abs(y[i]); + od_encode_pvq_split(ec, adapt, count_right, k, od_pvq_size_ctx(n)); + od_encode_band_pvq_splits(ec, adapt, y, mid, k - count_right, level + 1); + od_encode_band_pvq_splits(ec, adapt, y + mid, n - mid, count_right, + level + 1); + } +} + +/** Encodes the tail of a Laplace-distributed variable, i.e. it doesn't + * do anything special for the zero case. + * + * @param [in,out] enc range encoder + * @param [in] x variable to encode (has to be positive) + * @param [in] decay decay factor of the distribution in Q8 format, + * i.e. pdf ~= decay^x + * @param [in] max maximum possible value of x (used to truncate + * the pdf) + */ +void od_laplace_encode_special(od_ec_enc *enc, int x, unsigned decay, int max) { + int shift; + int xs; + int ms; + int sym; + const uint16_t *cdf; + shift = 0; + if (max == 0) return; + /* We don't want a large decay value because that would require too many + symbols. However, it's OK if the max is below 15. 
*/ + while (((max >> shift) >= 15 || max == -1) && decay > 235) { + decay = (decay*decay + 128) >> 8; + shift++; + } + OD_ASSERT(x <= max || max == -1); + decay = OD_MINI(decay, 254); + decay = OD_MAXI(decay, 2); + xs = x >> shift; + ms = max >> shift; + cdf = EXP_CDF_TABLE[(decay + 1) >> 1]; + OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d", decay)); + do { + sym = OD_MINI(xs, 15); + { + int i; + OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d %d %d\n", x, xs, shift, + sym, max)); + for (i = 0; i < 16; i++) { + OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "%d ", cdf[i])); + } + OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n")); + } + if (ms > 0 && ms < 15) { + /* Simple way of truncating the pdf when we have a bound */ + od_ec_encode_cdf_unscaled(enc, sym, cdf, ms + 1); + } + else { + od_ec_encode_cdf_q15(enc, sym, cdf, 16); + } + xs -= 15; + ms -= 15; + } + while (sym >= 15 && ms != 0); + if (shift) od_ec_enc_bits(enc, x & ((1 << shift) - 1), shift); +} + +/** Encodes a Laplace-distributed variable for use in PVQ + * + * @param [in,out] enc range encoder + * @param [in] x variable to encode (including sign) + * @param [in] ExQ8 expectation of the absolute value of x in Q8 + * @param [in] K maximum value of |x| + */ +void od_laplace_encode(od_ec_enc *enc, int x, int ex_q8, int k) { + int j; + int shift; + int xs; + uint16_t cdf[16]; + int sym; + int decay; + int offset; + /* shift down x if expectation is too high */ + shift = OD_ILOG(ex_q8) - 11; + if (shift < 0) shift = 0; + /* Apply the shift with rounding to Ex, K and xs */ + ex_q8 = (ex_q8 + (1 << shift >> 1)) >> shift; + k = (k + (1 << shift >> 1)) >> shift; + xs = (x + (1 << shift >> 1)) >> shift; + decay = OD_MINI(254, 256*ex_q8/(ex_q8 + 256)); + offset = LAPLACE_OFFSET[(decay + 1) >> 1]; + for (j = 0; j < 16; j++) { + cdf[j] = EXP_CDF_TABLE[(decay + 1) >> 1][j] - offset; + } + sym = xs; + if (sym > 15) sym = 15; + /* Simple way of truncating the pdf when we have a bound */ + if (k != 0) od_ec_encode_cdf_unscaled(enc, sym, cdf, OD_MINI(k + 1, 16)); + if (shift) { + int special; + /* Because of the rounding, there's only half the number of possibilities + for xs=0 */ + special = xs == 0; + if (shift - special > 0) { + od_ec_enc_bits(enc, x - (xs << shift) + (!special << (shift - 1)), + shift - special); + } + } + /* Handle the exponentially-decaying tail of the distribution */ + OD_ASSERT(xs - 15 <= k - 15); + if (xs >= 15) od_laplace_encode_special(enc, xs - 15, decay, k - 15); +} + +static void laplace_encode_vector_delta(od_ec_enc *enc, const od_coeff *y, int n, int k, + int32_t *curr, const int32_t *means) { + int i; + int prev; + int sum_ex; + int sum_c; + int first; + int k_left; + int coef; + prev = 0; + sum_ex = 0; + sum_c = 0; + first = 1; + k_left = k; + coef = 256*means[OD_ADAPT_COUNT_Q8]/ + (1 + means[OD_ADAPT_COUNT_EX_Q8]); + coef = OD_MAXI(coef, 1); + for (i = 0; i < n; i++) { + if (y[i] != 0) { + int j; + int count; + int mag; + mag = abs(y[i]); + count = i - prev; + if (first) { + int decay; + int ex = coef*(n - prev)/k_left; + if (ex > 65280) decay = 255; + else { + decay = OD_MINI(255, + (int)((256*ex/(ex + 256) + (ex>>5)*ex/((n + 1)*(n - 1)*(n - 1))))); + } + /*Update mean position.*/ + OD_ASSERT(count <= n - 1); + od_laplace_encode_special(enc, count, decay, n - 1); + first = 0; + } + else od_laplace_encode(enc, count, coef*(n - prev)/k_left, n - prev - 1); + sum_ex += 256*(n - prev); + sum_c += count*k_left; + od_ec_enc_bits(enc, y[i] < 0, 1); + for (j = 0; j < mag - 1; j++) { + od_laplace_encode(enc, 0, coef*(n - 
i)/(k_left - 1 - j), n - i - 1); + sum_ex += 256*(n - i); + } + k_left -= mag; + prev = i; + if (k_left == 0) break; + } + } + if (k > 0) { + curr[OD_ADAPT_COUNT_Q8] = 256*sum_c; + curr[OD_ADAPT_COUNT_EX_Q8] = sum_ex; + } + else { + curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE; + curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE; + } + curr[OD_ADAPT_K_Q8] = 0; + curr[OD_ADAPT_SUM_EX_Q8] = 0; +} + +/** Encodes a vector of integers assumed to come from rounding a sequence of + * Laplace-distributed real values in decreasing order of variance. + * + * @param [in,out] enc range encoder + * @param [in] y vector to encode + * @param [in] N dimension of the vector + * @param [in] K sum of the absolute value of components of y + * @param [out] curr Adaptation context output, may alias means. + * @param [in] means Adaptation context input. + */ +void od_laplace_encode_vector(od_ec_enc *enc, const od_coeff *y, int n, int k, + int32_t *curr, const int32_t *means) { + int i; + int sum_ex; + int kn; + int exp_q8; + int mean_k_q8; + int mean_sum_ex_q8; + int ran_delta; + ran_delta = 0; + if (k <= 1) { + laplace_encode_vector_delta(enc, y, n, k, curr, means); + return; + } + sum_ex = 0; + kn = k; + /* Estimates the factor relating pulses_left and positions_left to E(|x|) */ + mean_k_q8 = means[OD_ADAPT_K_Q8]; + mean_sum_ex_q8 = means[OD_ADAPT_SUM_EX_Q8]; + if (mean_k_q8 < 1 << 23) exp_q8 = 256*mean_k_q8/(1 + mean_sum_ex_q8); + else exp_q8 = mean_k_q8/(1 + (mean_sum_ex_q8 >> 8)); + for (i = 0; i < n; i++) { + int ex; + int x; + if (kn == 0) break; + if (kn <= 1 && i != n - 1) { + laplace_encode_vector_delta(enc, y + i, n - i, kn, curr, means); + ran_delta = 1; + break; + } + x = abs(y[i]); + /* Expected value of x (round-to-nearest) is + expQ8*pulses_left/positions_left */ + ex = (2*exp_q8*kn + (n - i))/(2*(n - i)); + if (ex > kn*256) ex = kn*256; + sum_ex += (2*256*kn + (n - i))/(2*(n - i)); + /* No need to encode the magnitude for the last bin. */ + if (i != n - 1) od_laplace_encode(enc, x, ex, kn); + if (x != 0) od_ec_enc_bits(enc, y[i] < 0, 1); + kn -= x; + } + /* Adapting the estimates for expQ8 */ + if (!ran_delta) { + curr[OD_ADAPT_COUNT_Q8] = OD_ADAPT_NO_VALUE; + curr[OD_ADAPT_COUNT_EX_Q8] = OD_ADAPT_NO_VALUE; + } + curr[OD_ADAPT_K_Q8] = k - kn; + curr[OD_ADAPT_SUM_EX_Q8] = sum_ex; +} diff --git a/av1/encoder/pvq_encoder.c b/av1/encoder/pvq_encoder.c new file mode 100644 index 0000000000000000000000000000000000000000..b0ee102a639b1549b8c3f56011c0d21173a135f0 --- /dev/null +++ b/av1/encoder/pvq_encoder.c @@ -0,0 +1,1015 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ + +/* clang-format off */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include "aom_dsp/entcode.h" +#include "aom_dsp/entenc.h" +#include "av1/common/blockd.h" +#include "av1/common/odintrin.h" +#include "av1/common/partition.h" +#include "av1/common/pvq_state.h" +#include "av1/encoder/encodemb.h" +#include "pvq_encoder.h" + +#define OD_PVQ_RATE_APPROX (0) +/*Shift to ensure that the upper bound (i.e. 
for the max blocksize) of the + dot-product of the 1st band of chroma with the luma ref doesn't overflow.*/ +#define OD_CFL_FLIP_SHIFT (OD_LIMIT_BSIZE_MAX + 0) + +static void od_encode_pvq_codeword(od_ec_enc *ec, od_pvq_codeword_ctx *adapt, + const od_coeff *in, int n, int k) { + int i; + od_encode_band_pvq_splits(ec, adapt, in, n, k, 0); + for (i = 0; i < n; i++) if (in[i]) od_ec_enc_bits(ec, in[i] < 0, 1); +} + +/* Computes 1/sqrt(i) using a table for small values. */ +static double od_rsqrt_table(int i) { + static double table[16] = { + 1.000000, 0.707107, 0.577350, 0.500000, + 0.447214, 0.408248, 0.377964, 0.353553, + 0.333333, 0.316228, 0.301511, 0.288675, + 0.277350, 0.267261, 0.258199, 0.250000}; + if (i <= 16) return table[i-1]; + else return 1./sqrt(i); +} + +/*Computes 1/sqrt(start+2*i+1) using a lookup table containing the results + where 0 <= i < table_size.*/ +static double od_custom_rsqrt_dynamic_table(const double* table, + const int table_size, const double start, const int i) { + if (i < table_size) return table[i]; + else return od_rsqrt_table(start + 2*i + 1); +} + +/*Fills tables used in od_custom_rsqrt_dynamic_table for a given start.*/ +static void od_fill_dynamic_rqrt_table(double *table, const int table_size, + const double start) { + int i; + for (i = 0; i < table_size; i++) + table[i] = od_rsqrt_table(start + 2*i + 1); +} + +/** Find the codepoint on the given PSphere closest to the desired + * vector. Double-precision PVQ search just to make sure our tests + * aren't limited by numerical accuracy. + * + * @param [in] xcoeff input vector to quantize (x in the math doc) + * @param [in] n number of dimensions + * @param [in] k number of pulses + * @param [out] ypulse optimal codevector found (y in the math doc) + * @param [out] g2 multiplier for the distortion (typically squared + * gain units) + * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO + * @param [in] prev_k number of pulses already in ypulse that we should + * reuse for the search (or 0 for a new search) + * @return cosine distance between x and y (between 0 and 1) + */ +static double pvq_search_rdo_double(const od_val16 *xcoeff, int n, int k, + od_coeff *ypulse, double g2, double pvq_norm_lambda, int prev_k) { + int i, j; + double xy; + double yy; + /* TODO - This blows our 8kB stack space budget and should be fixed when + converting PVQ to fixed point. */ + double x[MAXN]; + double xx; + double lambda; + double norm_1; + int rdo_pulses; + double delta_rate; + xx = xy = yy = 0; + for (j = 0; j < n; j++) { + x[j] = fabs((float)xcoeff[j]); + xx += x[j]*x[j]; + } + norm_1 = 1./sqrt(1e-30 + xx); + lambda = pvq_norm_lambda/(1e-30 + g2); + i = 0; + if (prev_k > 0 && prev_k <= k) { + /* We reuse pulses from a previous search so we don't have to search them + again. */ + for (j = 0; j < n; j++) { + ypulse[j] = abs(ypulse[j]); + xy += x[j]*ypulse[j]; + yy += ypulse[j]*ypulse[j]; + i += ypulse[j]; + } + } + else if (k > 2) { + double l1_norm; + double l1_inv; + l1_norm = 0; + for (j = 0; j < n; j++) l1_norm += x[j]; + l1_inv = 1./OD_MAXF(l1_norm, 1e-100); + for (j = 0; j < n; j++) { + double tmp; + tmp = k*x[j]*l1_inv; + ypulse[j] = OD_MAXI(0, (int)floor(tmp)); + xy += x[j]*ypulse[j]; + yy += ypulse[j]*ypulse[j]; + i += ypulse[j]; + } + } + else OD_CLEAR(ypulse, n); + + /* Only use RDO on the last few pulses. This not only saves CPU, but using + RDO on all pulses actually makes the results worse for reasons I don't + fully understand. 
*/ + rdo_pulses = 1 + k/4; + /* Rough assumption for now, the last position costs about 3 bits more than + the first. */ + delta_rate = 3./n; + /* Search one pulse at a time */ + for (; i < k - rdo_pulses; i++) { + int pos; + double best_xy; + double best_yy; + pos = 0; + best_xy = -10; + best_yy = 1; + for (j = 0; j < n; j++) { + double tmp_xy; + double tmp_yy; + tmp_xy = xy + x[j]; + tmp_yy = yy + 2*ypulse[j] + 1; + tmp_xy *= tmp_xy; + if (j == 0 || tmp_xy*best_yy > best_xy*tmp_yy) { + best_xy = tmp_xy; + best_yy = tmp_yy; + pos = j; + } + } + xy = xy + x[pos]; + yy = yy + 2*ypulse[pos] + 1; + ypulse[pos]++; + } + /* Search last pulses with RDO. Distortion is D = (x-y)^2 = x^2 - 2*x*y + y^2 + and since x^2 and y^2 are constant, we just maximize x*y, plus a + lambda*rate term. Note that since x and y aren't normalized here, + we need to divide by sqrt(x^2)*sqrt(y^2). */ + for (; i < k; i++) { + double rsqrt_table[4]; + int rsqrt_table_size = 4; + int pos; + double best_cost; + pos = 0; + best_cost = -1e5; + /*Fill the small rsqrt lookup table with inputs relative to yy. + Specifically, the table of n values is filled with + rsqrt(yy + 1), rsqrt(yy + 2 + 1) .. rsqrt(yy + 2*(n-1) + 1).*/ + od_fill_dynamic_rqrt_table(rsqrt_table, rsqrt_table_size, yy); + for (j = 0; j < n; j++) { + double tmp_xy; + double tmp_yy; + tmp_xy = xy + x[j]; + /*Calculate rsqrt(yy + 2*ypulse[j] + 1) using an optimized method.*/ + tmp_yy = od_custom_rsqrt_dynamic_table(rsqrt_table, rsqrt_table_size, + yy, ypulse[j]); + tmp_xy = 2*tmp_xy*norm_1*tmp_yy - lambda*j*delta_rate; + if (j == 0 || tmp_xy > best_cost) { + best_cost = tmp_xy; + pos = j; + } + } + xy = xy + x[pos]; + yy = yy + 2*ypulse[pos] + 1; + ypulse[pos]++; + } + for (i = 0; i < n; i++) { + if (xcoeff[i] < 0) ypulse[i] = -ypulse[i]; + } + return xy/(1e-100 + sqrt(xx*yy)); +} + +/** Encodes the gain so that the return value increases with the + * distance |x-ref|, so that we can encode a zero when x=ref. The + * value x=0 is not covered because it is only allowed in the noref + * case. + * + * @param [in] x quantized gain to encode + * @param [in] ref quantized gain of the reference + * @return interleave-encoded quantized gain value + */ +static int neg_interleave(int x, int ref) { + if (x < ref) return -2*(x - ref) - 1; + else if (x < 2*ref) return 2*(x - ref); + else return x-1; +} + +int od_vector_is_null(const od_coeff *x, int len) { + int i; + for (i = 0; i < len; i++) if (x[i]) return 0; + return 1; +} + +static double od_pvq_rate(int qg, int icgr, int theta, int ts, + const od_adapt_ctx *adapt, const od_coeff *y0, int k, int n, + int is_keyframe, int pli, int speed) { + double rate; + if (k == 0) rate = 0; + else if (speed > 0) { + int i; + int sum; + double f; + /* Compute "center of mass" of the pulse vector. */ + sum = 0; + for (i = 0; i < n - (theta != -1); i++) sum += i*abs(y0[i]); + f = sum/(double)(k*n); + /* Estimates the number of bits it will cost to encode K pulses in + N dimensions based on hand-tuned fit for bitrate vs K, N and + "center of mass". 
*/
+    rate = (1 + .4*f)*n*OD_LOG2(1 + OD_MAXF(0, log(n*2*(1*f + .025))*k/n)) + 3;
+  }
+  else {
+    od_ec_enc ec;
+    od_pvq_codeword_ctx cd;
+    int tell;
+    od_ec_enc_init(&ec, 1000);
+    OD_COPY(&cd, &adapt->pvq.pvq_codeword_ctx, 1);
+    tell = od_ec_enc_tell_frac(&ec);
+    od_encode_pvq_codeword(&ec, &cd, y0, n - (theta != -1), k);
+    rate = (od_ec_enc_tell_frac(&ec)-tell)/8.;
+    od_ec_enc_clear(&ec);
+  }
+  if (qg > 0 && theta >= 0) {
+    /* Approximate cost of entropy-coding theta */
+    rate += .9*OD_LOG2(ts);
+    /* Adding a cost to using the H/V pred because it's going to be off
+       most of the time. Cost is optimized on subset1, while making
+       sure we don't hurt the checkerboard image too much.
+       FIXME: Do real RDO instead of this arbitrary cost. */
+    if (is_keyframe && pli == 0) rate += 6;
+    if (qg == icgr) rate -= .5;
+  }
+  return rate;
+}
+
+#define MAX_PVQ_ITEMS (20)
+/* This stores the information about a PVQ search candidate, so we can sort
+   based on K. */
+typedef struct {
+  int gain;
+  int k;
+  od_val32 qtheta;
+  int theta;
+  int ts;
+  od_val32 qcg;
+} pvq_search_item;
+
+int items_compare(pvq_search_item *a, pvq_search_item *b) {
+  return a->k - b->k;
+}
+
+/** Perform PVQ quantization with prediction, trying several
+ * possible gains and angles. See draft-valin-videocodec-pvq and
+ * http://jmvalin.ca/slides/pvq.pdf for more details.
+ *
+ * @param [out] out coefficients after quantization
+ * @param [in] x0 coefficients before quantization
+ * @param [in] r0 reference, aka predicted coefficients
+ * @param [in] n number of dimensions
+ * @param [in] q0 quantization step size
+ * @param [out] y pulse vector (i.e. selected PVQ codevector)
+ * @param [out] itheta angle between input and reference (-1 if noref)
+ * @param [out] max_theta maximum value that itheta could have taken
+ * @param [out] vk total number of pulses
+ * @param [in] beta per-band activity masking beta param
+ * @param [out] skip_diff distortion cost of skipping this block
+ * (accumulated)
+ * @param [in] robust make stream robust to error in the reference
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [in] pli plane index
+ * @param [in] adapt probability adaptation context
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
+ * @param [in] speed Make search faster by making approximations
+ * @return gain index of the quantized gain
+*/
+static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
+ int n, int q0, od_coeff *y, int *itheta, int *max_theta, int *vk,
+ od_val16 beta, double *skip_diff, int robust, int is_keyframe, int pli,
+ const od_adapt_ctx *adapt, const int16_t *qm,
+ const int16_t *qm_inv, double pvq_norm_lambda, int speed) {
+  od_val32 g;
+  od_val32 gr;
+  od_coeff y_tmp[MAXN];
+  int i;
+  /* Number of pulses. */
+  int k;
+  /* Companded gain of x and reference, normalized to q. */
+  od_val32 cg;
+  od_val32 cgr;
+  int icgr;
+  int qg;
+  /* Best RDO cost (D + lambda*R) so far. */
+  double best_cost;
+  double dist0;
+  /* Distortion (D) that corresponds to the best RDO cost. */
+  double best_dist;
+  double dist;
+  /* Sign of Householder reflection. */
+  int s;
+  /* Dimension on which Householder reflects.
*/ + int m; + od_val32 theta; + double corr; + int best_k; + od_val32 best_qtheta; + od_val32 gain_offset; + int noref; + double skip_dist; + int cfl_enabled; + int skip; + double gain_weight; + od_val16 x16[MAXN]; + od_val16 r16[MAXN]; + int xshift; + int rshift; + /* Give more weight to gain error when calculating the total distortion. */ + gain_weight = 1.0; + OD_ASSERT(n > 1); + corr = 0; +#if !defined(OD_FLOAT_PVQ) + /* Shift needed to make x fit in 16 bits even after rotation. + This shift value is not normative (it can be changed without breaking + the bitstream) */ + xshift = OD_MAXI(0, od_vector_log_mag(x0, n) - 15); + /* Shift needed to make the reference fit in 15 bits, so that the Householder + vector can fit in 16 bits. + This shift value *is* normative, and has to match the decoder. */ + rshift = OD_MAXI(0, od_vector_log_mag(r0, n) - 14); +#else + xshift = 0; + rshift = 0; +#endif + for (i = 0; i < n; i++) { +#if defined(OD_FLOAT_PVQ) + /*This is slightly different from the original float PVQ code, + where the qm was applied in the accumulation in od_pvq_compute_gain and + the vectors were od_coeffs, not od_val16 (i.e. double).*/ + x16[i] = x0[i]*(double)qm[i]*OD_QM_SCALE_1; + r16[i] = r0[i]*(double)qm[i]*OD_QM_SCALE_1; +#else + x16[i] = OD_SHR_ROUND(x0[i]*qm[i], OD_QM_SHIFT + xshift); + r16[i] = OD_SHR_ROUND(r0[i]*qm[i], OD_QM_SHIFT + rshift); +#endif + corr += OD_MULT16_16(x16[i], r16[i]); + } + cfl_enabled = is_keyframe && pli != 0 && !OD_DISABLE_CFL; + cg = od_pvq_compute_gain(x16, n, q0, &g, beta, xshift); + cgr = od_pvq_compute_gain(r16, n, q0, &gr, beta, rshift); + if (cfl_enabled) cgr = OD_CGAIN_SCALE; + /* gain_offset is meant to make sure one of the quantized gains has + exactly the same gain as the reference. */ +#if defined(OD_FLOAT_PVQ) + icgr = (int)floor(.5 + cgr); +#else + icgr = OD_SHR_ROUND(cgr, OD_CGAIN_SHIFT); +#endif + gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT); + /* Start search with null case: gain=0, no pulse. */ + qg = 0; + dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2; + best_dist = dist; + best_cost = dist + pvq_norm_lambda*od_pvq_rate(0, 0, -1, 0, adapt, NULL, 0, + n, is_keyframe, pli, speed); + noref = 1; + best_k = 0; + *itheta = -1; + *max_theta = 0; + OD_CLEAR(y, n); + best_qtheta = 0; + m = 0; + s = 1; + corr = corr/(1e-100 + g*(double)gr/OD_SHL(1, xshift + rshift)); + corr = OD_MAXF(OD_MINF(corr, 1.), -1.); + if (is_keyframe) skip_dist = gain_weight*cg*cg*OD_CGAIN_SCALE_2; + else { + skip_dist = gain_weight*(cg - cgr)*(cg - cgr) + + cgr*(double)cg*(2 - 2*corr); + skip_dist *= OD_CGAIN_SCALE_2; + } + if (!is_keyframe) { + /* noref, gain=0 isn't allowed, but skip is allowed. */ + od_val32 scgr; + scgr = OD_MAXF(0,gain_offset); + if (icgr == 0) { + best_dist = gain_weight*(cg - scgr)*(cg - scgr) + + scgr*(double)cg*(2 - 2*corr); + best_dist *= OD_CGAIN_SCALE_2; + } + best_cost = best_dist + pvq_norm_lambda*od_pvq_rate(0, icgr, 0, 0, adapt, + NULL, 0, n, is_keyframe, pli, speed); + best_qtheta = 0; + *itheta = 0; + *max_theta = 0; + noref = 0; + } + dist0 = best_dist; + if (n <= OD_MAX_PVQ_SIZE && !od_vector_is_null(r0, n) && corr > 0) { + od_val16 xr[MAXN]; + int gain_bound; + int prev_k; + pvq_search_item items[MAX_PVQ_ITEMS]; + int idx; + int nitems; + double cos_dist; + idx = 0; + gain_bound = OD_SHR(cg - gain_offset, OD_CGAIN_SHIFT); + /* Perform theta search only if prediction is useful. 
*/ + theta = OD_ROUND32(OD_THETA_SCALE*acos(corr)); + m = od_compute_householder(r16, n, gr, &s, rshift); + od_apply_householder(xr, x16, r16, n); + prev_k = 0; + for (i = m; i < n - 1; i++) xr[i] = xr[i + 1]; + /* Compute all candidate PVQ searches within a reasonable range of gain + and theta. */ + for (i = OD_MAXI(1, gain_bound - 1); i <= gain_bound + 1; i++) { + int j; + od_val32 qcg; + int ts; + int theta_lower; + int theta_upper; + /* Quantized companded gain */ + qcg = OD_SHL(i, OD_CGAIN_SHIFT) + gain_offset; + /* Set angular resolution (in ra) to match the encoded gain */ + ts = od_pvq_compute_max_theta(qcg, beta); + theta_lower = OD_MAXI(0, (int)floor(.5 + + theta*OD_THETA_SCALE_1*2/M_PI*ts) - 2); + theta_upper = OD_MINI(ts - 1, (int)ceil(theta*OD_THETA_SCALE_1*2/M_PI*ts)); + /* Include the angles within a reasonable range. */ + for (j = theta_lower; j <= theta_upper; j++) { + od_val32 qtheta; + qtheta = od_pvq_compute_theta(j, ts); + k = od_pvq_compute_k(qcg, j, qtheta, 0, n, beta, robust || is_keyframe); + items[idx].gain = i; + items[idx].theta = j; + items[idx].k = k; + items[idx].qcg = qcg; + items[idx].qtheta = qtheta; + items[idx].ts = ts; + idx++; + OD_ASSERT(idx < MAX_PVQ_ITEMS); + } + } + nitems = idx; + cos_dist = 0; + /* Sort PVQ search candidates in ascending order of pulses K so that + we can reuse all the previously searched pulses across searches. */ + qsort(items, nitems, sizeof(items[0]), + (int (*)(const void *, const void *))items_compare); + /* Search for the best gain/theta in order. */ + for (idx = 0; idx < nitems; idx++) { + int j; + od_val32 qcg; + int ts; + double cost; + double dist_theta; + double sin_prod; + od_val32 qtheta; + /* Quantized companded gain */ + qcg = items[idx].qcg; + i = items[idx].gain; + j = items[idx].theta; + /* Set angular resolution (in ra) to match the encoded gain */ + ts = items[idx].ts; + /* Search for the best angle within a reasonable range. */ + qtheta = items[idx].qtheta; + k = items[idx].k; + /* Compute the minimal possible distortion by not taking the PVQ + cos_dist into account. */ + dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1; + dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta; + dist *= OD_CGAIN_SCALE_2; + /* If we have no hope of beating skip (including a 1-bit worst-case + penalty), stop now. */ + if (dist > dist0 + 1.0*pvq_norm_lambda && k != 0) continue; + sin_prod = od_pvq_sin(theta)*OD_TRIG_SCALE_1*od_pvq_sin(qtheta)* + OD_TRIG_SCALE_1; + /* PVQ search, using a gain of qcg*cg*sin(theta)*sin(qtheta) since + that's the factor by which cos_dist is multiplied to get the + distortion metric. */ + if (k == 0) { + cos_dist = 0; + OD_CLEAR(y_tmp, n-1); + } + else if (k != prev_k) { + cos_dist = pvq_search_rdo_double(xr, n - 1, k, y_tmp, + qcg*(double)cg*sin_prod*OD_CGAIN_SCALE_2, pvq_norm_lambda, prev_k); + } + prev_k = k; + /* See Jmspeex' Journal of Dubious Theoretical Results. */ + dist_theta = 2 - 2.*od_pvq_cos(theta - qtheta)*OD_TRIG_SCALE_1 + + sin_prod*(2 - 2*cos_dist); + dist = gain_weight*(qcg - cg)*(qcg - cg) + qcg*(double)cg*dist_theta; + dist *= OD_CGAIN_SCALE_2; + /* Do approximate RDO. 
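+     That is, pick the candidate minimizing cost = D + lambda*R, where D is
+     the distortion computed above and R is the rate estimate (in bits)
+     from od_pvq_rate().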
*/ + cost = dist + pvq_norm_lambda*od_pvq_rate(i, icgr, j, ts, adapt, y_tmp, + k, n, is_keyframe, pli, speed); + if (cost < best_cost) { + best_cost = cost; + best_dist = dist; + qg = i; + best_k = k; + best_qtheta = qtheta; + *itheta = j; + *max_theta = ts; + noref = 0; + OD_COPY(y, y_tmp, n - 1); + } + } + } + /* Don't bother with no-reference version if there's a reasonable + correlation. The only exception is luma on a keyframe because + H/V prediction is unreliable. */ + if (n <= OD_MAX_PVQ_SIZE && + ((is_keyframe && pli == 0) || corr < .5 + || cg < (od_val32)(OD_SHL(2, OD_CGAIN_SHIFT)))) { + int gain_bound; + int prev_k; + gain_bound = OD_SHR(cg, OD_CGAIN_SHIFT); + prev_k = 0; + /* Search for the best gain (haven't determined reasonable range yet). */ + for (i = OD_MAXI(1, gain_bound); i <= gain_bound + 1; i++) { + double cos_dist; + double cost; + od_val32 qcg; + qcg = OD_SHL(i, OD_CGAIN_SHIFT); + k = od_pvq_compute_k(qcg, -1, -1, 1, n, beta, robust || is_keyframe); + /* Compute the minimal possible distortion by not taking the PVQ + cos_dist into account. */ + dist = gain_weight*(qcg - cg)*(qcg - cg); + dist *= OD_CGAIN_SCALE_2; + if (dist > dist0 && k != 0) continue; + cos_dist = pvq_search_rdo_double(x16, n, k, y_tmp, + qcg*(double)cg*OD_CGAIN_SCALE_2, pvq_norm_lambda, prev_k); + prev_k = k; + /* See Jmspeex' Journal of Dubious Theoretical Results. */ + dist = gain_weight*(qcg - cg)*(qcg - cg) + + qcg*(double)cg*(2 - 2*cos_dist); + dist *= OD_CGAIN_SCALE_2; + /* Do approximate RDO. */ + cost = dist + pvq_norm_lambda*od_pvq_rate(i, 0, -1, 0, adapt, y_tmp, k, + n, is_keyframe, pli, speed); + if (cost <= best_cost) { + best_cost = cost; + best_dist = dist; + qg = i; + noref = 1; + best_k = k; + *itheta = -1; + *max_theta = 0; + OD_COPY(y, y_tmp, n); + } + } + } + k = best_k; + theta = best_qtheta; + skip = 0; + if (noref) { + if (qg == 0) skip = OD_PVQ_SKIP_ZERO; + } + else { + if (!is_keyframe && qg == 0) { + skip = (icgr ? OD_PVQ_SKIP_ZERO : OD_PVQ_SKIP_COPY); + } + if (qg == icgr && *itheta == 0 && !cfl_enabled) skip = OD_PVQ_SKIP_COPY; + } + /* Synthesize like the decoder would. */ + if (skip) { + if (skip == OD_PVQ_SKIP_COPY) OD_COPY(out, r0, n); + else OD_CLEAR(out, n); + } + else { + if (noref) gain_offset = 0; + g = od_gain_expand(OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset, q0, beta); + od_pvq_synthesis_partial(out, y, r16, n, noref, g, theta, m, s, + qm_inv); + } + *vk = k; + *skip_diff += skip_dist - best_dist; + /* Encode gain differently depending on whether we use prediction or not. + Special encoding on inter frames where qg=0 is allowed for noref=0 + but not noref=1.*/ + if (is_keyframe) return noref ? qg : neg_interleave(qg, icgr); + else return noref ? 
qg - 1 : neg_interleave(qg + 1, icgr + 1);
+}
+
+/** Encodes a single vector of integers (e.g., a partition within a
+ * coefficient block) using PVQ
+ *
+ * @param [in,out] ec range encoder
+ * @param [in] qg quantized gain
+ * @param [in] theta quantized post-prediction theta
+ * @param [in] max_theta maximum possible quantized theta value
+ * @param [in] in coefficient vector to code
+ * @param [in] n number of coefficients in partition
+ * @param [in] k number of pulses in partition
+ * @param [in,out] model entropy encoder state
+ * @param [in,out] adapt adaptation context
+ * @param [in,out] exg ExQ16 expectation of gain value
+ * @param [in,out] ext ExQ16 expectation of theta value
+ * @param [in] nodesync do not use info that depends on the reference
+ * @param [in] cdf_ctx selects which cdf context to use
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [in] code_skip whether the "skip rest" flag is allowed
+ * @param [in] skip_rest when set, we skip all higher bands
+ * @param [in] encode_flip whether we need to encode the CfL flip flag now
+ * @param [in] flip value of the CfL flip flag
+ */
+void pvq_encode_partition(od_ec_enc *ec,
+                          int qg,
+                          int theta,
+                          int max_theta,
+                          const od_coeff *in,
+                          int n,
+                          int k,
+                          generic_encoder model[3],
+                          od_adapt_ctx *adapt,
+                          int *exg,
+                          int *ext,
+                          int nodesync,
+                          int cdf_ctx,
+                          int is_keyframe,
+                          int code_skip,
+                          int skip_rest,
+                          int encode_flip,
+                          int flip) {
+  int noref;
+  int id;
+  noref = (theta == -1);
+  id = (qg > 0) + 2*OD_MINI(theta + 1,3) + 8*code_skip*skip_rest;
+  if (is_keyframe) {
+    OD_ASSERT(id != 8);
+    if (id >= 8) id--;
+  }
+  else {
+    OD_ASSERT(id != 10);
+    if (id >= 10) id--;
+  }
+  /* Jointly code gain, theta and noref for small values. Then we handle
+     larger gain and theta values. For noref, theta = -1. */
+  od_encode_cdf_adapt(ec, id, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
+   8 + 7*code_skip, adapt->pvq.pvq_gaintheta_increment);
+  if (encode_flip) {
+    /* We could eventually do some smarter entropy coding here, but it would
+       have to be good enough to overcome the overhead of the entropy coder.
+       An early attempt using a "toggle" flag with simple adaptation wasn't
+       worth the trouble. */
+    od_ec_enc_bits(ec, flip, 1);
+  }
+  if (qg > 0) {
+    int tmp;
+    tmp = *exg;
+    generic_encode(ec, &model[!noref], qg - 1, -1, &tmp, 2);
+    OD_IIR_DIADIC(*exg, qg << 16, 2);
+  }
+  if (theta > 1 && (nodesync || max_theta > 3)) {
+    int tmp;
+    tmp = *ext;
+    generic_encode(ec, &model[2], theta - 2, nodesync ? -1 : max_theta - 3,
+     &tmp, 2);
+    OD_IIR_DIADIC(*ext, theta << 16, 2);
+  }
+  od_encode_pvq_codeword(ec, &adapt->pvq.pvq_codeword_ctx, in,
+   n - (theta != -1), k);
+}
+
+/** Quantizes a scalar with rate-distortion optimization (RDO)
+ * @param [in] x unquantized value
+ * @param [in] q quantization step size
+ * @param [in] delta0 rate increase for encoding a 1 instead of a 0
+ * @param [in] pvq_norm_lambda enc->pvq_norm_lambda for quantized RDO
+ * @retval quantized value
+ */
+int od_rdo_quant(od_coeff x, int q, double delta0, double pvq_norm_lambda) {
+  int n;
+  /* Optimal quantization threshold is 1/2 + lambda*delta_rate/2. See
+     Jmspeex' Journal of Dubious Theoretical Results for details.
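+     As a worked example: for the first nonzero level (n = 1) the test below
+     zeroes x unless |x|/q >= 1/2 + lambda*delta0/2, so with
+     lambda*delta0 = 1 the input must reach a full quantizer step before a 1
+     is coded (numbers chosen only for illustration).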
*/
+  n = OD_DIV_R0(abs(x), q);
+  if ((double)abs(x)/q < (double)n/2 + pvq_norm_lambda*delta0/(2*n)) {
+    return 0;
+  }
+  else {
+    return OD_DIV_R0(x, q);
+  }
+}
+
+#if OD_SIGNAL_Q_SCALING
+void od_encode_quantizer_scaling(daala_enc_ctx *enc, int q_scaling,
+ int sbx, int sby, int skip) {
+  int nhsb;
+  OD_ASSERT(skip == !!skip);
+  nhsb = enc->state.nhsb;
+  OD_ASSERT(sbx < nhsb);
+  OD_ASSERT(sby < enc->state.nvsb);
+  OD_ASSERT(!skip || q_scaling == 0);
+  enc->state.sb_q_scaling[sby*nhsb + sbx] = q_scaling;
+  if (!skip) {
+    int above;
+    int left;
+    /* use value from neighbour if possible, otherwise use 0 */
+    above = sby > 0 ? enc->state.sb_q_scaling[(sby - 1)*enc->state.nhsb + sbx]
+     : 0;
+    left = sbx > 0 ? enc->state.sb_q_scaling[sby*enc->state.nhsb + (sbx - 1)]
+     : 0;
+    od_encode_cdf_adapt(&enc->ec, q_scaling,
+     enc->state.adapt.q_cdf[above + left*4], 4,
+     enc->state.adapt.q_increment);
+  }
+}
+#endif
+
+/** Encode a coefficient block (excepting DC) using PVQ
+ *
+ * @param [in,out] enc daala encoder context
+ * @param [in] ref 'reference' (prediction) vector
+ * @param [in] in coefficient block to quantize and encode
+ * @param [out] out quantized coefficient block
+ * @param [in] q_dc DC scale/quantizer
+ * @param [in] q_ac AC scale/quantizer
+ * @param [in] pli plane index
+ * @param [in] bs log of the block size minus two
+ * @param [in] beta per-band activity masking beta param
+ * @param [in] robust make stream robust to error in the reference
+ * @param [in] is_keyframe whether we're encoding a keyframe
+ * @param [in] q_scaling scaling factor to apply to quantizer
+ * @param [in] bx x-coordinate of this block
+ * @param [in] by y-coordinate of this block
+ * @param [in] qm QM with magnitude compensation
+ * @param [in] qm_inv Inverse of QM with magnitude compensation
+ * @param [in] speed Make search faster by making approximations
+ * @param [in] pvq_info if NULL, the call is considered an RDO search;
+ *                      otherwise the search results are stored there
+ * @return Returns 1 if both DC and AC coefficients are skipped,
+ * zero otherwise
+ */
+int od_pvq_encode(daala_enc_ctx *enc,
+                  od_coeff *ref,
+                  const od_coeff *in,
+                  od_coeff *out,
+                  int q_dc,
+                  int q_ac,
+                  int pli,
+                  int bs,
+                  const od_val16 *beta,
+                  int robust,
+                  int is_keyframe,
+                  int q_scaling,
+                  int bx,
+                  int by,
+                  const int16_t *qm,
+                  const int16_t *qm_inv,
+                  int speed,
+                  PVQ_INFO *pvq_info){
+  int theta[PVQ_MAX_PARTITIONS];
+  int max_theta[PVQ_MAX_PARTITIONS];
+  int qg[PVQ_MAX_PARTITIONS];
+  int k[PVQ_MAX_PARTITIONS];
+  od_coeff y[OD_BSIZE_MAX*OD_BSIZE_MAX];
+  int *exg;
+  int *ext;
+  int nb_bands;
+  int i;
+  const int *off;
+  int size[PVQ_MAX_PARTITIONS];
+  generic_encoder *model;
+  double skip_diff;
+  int tell;
+  uint16_t *skip_cdf;
+  od_rollback_buffer buf;
+  int dc_quant;
+  int flip;
+  int cfl_encoded;
+  int skip_rest;
+  int skip_dir;
+  int skip_theta_value;
+  /* const unsigned char *pvq_qm; */
+  double dc_rate;
+#if !OD_SIGNAL_Q_SCALING
+  OD_UNUSED(q_scaling);
+  OD_UNUSED(bx);
+  OD_UNUSED(by);
+#endif
+  /* TODO(yushin): Enable this for activity masking,
+     when pvq_qm_q4 is available in AOM.
*/ + /* pvq_qm = &enc->state.pvq_qm_q4[pli][0]; */ + exg = &enc->state.adapt.pvq.pvq_exg[pli][bs][0]; + ext = enc->state.adapt.pvq.pvq_ext + bs*PVQ_MAX_PARTITIONS; + skip_cdf = enc->state.adapt.skip_cdf[2*bs + (pli != 0)]; + model = enc->state.adapt.pvq.pvq_param_model; + nb_bands = OD_BAND_OFFSETS[bs][0]; + off = &OD_BAND_OFFSETS[bs][1]; + /*dc_quant = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, 0)] >> 4);*/ + dc_quant = OD_MAXI(1, q_dc); + tell = 0; + for (i = 0; i < nb_bands; i++) size[i] = off[i+1] - off[i]; + skip_diff = 0; + flip = 0; + /*If we are coding a chroma block of a keyframe, we are doing CfL.*/ + if (pli != 0 && is_keyframe) { + od_val32 xy; + xy = 0; + /*Compute the dot-product of the first band of chroma with the luma ref.*/ + for (i = off[0]; i < off[1]; i++) { +#if defined(OD_FLOAT_PVQ) + xy += ref[i]*(double)qm[i]*OD_QM_SCALE_1* + (double)in[i]*(double)qm[i]*OD_QM_SCALE_1; +#else + od_val32 rq; + od_val32 inq; + rq = ref[i]*qm[i]; + inq = in[i]*qm[i]; + xy += OD_SHR(rq*(int64_t)inq, OD_SHL(OD_QM_SHIFT + OD_CFL_FLIP_SHIFT, + 1)); +#endif + } + /*If cos(theta) < 0, then |theta| > pi/2 and we should negate the ref.*/ + if (xy < 0) { + flip = 1; + for(i = off[0]; i < off[nb_bands]; i++) ref[i] = -ref[i]; + } + } + for (i = 0; i < nb_bands; i++) { + int q; + /* TODO(yushin): Enable this for activity masking, + when pvq_qm_q4 is available in AOM. */ + /*q = OD_MAXI(1, q0*pvq_qm[od_qm_get_index(bs, i + 1)] >> 4);*/ + q = OD_MAXI(1, q_ac); + qg[i] = pvq_theta(out + off[i], in + off[i], ref + off[i], size[i], + q, y + off[i], &theta[i], &max_theta[i], + &k[i], beta[i], &skip_diff, robust, is_keyframe, pli, &enc->state.adapt, + qm + off[i], qm_inv + off[i], enc->pvq_norm_lambda, speed); + } + od_encode_checkpoint(enc, &buf); + if (is_keyframe) out[0] = 0; + else { + int n; + n = OD_DIV_R0(abs(in[0] - ref[0]), dc_quant); + if (n == 0) { + out[0] = 0; +#if PVQ_CHROMA_RD + } else if (pli == 0) { +#else + } else { +#endif + int tell2; + od_rollback_buffer dc_buf; + + dc_rate = -OD_LOG2((double)(skip_cdf[3] - skip_cdf[2])/ + (double)(skip_cdf[2] - skip_cdf[1])); + dc_rate += 1; + + tell2 = od_ec_enc_tell_frac(&enc->ec); + od_encode_checkpoint(enc, &dc_buf); + generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli], + n - 1, -1, &enc->state.adapt.ex_dc[pli][bs][0], 2); + tell2 = od_ec_enc_tell_frac(&enc->ec) - tell2; + dc_rate += tell2/8.0; + od_encode_rollback(enc, &dc_buf); + + out[0] = od_rdo_quant(in[0] - ref[0], dc_quant, dc_rate, + enc->pvq_norm_lambda); + } + } + tell = od_ec_enc_tell_frac(&enc->ec); + /* Code as if we're not skipping. */ + od_encode_cdf_adapt(&enc->ec, 2 + (out[0] != 0), skip_cdf, + 4, enc->state.adapt.skip_increment); + if (pvq_info) + pvq_info->ac_dc_coded = 2 + (out[0] != 0); +#if OD_SIGNAL_Q_SCALING + if (bs == OD_NBSIZES - 1 && pli == 0) { + od_encode_quantizer_scaling(enc, q_scaling, bx >> (OD_NBSIZES - 1), + by >> (OD_NBSIZES - 1), 0); + } +#endif + cfl_encoded = 0; + skip_rest = 1; + skip_theta_value = is_keyframe ? -1 : 0; + for (i = 1; i < nb_bands; i++) { + if (theta[i] != skip_theta_value || qg[i]) skip_rest = 0; + } + skip_dir = 0; + if (nb_bands > 1) { + for (i = 0; i < 3; i++) { + int j; + int tmp; + tmp = 1; + for (j = i + 1; j < nb_bands; j += 3) { + if (theta[j] != skip_theta_value || qg[j]) tmp = 0; + } + skip_dir |= tmp << i; + } + } + if (theta[0] == skip_theta_value && qg[0] == 0 && skip_rest) nb_bands = 0; + + /* NOTE: There was no other better place to put this function. 
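+     It snapshots the per-band search results (qg, theta, k, the pulse
+     vector y, and the skip decisions) so that the final bitstream writing
+     pass can replay exactly what was chosen here.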
*/ + if (pvq_info) + av1_store_pvq_enc_info(pvq_info, qg, theta, max_theta, k, + y, nb_bands, off, size, + skip_rest, skip_dir, bs); + + for (i = 0; i < nb_bands; i++) { + int encode_flip; + /* Encode CFL flip bit just after the first time it's used. */ + encode_flip = pli != 0 && is_keyframe && theta[i] != -1 && !cfl_encoded; + if (i == 0 || (!skip_rest && !(skip_dir & (1 << ((i - 1)%3))))) { + pvq_encode_partition(&enc->ec, qg[i], theta[i], max_theta[i], y + off[i], + size[i], k[i], model, &enc->state.adapt, exg + i, ext + i, + robust || is_keyframe, (pli != 0)*OD_NBSIZES*PVQ_MAX_PARTITIONS + + bs*PVQ_MAX_PARTITIONS + i, is_keyframe, i == 0 && (i < nb_bands - 1), + skip_rest, encode_flip, flip); + } + if (i == 0 && !skip_rest && bs > 0) { + od_encode_cdf_adapt(&enc->ec, skip_dir, + &enc->state.adapt.pvq.pvq_skip_dir_cdf[(pli != 0) + 2*(bs - 1)][0], 7, + enc->state.adapt.pvq.pvq_skip_dir_increment); + } + if (encode_flip) cfl_encoded = 1; + } + tell = od_ec_enc_tell_frac(&enc->ec) - tell; + /* Account for the rate of skipping the AC, based on the same DC decision + we made when trying to not skip AC. */ + { + double skip_rate; + if (out[0] != 0) { + skip_rate = -OD_LOG2((skip_cdf[1] - skip_cdf[0])/ + (double)skip_cdf[3]); + } + else { + skip_rate = -OD_LOG2(skip_cdf[0]/ + (double)skip_cdf[3]); + } + tell -= (int)floor(.5+8*skip_rate); + } + if (nb_bands == 0 || skip_diff <= enc->pvq_norm_lambda/8*tell) { + if (is_keyframe) out[0] = 0; + else { + int n; + n = OD_DIV_R0(abs(in[0] - ref[0]), dc_quant); + if (n == 0) { + out[0] = 0; +#if PVQ_CHROMA_RD + } else if (pli == 0) { +#else + } else { +#endif + int tell2; + od_rollback_buffer dc_buf; + + dc_rate = -OD_LOG2((double)(skip_cdf[1] - skip_cdf[0])/ + (double)skip_cdf[0]); + dc_rate += 1; + + tell2 = od_ec_enc_tell_frac(&enc->ec); + od_encode_checkpoint(enc, &dc_buf); + generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli], + n - 1, -1, &enc->state.adapt.ex_dc[pli][bs][0], 2); + tell2 = od_ec_enc_tell_frac(&enc->ec) - tell2; + dc_rate += tell2/8.0; + od_encode_rollback(enc, &dc_buf); + + out[0] = od_rdo_quant(in[0] - ref[0], dc_quant, dc_rate, + enc->pvq_norm_lambda); + } + } + /* We decide to skip, roll back everything as it was before. */ + od_encode_rollback(enc, &buf); + od_encode_cdf_adapt(&enc->ec, out[0] != 0, skip_cdf, + 4, enc->state.adapt.skip_increment); + if (pvq_info) + pvq_info->ac_dc_coded = (out[0] != 0); +#if OD_SIGNAL_Q_SCALING + if (bs == OD_NBSIZES - 1 && pli == 0) { + int skip; + skip = out[0] == 0; + if (skip) { + q_scaling = 0; + } + od_encode_quantizer_scaling(enc, q_scaling, bx >> (OD_NBSIZES - 1), + by >> (OD_NBSIZES - 1), skip); + } +#endif + if (is_keyframe) for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = 0; + else for (i = 1; i < 1 << (2*bs + 4); i++) out[i] = ref[i]; + if (out[0] == 0) return 1; + } + return 0; +} diff --git a/av1/encoder/pvq_encoder.h b/av1/encoder/pvq_encoder.h new file mode 100644 index 0000000000000000000000000000000000000000..6cf1c3bb2821552b17f1a13a5cd1ea5b85528eb1 --- /dev/null +++ b/av1/encoder/pvq_encoder.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/* clang-format off */
+
+#if !defined(_pvq_encoder_H)
+# define _pvq_encoder_H (1)
+# include "aom_dsp/entenc.h"
+# include "av1/common/blockd.h"
+# include "av1/common/pvq.h"
+# include "av1/encoder/encint.h"
+
+#define PVQ_CHROMA_RD 1
+
+void od_encode_band_pvq_splits(od_ec_enc *ec, od_pvq_codeword_ctx *adapt,
+ const int *y, int n, int k, int level);
+
+void od_laplace_encode_special(od_ec_enc *enc, int x, unsigned decay, int max);
+void od_laplace_encode(od_ec_enc *enc, int x, int ex_q8, int k);
+void od_laplace_encode_vector(od_ec_enc *enc, const od_coeff *y, int n, int k,
+ int32_t *curr, const int32_t *means);
+
+#if OD_SIGNAL_Q_SCALING
+void od_encode_quantizer_scaling(daala_enc_ctx *enc, int q_scaling, int bx,
+ int by, int skip);
+#endif
+
+void pvq_encode_partition(od_ec_enc *ec,
+                          int qg,
+                          int theta,
+                          int max_theta,
+                          const od_coeff *in,
+                          int n,
+                          int k,
+                          generic_encoder model[3],
+                          od_adapt_ctx *adapt,
+                          int *exg,
+                          int *ext,
+                          int nodesync,
+                          int cdf_ctx,
+                          int is_keyframe,
+                          int code_skip,
+                          int skip_rest,
+                          int encode_flip,
+                          int flip);
+
+int od_pvq_encode(daala_enc_ctx *enc, od_coeff *ref, const od_coeff *in,
+ od_coeff *out, int q_dc, int q_ac, int pli, int bs, const od_val16 *beta, int robust,
+ int is_keyframe, int q_scaling, int bx, int by, const int16_t *qm,
+ const int16_t *qm_inv, int speed, PVQ_INFO *pvq_info);
+
+#endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 0b9c91e780799ba98c4dd36bd9276b88a9835c74..92ef15a9be11dc410385d26511d4046fc028c5a8 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -48,6 +48,10 @@
 #include "av1/encoder/rd.h"
 #include "av1/encoder/rdopt.h"
 
+#if CONFIG_PVQ
+#include "av1/encoder/pvq_encoder.h"
+#endif
+
 #if CONFIG_EXT_REFS
 #define LAST_FRAME_MODE_MASK \
@@ -302,6 +306,33 @@ static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
   *out_dist_sum = dist_sum << 4;
 }
 
+#if CONFIG_PVQ
+// Without PVQ, av1_block_error_c() returns two kinds of errors:
+// 1) the reconstruction (i.e. decoded) error and
+// 2) the squared sum of the transformed residue (i.e. 'coeff').
+// However, if PVQ is enabled, coeff does not hold the transformed residue;
+// it holds the transformed original instead.
+// Hence, a new parameter, the ref vector (i.e. the transformed predicted
+// signal), is required to derive the residue:
+// coeff - ref = residue (all in the transform domain).
+
+// TODO(yushin): Since the 4x4 case does not need ssz, it would be better to
+// refactor this into a separate function that skips the extra ssz
+// computation.
+int64_t av1_block_error2_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
+                           const tran_low_t *ref, intptr_t block_size,
+                           int64_t *ssz) {
+  int64_t error;
+
+  // Use the existing sse code to calculate the distortion of the decoded
+  // signal: i.e.
(orig - decoded)^2 + error = av1_block_error_fp(coeff, dqcoeff, block_size); + // prediction residue^2 = (orig - ref)^2 + *ssz = av1_block_error_fp(coeff, ref, block_size); + + return error; +} +#endif + int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz) { int i; @@ -353,6 +384,7 @@ int64_t av1_highbd_block_error_c(const tran_low_t *coeff, } #endif // CONFIG_AOM_HIGHBITDEPTH +#if !CONFIG_PVQ /* The trailing '0' is a terminator which is used inside cost_coeffs() to * decide whether to include cost of a trailing EOB node or not (i.e. we * can skip this if the last coefficient in this transform block, e.g. the @@ -455,6 +487,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, return cost; } +#endif static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse) { @@ -466,11 +499,18 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, int shift = tx_size == TX_32X32 ? 0 : 2; tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); +#if CONFIG_PVQ + tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block); +#endif #if CONFIG_AOM_HIGHBITDEPTH const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8; *out_dist = av1_highbd_block_error(coeff, dqcoeff, ss_txfrm_size, &this_sse, bd) >> shift; +#elif CONFIG_PVQ + *out_dist = + av1_block_error2_c(coeff, dqcoeff, ref_coeff, ss_txfrm_size, &this_sse) >> + shift; #else *out_dist = av1_block_error(coeff, dqcoeff, ss_txfrm_size, &this_sse) >> shift; @@ -478,12 +518,14 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, *out_sse = this_sse >> shift; } +#if !CONFIG_PVQ static int rate_block(int plane, int block, int blk_row, int blk_col, TX_SIZE tx_size, struct rdcost_block_args *args) { return cost_coeffs(args->cm, args->x, plane, block, args->t_above + blk_col, args->t_left + blk_row, tx_size, args->scan_order->scan, args->scan_order->neighbors, args->use_fast_coef_costing); } +#endif static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { @@ -516,8 +558,11 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, args->exit_early = 1; return; } - +#if !CONFIG_PVQ rate = rate_block(plane, block, blk_row, blk_col, tx_size, args); +#else + rate = x->rate; +#endif rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse); @@ -533,8 +578,11 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, args->exit_early = 1; return; } - +#if !CONFIG_PVQ args->skippable &= !x->plane[plane].eobs[block]; +#else + args->skippable &= x->pvq_skip[plane]; +#endif } static void txfm_rd_in_plane(const AV1_COMMON *const cm, MACROBLOCK *x, @@ -599,11 +647,22 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, *sse = INT64_MAX; mbmi->tx_size = AOMMIN(max_tx_size, largest_tx_size); + if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) { - for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) { +#if CONFIG_PVQ + od_rollback_buffer pre_buf, post_buf; + + od_encode_checkpoint(&x->daala_enc, &pre_buf); + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif + + for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { mbmi->tx_type = tx_type; txfm_rd_in_plane(cm, x, &r, &d, &s, &psse, ref_best_rd, 0, bs, mbmi->tx_size, 
cpi->sf.use_fast_coef_costing); +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &pre_buf); +#endif if (r == INT_MAX) continue; if (is_inter) r += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type]; @@ -625,8 +684,14 @@ static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, *rate = r; *skip = s; *sse = psse; +#if CONFIG_PVQ + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif } } +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &post_buf); +#endif } else { txfm_rd_in_plane(cm, x, rate, distortion, skip, sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); @@ -670,6 +735,9 @@ static void choose_tx_size_from_rd(const AV1_COMP *const cpi, MACROBLOCK *x, const int is_inter = is_inter_block(mbmi); const aom_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); +#if CONFIG_PVQ + od_rollback_buffer buf; +#endif assert(skip_prob > 0); s0 = av1_cost_bit(skip_prob, 0); s1 = av1_cost_bit(skip_prob, 1); @@ -689,6 +757,10 @@ static void choose_tx_size_from_rd(const AV1_COMP *const cpi, MACROBLOCK *x, *skip = 0; *psse = INT64_MAX; +#if CONFIG_PVQ + od_encode_checkpoint(&x->daala_enc, &buf); +#endif + for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { #if CONFIG_REF_MV if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue; @@ -710,6 +782,9 @@ static void choose_tx_size_from_rd(const AV1_COMP *const cpi, MACROBLOCK *x, mbmi->tx_type = tx_type; txfm_rd_in_plane(cm, x, &r, &d, &s, &sse, ref_best_rd, 0, bs, n, cpi->sf.use_fast_coef_costing); +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &buf); +#endif if (n < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] && r != INT_MAX) { if (is_inter) @@ -761,6 +836,11 @@ static void choose_tx_size_from_rd(const AV1_COMP *const cpi, MACROBLOCK *x, mbmi->tx_type = best_tx_type; if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT); +#if CONFIG_PVQ + if (best_tx < TX_SIZES) + txfm_rd_in_plane(cm, x, &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx, + cpi->sf.use_fast_coef_costing); +#endif } static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, @@ -954,7 +1034,9 @@ static int64_t rd_pick_intra4x4block(const AV1_COMP *const cpi, MACROBLOCK *x, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion, BLOCK_SIZE bsize, int64_t rd_thresh) { +#if !CONFIG_PVQ const AV1_COMMON *const cm = &cpi->common; +#endif PREDICTION_MODE mode; MACROBLOCKD *const xd = &x->e_mbd; int64_t best_rd = rd_thresh; @@ -974,6 +1056,12 @@ static int64_t rd_pick_intra4x4block(const AV1_COMP *const cpi, MACROBLOCK *x, uint16_t best_dst16[8 * 8]; #endif +#if CONFIG_PVQ + od_rollback_buffer pre_buf, post_buf; + od_encode_checkpoint(&x->daala_enc, &pre_buf); + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif + memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0])); memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0])); xd->mi[0]->mbmi.tx_size = TX_4X4; @@ -1082,6 +1170,10 @@ static int64_t rd_pick_intra4x4block(const AV1_COMP *const cpi, MACROBLOCK *x, } #endif // CONFIG_AOM_HIGHBITDEPTH +#if CONFIG_PVQ + od_encode_checkpoint(&x->daala_enc, &pre_buf); +#endif + for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; int ratey = 0; @@ -1104,15 +1196,45 @@ static int64_t rd_pick_intra4x4block(const AV1_COMP *const cpi, MACROBLOCK *x, const int block = (row + idy) * 2 + (col + idx); const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; + tran_low_t *const coeff = 
BLOCK_OFFSET(x->plane[0].coeff, block); +#if !CONFIG_PVQ int16_t *const src_diff = av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); - tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); +#else + int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; + const int diff_stride = 8; + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block); + int16_t *pred = &pd->pred[4 * (row * diff_stride + col)]; + int16_t *src_int16 = &p->src_int16[4 * (row * diff_stride + col)]; + int i, j, tx_blk_size; + TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block); + int rate_pvq; + int skip; +#endif xd->mi[0]->bmi[block].as_mode = mode; av1_predict_intra_block(xd, 1, 1, TX_4X4, mode, dst, dst_stride, dst, dst_stride, col + idx, row + idy, 0); +#if !CONFIG_PVQ aom_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride); +#else + if (lossless) tx_type = DCT_DCT; + // transform block size in pixels + tx_blk_size = 4; + + // copy uint8 orig and predicted block to int16 buffer + // in order to use existing VP10 transform functions + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) { + src_int16[diff_stride * j + i] = src[src_stride * j + i]; + pred[diff_stride * j + i] = dst[dst_stride * j + i]; + } + av1_fwd_txfm_4x4(src_int16, coeff, diff_stride, tx_type, lossless); + av1_fwd_txfm_4x4(pred, ref_coeff, diff_stride, tx_type, lossless); +#endif if (xd->lossless[xd->mi[0]->mbmi.segment_id]) { +#if !CONFIG_PVQ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block); const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type); av1_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1); @@ -1121,12 +1243,27 @@ static int64_t rd_pick_intra4x4block(const AV1_COMP *const cpi, MACROBLOCK *x, ratey += cost_coeffs(cm, x, 0, block, tempa + idx, templ + idy, TX_4X4, scan_order->scan, scan_order->neighbors, cpi->sf.use_fast_coef_costing); +#else + skip = av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff, + &p->eobs[block], pd->dequant, 0, TX_4X4, + tx_type, &rate_pvq, x->pvq_speed, NULL); + ratey += rate_pvq; +#endif if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; - av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst, - dst_stride, p->eobs[block], DCT_DCT, 1); +#if CONFIG_PVQ + if (!skip) { + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0; +#endif + av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst, + dst_stride, p->eobs[block], DCT_DCT, 1); +#if CONFIG_PVQ + } +#endif } else { int64_t unused; +#if !CONFIG_PVQ TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block); const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type); av1_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0); @@ -1135,16 +1272,31 @@ static int64_t rd_pick_intra4x4block(const AV1_COMP *const cpi, MACROBLOCK *x, ratey += cost_coeffs(cm, x, 0, block, tempa + idx, templ + idy, TX_4X4, scan_order->scan, scan_order->neighbors, cpi->sf.use_fast_coef_costing); +#else + skip = av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff, + &p->eobs[block], pd->dequant, 0, TX_4X4, + tx_type, &rate_pvq, x->pvq_speed, NULL); + ratey += rate_pvq; +#endif + // No need for av1_block_error2_c because the ssz is unused distortion += av1_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; - av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst, - 
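
// Note on the reconstruction pattern above: in the PVQ path dqcoeff holds the
// transform of the full predicted-plus-coded block (PVQ codes coeff against
// ref_coeff in the frequency domain), not a spatial residual, while
// av1_inv_txfm_add_4x4() *adds* its output onto dst. The predictor pixels
// already sitting in dst therefore have to be cleared first, and on skip dst
// already holds the correct prediction. A sketch of just that logic (4x4):
static void pvq_recon_sketch(tran_low_t *dqcoeff, uint8_t *dst, int dst_stride,
                             uint16_t eob, TX_TYPE tx_type, int lossless,
                             int skip) {
  if (!skip) {
    int i, j;
    for (j = 0; j < 4; j++)
      for (i = 0; i < 4; i++) dst[j * dst_stride + i] = 0;  // clear predictor
    // inverse transform of the whole block, added onto the zeroed dst
    av1_inv_txfm_add_4x4(dqcoeff, dst, dst_stride, eob, tx_type, lossless);
  }
  // on skip: dst keeps the prediction untouched
}
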
dst_stride, p->eobs[block], tx_type, 0); +#if CONFIG_PVQ + if (!skip) { + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) dst[j * dst_stride + i] = 0; +#endif + av1_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), dst, + dst_stride, p->eobs[block], tx_type, 0); +#if CONFIG_PVQ + } +#endif } } - } + } // idy loop rate += ratey; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); @@ -1157,15 +1309,25 @@ static int64_t rd_pick_intra4x4block(const AV1_COMP *const cpi, MACROBLOCK *x, *best_mode = mode; memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0])); memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0])); +#if CONFIG_PVQ + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) memcpy(best_dst + idy * 8, dst_init + idy * dst_stride, num_4x4_blocks_wide * 4); } next : {} - } +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &pre_buf); +#endif + } // mode decision loop if (best_rd >= rd_thresh) return best_rd; +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &post_buf); +#endif + for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) memcpy(dst_init + idy * dst_stride, best_dst + idy * 8, num_4x4_blocks_wide * 4); @@ -1537,6 +1699,12 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, const MODE_INFO *left_mi = xd->left_mi; const PREDICTION_MODE A = av1_above_block_mode(xd->mi[0], above_mi, 0); const PREDICTION_MODE L = av1_left_block_mode(xd->mi[0], left_mi, 0); +#if CONFIG_PVQ + od_rollback_buffer pre_buf, post_buf; + + od_encode_checkpoint(&x->daala_enc, &pre_buf); + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif bmode_costs = cpi->y_mode_costs[A][L]; #if CONFIG_EXT_INTRA @@ -1564,6 +1732,9 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, for (mode = DC_PRED; mode <= TM_PRED; mode++) { mbmi->mode = mode; +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &pre_buf); +#endif #if CONFIG_EXT_INTRA if (is_directional_mode(mbmi->mode)) { if (directional_mode_skip_mask[mbmi->mode]) continue; @@ -1613,9 +1784,16 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; *skippable = s; +#if CONFIG_PVQ + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif } } +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &post_buf); +#endif + #if CONFIG_PALETTE if (cpi->common.allow_screen_content_tools) rd_pick_palette_intra_sby(cpi, x, bsize, palette_ctx, bmode_costs[DC_PRED], @@ -1659,12 +1837,12 @@ static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int is_cost_valid = 1; if (ref_best_rd < 0) is_cost_valid = 0; - +#if !CONFIG_PVQ if (is_inter_block(mbmi) && is_cost_valid) { for (plane = 1; plane < MAX_MB_PLANE; ++plane) av1_subtract_plane(x, bsize, plane); } - +#endif *rate = 0; *distortion = 0; *sse = 0; @@ -1948,6 +2126,11 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, int64_t best_rd = INT64_MAX, this_rd; int this_rate_tokenonly, this_rate, s; int64_t this_distortion, this_sse; +#if CONFIG_PVQ + od_rollback_buffer buf; + + od_encode_checkpoint(&x->daala_enc, &buf); +#endif #if CONFIG_PALETTE MACROBLOCKD *const xd = &x->e_mbd; const int rows = @@ -1982,15 +2165,23 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, } else { mbmi->intra_angle_delta[1] = 0; if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, - &this_sse, bsize, 
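
// The repeated `if (RDCOST(...) >= best_rd) goto next;` checks above are safe
// because RDCOST() is monotonically non-decreasing in both rate and
// distortion: once a partially accumulated (rate, distortion) pair already
// costs as much as the current best, later coefficients can only add cost.
// A sketch of the comparison (RDCOST itself is the macro from
// av1/encoder/rd.h; only its monotonicity is assumed here):
static INLINE int should_prune_sketch(int rdmult, int rddiv, int rate_so_far,
                                      int64_t dist_so_far, int64_t best_rd) {
  return RDCOST(rdmult, rddiv, rate_so_far, dist_so_far) >= best_rd;
}
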
best_rd))
+                          &this_sse, bsize, best_rd)) {
+#if CONFIG_PVQ
+        od_encode_rollback(&x->daala_enc, &buf);
+#endif
         continue;
+      }
       rate_overhead = cpi->intra_uv_mode_cost[mbmi->mode][mode];
     }
     this_rate = this_rate_tokenonly + rate_overhead;
 #else
     if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
-                          &this_sse, bsize, best_rd))
+                          &this_sse, bsize, best_rd)) {
+#if CONFIG_PVQ
+      od_encode_rollback(&x->daala_enc, &buf);
+#endif
       continue;
+    }
     this_rate =
         this_rate_tokenonly + cpi->intra_uv_mode_cost[mbmi->mode][mode];
 #endif  // CONFIG_EXT_INTRA
@@ -2000,7 +2191,22 @@
       this_rate += av1_cost_bit(
           av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 0);
 #endif  // CONFIG_PALETTE
+
+#if CONFIG_PVQ
+    // For chroma channels, multiply lambda by 0.5 when doing intra prediction.
+    // NOTE: Chroma intra prediction itself has a separate RDO, but the final
+    // chroma intra mode's D and R are simply added to those of luma, and a
+    // global RDO is then performed to decide the modes of the SB.
+    // Also, for chroma the RDO cannot decide tx_size (it follows luma's
+    // decision) or tx_type (DCT only), so the intra prediction mode is the
+    // only decision chroma makes through its own RDO.
+    // TODO(yushin): Seek a more reasonable solution than this.
+    this_rd = RDCOST(x->rdmult >> (1 * PVQ_CHROMA_RD), x->rddiv, this_rate,
+                     this_distortion);
+    od_encode_rollback(&x->daala_enc, &buf);
+#else
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+#endif
 
     if (this_rd < best_rd) {
       mode_selected = mode;
@@ -2161,11 +2367,14 @@ static int set_and_cost_bmi_mvs(const AV1_COMP *const cpi, MACROBLOCK *x,
 }
 
 static int64_t encode_inter_mb_segment(const AV1_COMP *const cpi, MACROBLOCK *x,
-                                       int64_t best_yrd, int i, int *labelyrate,
-                                       int64_t *distortion, int64_t *sse,
-                                       ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
-                                       int ir, int ic, int mi_row, int mi_col) {
+                                       int64_t best_yrd, int block,
+                                       int *labelyrate, int64_t *distortion,
+                                       int64_t *sse, ENTROPY_CONTEXT *ta,
+                                       ENTROPY_CONTEXT *tl, int ir, int ic,
+                                       int mi_row, int mi_col) {
+#if !CONFIG_PVQ
   const AV1_COMMON *const cm = &cpi->common;
+#endif
   int k;
   MACROBLOCKD *xd = &x->e_mbd;
   struct macroblockd_plane *const pd = &xd->plane[0];
@@ -2176,17 +2385,22 @@ static int64_t encode_inter_mb_segment(const AV1_COMP *const cpi, MACROBLOCK *x,
   const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
   int idx, idy;
   void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
   const uint8_t *const src =
-      &p->src.buf[av1_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
+      &p->src.buf[av1_raster_block_offset(BLOCK_8X8, block, p->src.stride)];
   uint8_t *const dst =
-      &pd->dst.buf[av1_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
+      &pd->dst.buf[av1_raster_block_offset(BLOCK_8X8, block, pd->dst.stride)];
   int64_t thisdistortion = 0, thissse = 0;
   int thisrate = 0;
-  TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i);
+  TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block);
+#if !CONFIG_PVQ
   const SCAN_ORDER *scan_order = get_scan(cm, TX_4X4, tx_type);
+#else
+  (void)cpi;
+  (void)ta;
+  (void)tl;
+#endif
 
-  av1_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col);
+  av1_build_inter_predictor_sub8x8(xd, 0, block, ir, ic, mi_row, mi_col);
 
 #if CONFIG_AOM_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -2199,33 +2413,72 @@ static int64_t encode_inter_mb_segment(const AV1_COMP *const cpi, MACROBLOCK *x,
     fwd_txm4x4 =
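
// The chroma-specific cost above: shifting rdmult right by PVQ_CHROMA_RD
// effectively multiplies lambda by 0.5, assuming PVQ_CHROMA_RD is defined as
// 1 (which the `>> (1 * PVQ_CHROMA_RD)` spelling suggests). Halving the rate
// weight makes bits cheaper relative to distortion, so the chroma search
// leans toward lower-distortion modes. Illustrative use (values invented):
static void chroma_lambda_example(const MACROBLOCK *x, int rate, int64_t dist) {
  const int64_t luma_style = RDCOST(x->rdmult, x->rddiv, rate, dist);
  const int64_t chroma_style = RDCOST(x->rdmult >> 1, x->rddiv, rate, dist);
  // For any rate > 0, chroma_style < luma_style: the same rate is penalized
  // half as much, so higher-rate/lower-distortion candidates win more often.
  (void)luma_style;
  (void)chroma_style;
}
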
xd->lossless[mi->mbmi.segment_id] ? av1_fwht4x4 : aom_fdct4x4; #endif // CONFIG_AOM_HIGHBITDEPTH +#if !CONFIG_PVQ #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { aom_highbd_subtract_block( - height, width, av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), - 8, src, p->src.stride, dst, pd->dst.stride, xd->bd); + height, width, + av1_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff), 8, src, + p->src.stride, dst, pd->dst.stride, xd->bd); } else { - aom_subtract_block(height, width, - av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), + aom_subtract_block(height, width, av1_raster_block_offset_int16( + BLOCK_8X8, block, p->src_diff), 8, src, p->src.stride, dst, pd->dst.stride); } #else - aom_subtract_block(height, width, - av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), + aom_subtract_block(height, width, av1_raster_block_offset_int16( + BLOCK_8X8, block, p->src_diff), 8, src, p->src.stride, dst, pd->dst.stride); #endif // CONFIG_AOM_HIGHBITDEPTH +#endif // !CONFIG_PVQ - k = i; + k = block; for (idy = 0; idy < height / 4; ++idy) { for (idx = 0; idx < width / 4; ++idx) { int64_t ssz, rd, rd1, rd2; tran_low_t *coeff; - +#if CONFIG_PVQ + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + const int diff_stride = 8; + tran_low_t *dqcoeff; + tran_low_t *ref_coeff; + int16_t *pred = &pd->pred[4 * (ir * diff_stride + ic)]; + int16_t *src_int16 = &p->src_int16[4 * (ir * diff_stride + ic)]; + int i, j, tx_blk_size; + int rate_pvq; +#endif k += (idy * 2 + idx); coeff = BLOCK_OFFSET(p->coeff, k); +#if !CONFIG_PVQ fwd_txm4x4(av1_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), coeff, 8); av1_regular_quantize_b_4x4(x, 0, k, scan_order->scan, scan_order->iscan); +#else + dqcoeff = BLOCK_OFFSET(pd->dqcoeff, k); + ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, k); + + // transform block size in pixels + tx_blk_size = 4; + + // copy uint8 orig and predicted block to int16 buffer + // in order to use existing VP10 transform functions + for (j = 0; j < tx_blk_size; j++) + for (i = 0; i < tx_blk_size; i++) { + src_int16[diff_stride * j + i] = + src[src_stride * (j + 4 * idy) + (i + 4 * idx)]; + pred[diff_stride * j + i] = + dst[dst_stride * (j + 4 * idy) + (i + 4 * idx)]; + } + + fwd_txm4x4(src_int16, coeff, diff_stride); + fwd_txm4x4(pred, ref_coeff, diff_stride); + + av1_pvq_encode_helper(&x->daala_enc, coeff, ref_coeff, dqcoeff, + &p->eobs[k], pd->dequant, 0, TX_4X4, tx_type, + &rate_pvq, x->pvq_speed, NULL); +#endif + #if CONFIG_AOM_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { thisdistortion += av1_highbd_block_error( @@ -2234,14 +2487,21 @@ static int64_t encode_inter_mb_segment(const AV1_COMP *const cpi, MACROBLOCK *x, thisdistortion += av1_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz); } +#elif CONFIG_PVQ + thisdistortion += av1_block_error2_c(coeff, BLOCK_OFFSET(pd->dqcoeff, k), + ref_coeff, 16, &ssz); #else thisdistortion += av1_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz); #endif // CONFIG_AOM_HIGHBITDEPTH thissse += ssz; +#if !CONFIG_PVQ thisrate += cost_coeffs(cm, x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4, scan_order->scan, scan_order->neighbors, cpi->sf.use_fast_coef_costing); +#else + thisrate += rate_pvq; +#endif rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); rd = AOMMIN(rd1, rd2); @@ -2588,6 +2848,11 @@ static int64_t rd_pick_best_sub8x8_mode( const int has_second_rf = 
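
// The block above is the core PVQ restructuring: instead of transforming the
// spatial residual (src - pred), both the source and the prediction are
// transformed separately and PVQ codes coeff against ref_coeff in the
// frequency domain. A sketch of that front end with the same buffer layout
// the patch uses (4x4 case; fwd_txm4x4 as selected above):
static void pvq_forward_sketch(const uint8_t *src, int src_stride,
                               const uint8_t *dst, int dst_stride,
                               int16_t *src_int16, int16_t *pred,
                               int diff_stride, tran_low_t *coeff,
                               tran_low_t *ref_coeff,
                               void (*fwd_txm4x4)(const int16_t *input,
                                                  tran_low_t *output,
                                                  int stride)) {
  int i, j;
  // widen source and predictor to int16 so the integer transforms apply
  for (j = 0; j < 4; j++)
    for (i = 0; i < 4; i++) {
      src_int16[diff_stride * j + i] = src[src_stride * j + i];
      pred[diff_stride * j + i] = dst[dst_stride * j + i];
    }
  fwd_txm4x4(src_int16, coeff, diff_stride);  // T(source)
  fwd_txm4x4(pred, ref_coeff, diff_stride);   // T(prediction)
  // av1_pvq_encode_helper() then quantizes and entropy-codes coeff relative
  // to ref_coeff in one fused step, which is why it also returns the rate
  // (rate_pvq) and the skip decision.
}
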
has_second_ref(mbmi); const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; +#if CONFIG_PVQ + od_rollback_buffer pre_buf; + + od_encode_checkpoint(&x->daala_enc, &pre_buf); +#endif av1_zero(*bsi); @@ -2619,6 +2884,11 @@ static int64_t rd_pick_best_sub8x8_mode( int64_t new_best_rd = INT64_MAX; const int index = idy * 2 + idx; int ref; +#if CONFIG_PVQ + od_rollback_buffer idx_buf, post_buf; + od_encode_checkpoint(&x->daala_enc, &idx_buf); + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif for (ref = 0; ref < 1 + has_second_rf; ++ref) { const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; @@ -2646,6 +2916,9 @@ static int64_t rd_pick_best_sub8x8_mode( sizeof(bsi->rdstat[index][mode_idx].ta)); memcpy(bsi->rdstat[index][mode_idx].tl, t_left, sizeof(bsi->rdstat[index][mode_idx].tl)); +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &idx_buf); +#endif // motion search for newmv (single predictor case only) if (!has_second_rf && this_mode == NEWMV && @@ -2847,6 +3120,9 @@ static int64_t rd_pick_best_sub8x8_mode( if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) { mode_selected = this_mode; new_best_rd = bsi->rdstat[index][mode_idx].brdcost; +#if CONFIG_PVQ + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif } continue; } @@ -2873,6 +3149,10 @@ static int64_t rd_pick_best_sub8x8_mode( if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) { mode_selected = this_mode; new_best_rd = bsi->rdstat[index][mode_idx].brdcost; + +#if CONFIG_PVQ + od_encode_checkpoint(&x->daala_enc, &post_buf); +#endif } } /*for each 4x4 mode*/ @@ -2882,12 +3162,18 @@ static int64_t rd_pick_best_sub8x8_mode( for (midx = 0; midx < INTER_MODES; ++midx) bsi->rdstat[iy][midx].brdcost = INT64_MAX; bsi->segment_rd = INT64_MAX; +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &pre_buf); +#endif return INT64_MAX; } mode_idx = INTER_OFFSET(mode_selected); memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above)); memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left)); +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &post_buf); +#endif set_and_cost_bmi_mvs(cpi, x, xd, index, mode_selected, mode_mv[mode_selected], frame_mv, seg_mvs[index], @@ -2905,10 +3191,16 @@ static int64_t rd_pick_best_sub8x8_mode( for (midx = 0; midx < INTER_MODES; ++midx) bsi->rdstat[iy][midx].brdcost = INT64_MAX; bsi->segment_rd = INT64_MAX; +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &pre_buf); +#endif return INT64_MAX; } } } /* for each label */ +#if CONFIG_PVQ + od_encode_rollback(&x->daala_enc, &pre_buf); +#endif bsi->r = br; bsi->d = bd; @@ -3721,8 +4013,10 @@ static int64_t handle_inter_mode( int64_t sseuv = INT64_MAX; int64_t rdcosty = INT64_MAX; - // Y cost and distortion +// Y cost and distortion +#if !CONFIG_PVQ av1_subtract_plane(x, bsize, 0); +#endif super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize, ref_best_rd); @@ -4087,6 +4381,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int64_t mode_threshold[MAX_MODES]; int *mode_map = tile_data->mode_map[bsize]; const int mode_search_skip_flags = sf->mode_search_skip_flags; +#if CONFIG_PVQ + od_rollback_buffer pre_buf; +#endif + #if CONFIG_PALETTE || CONFIG_EXT_INTRA const int rows = 4 * num_4x4_blocks_high_lookup[bsize]; const int cols = 4 * num_4x4_blocks_wide_lookup[bsize]; @@ -4318,6 +4616,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, midx = end_pos; } +#if CONFIG_PVQ + 
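
// Checkpoint nesting in the sub8x8 search around this point, assuming the
// same snapshot/restore semantics as elsewhere in this patch:
//   pre_buf  - taken once on entry and restored on every early-return path,
//              so callers always see untouched coder state;
//   idx_buf  - taken per sub-block and restored before each candidate mode,
//              so every mode is trial-coded from the same starting state;
//   post_buf - refreshed whenever a candidate becomes the current best and
//              restored after the per-block mode loop, re-applying the
//              winner's symbols and adaptation before the next sub-block.
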
od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
   for (midx = 0; midx < MAX_MODES; ++midx) {
     int mode_index = mode_map[midx];
     int mode_excluded = 0;
@@ -4331,6 +4632,9 @@
     int64_t total_sse = INT64_MAX;
 #if CONFIG_REF_MV
     uint8_t ref_frame_type;
+#endif
+#if CONFIG_PVQ
+    od_encode_rollback(&x->daala_enc, &pre_buf);
 #endif
     this_mode = av1_mode_order[mode_index].mode;
     ref_frame = av1_mode_order[mode_index].ref_frame[0];
@@ -5231,6 +5535,11 @@
   int ref_frame_skip_mask[2] = { 0 };
   int internal_active_edge =
       av1_active_edge_sb(cpi, mi_row, mi_col) && av1_internal_image_edge(cpi);
+#if CONFIG_PVQ
+  od_rollback_buffer pre_buf;
+
+  od_encode_checkpoint(&x->daala_enc, &pre_buf);
+#endif
 
   av1_zero(best_mbmode);
@@ -5278,6 +5587,10 @@
     int this_skip2 = 0;
     int64_t total_sse = INT_MAX;
 
+#if CONFIG_PVQ
+    od_encode_rollback(&x->daala_enc, &pre_buf);
+#endif
+
     ref_frame = av1_ref_order[ref_index].ref_frame[0];
     second_ref_frame = av1_ref_order[ref_index].ref_frame[1];
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index d5afaae51ad6c0451655435981c9b2f5e13801a4..142bde8046b449d28e2cb7b4b0e73c2731bbb600 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -304,6 +304,7 @@ struct tokenize_b_args {
   TOKENEXTRA **tp;
 };
 
+#if !CONFIG_PVQ
 static void set_entropy_context_b(int plane, int block, int blk_row,
                                   int blk_col, BLOCK_SIZE plane_bsize,
                                   TX_SIZE tx_size, void *arg) {
@@ -473,6 +474,7 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
 
   av1_set_contexts(xd, pd, tx_size, c > 0, blk_col, blk_row);
 }
+#endif
 
 struct is_skippable_args {
   uint16_t *eobs;
@@ -520,6 +522,42 @@ int av1_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   return result;
 }
 
+#if CONFIG_PVQ
+void add_pvq_block(AV1_COMMON *const cm, MACROBLOCK *const x, PVQ_INFO *pvq) {
+  PVQ_QUEUE *q = x->pvq_q;
+  if (q->curr_pos >= q->buf_len) {
+    q->buf_len = 2 * q->buf_len + 1;
+    CHECK_MEM_ERROR(cm, q->buf,
+                    aom_realloc(q->buf, q->buf_len * sizeof(PVQ_INFO)));
+  }
+  OD_COPY(q->buf + q->curr_pos, pvq, 1);
+  ++q->curr_pos;
+}
+
+// NOTE: This does not actually generate tokens. Instead, we store the
+// encoding decisions made for PVQ in a queue that we read from when
+// actually writing the bitstream in write_modes_b.
+static void tokenize_pvq(int plane, int block, int blk_row, int blk_col,
+                         BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
+  struct tokenize_b_args *const args = arg;
+  const AV1_COMP *cpi = args->cpi;
+  const AV1_COMMON *const cm = &cpi->common;
+  ThreadData *const td = args->td;
+  MACROBLOCK *const x = &td->mb;
+  PVQ_INFO *pvq_info;
+
+  (void)block;
+  (void)blk_row;
+  (void)blk_col;
+  (void)plane_bsize;
+  (void)tx_size;
+
+  assert(block < MAX_PVQ_BLOCKS_IN_SB);
+  pvq_info = &x->pvq[block][plane];
+  add_pvq_block((AV1_COMMON *const)cm, x, pvq_info);
+}
+#endif
+
 void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                      int dry_run, BLOCK_SIZE bsize) {
   const AV1_COMMON *const cm = &cpi->common;
@@ -536,11 +574,11 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
     return;
   }
 
+#if !CONFIG_PVQ
   if (!dry_run) {
     int plane;
 
     td->counts->skip[ctx][0] += skip_inc;
-
     for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
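
// add_pvq_block() above grows its queue with buf_len = 2 * buf_len + 1, which
// works from buf_len == 0 and gives amortized O(1) appends. The same policy
// in a standalone sketch (plain realloc instead of aom_realloc and
// CHECK_MEM_ERROR so it compiles on its own; the element type is opaque):
#include <stdlib.h>
#include <string.h>

typedef struct {
  void *buf;
  int buf_len;   // capacity in elements
  int curr_pos;  // number of elements stored
} grow_queue_sketch;

static int grow_queue_push(grow_queue_sketch *q, const void *elem,
                           size_t elem_size) {
  if (q->curr_pos >= q->buf_len) {
    const int new_len = 2 * q->buf_len + 1;  // 0 -> 1 -> 3 -> 7 -> 15 ...
    void *p = realloc(q->buf, (size_t)new_len * elem_size);
    if (p == NULL) return -1;  // the patch aborts via CHECK_MEM_ERROR instead
    q->buf = p;
    q->buf_len = new_len;
  }
  memcpy((char *)q->buf + (size_t)q->curr_pos * elem_size, elem, elem_size);
  return q->curr_pos++;
}
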
av1_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_b, &arg); @@ -550,4 +588,15 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, } else { av1_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); } +#else + if (!dry_run) { + int plane; + + td->counts->skip[ctx][0] += skip_inc; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) + av1_foreach_transformed_block_in_plane(xd, bsize, plane, tokenize_pvq, + &arg); + } +#endif } diff --git a/configure b/configure index 64d48b0bebd4587384c4caf63cc28a643a0a9dd9..7f867e585627359ab58c302e0c6fb7c65507a0f0 100755 --- a/configure +++ b/configure @@ -259,6 +259,7 @@ EXPERIMENT_LIST=" ans ec_multisymbol daala_ec + pvq parallel_deblocking cb4x4 palette diff --git a/test/divu_small_test.cc b/test/divu_small_test.cc index ea6da474697c33666db27e72b092d46ca7625e36..d3a134a2bff36d2af285a15f734f67207e9de907 100644 --- a/test/divu_small_test.cc +++ b/test/divu_small_test.cc @@ -1,26 +1,13 @@ -/*Daala video codec -Copyright (c) 2013 Daala project contributors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ +/* + * Copyright (c) 2016, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. + */ #include