Commit 0818a7c8 authored by Yaowu Xu

Port commits related to clpf and qm experiments

Manually cherry-picked the following commits from the AOMedia git repository:
bb2727c2 Sort includess for "clpf.h"
c297fd00 Add quantisation matrix range parameters.
0527894f Add encoder option and signaling for quant matrix control.
4106232b Turn off trellis coding for quantization matrices.
4017fca3 Modify tests to allow quantization matrices.
1c122c24 Add quant and dequant functions for new quant matrices.
95a89994 Enable CLPF
f72782bb Fix a build issue
73bae509 Add quantisation matrices and selection functions
33208d20 Added support for constrained low pass filter (CLPF)

Change-Id: I60fc1ee1ac40e6b9d1d00affd97547ee5d5dd6be
parent ac917ec2
......@@ -252,6 +252,7 @@ HAVE_LIST="
EXPERIMENT_LIST="
fp_mb_stats
emulate_hardware
clpf
var_tx
rect_tx
ref_mv
......@@ -328,6 +329,7 @@ CONFIG_LIST="
better_hw_compatibility
experimental
size_limit
aom_qm
${EXPERIMENT_LIST}
"
CMDLINE_SELECT="
......@@ -386,6 +388,7 @@ CMDLINE_SELECT="
better_hw_compatibility
vp9_highbitdepth
experimental
aom_qm
"
process_cmdline() {
......
......@@ -26,6 +26,7 @@
#include "vpx_mem/vpx_mem.h"
namespace {
#if !CONFIG_AOM_QM
const int kNumBlocks = 25;
const int kNumBlockEntries = 16;
......@@ -199,4 +200,5 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
#endif // HAVE_MSA
#endif // CONFIG_AOM_QM
} // namespace
......@@ -295,11 +295,17 @@ typedef struct macroblockd_plane {
// log2 of n4_w, n4_h
uint8_t n4_wl, n4_hl;
#if CONFIG_AOM_QM
const qm_val_t *seg_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
#endif
// encoder
const int16_t *dequant;
#if CONFIG_NEW_QUANT
const dequant_val_type_nuq *dequant_val_nuq[QUANT_PROFILES];
#endif // CONFIG_NEW_QUANT
#if CONFIG_AOM_QM
const qm_val_t *seg_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
#endif
} MACROBLOCKD_PLANE;
#define BLOCK_OFFSET(x, i) ((x) + (i)*16)
......
/*
Copyright (c) 2016 Cisco Systems
(Replace with proper AOM header)
*/
#include "vp10/common/clpf.h"
// Apply the constrained low pass filter to one width x height block.
//
// Every pixel is compared with its four direct neighbours (above, left,
// right, below).  When more than two of them are larger the pixel is raised
// by one, when more than two are smaller it is lowered by one, otherwise it
// is copied unchanged.  If a has_* flag is zero, the neighbour on that side
// is replaced by the pixel itself for the whole block, so it never counts.
//
// src/sstride describe the input, dst/dstride the output.  Pixels are
// processed in raster order, and all neighbours of a pixel are read before
// the result for that pixel is stored.
static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
                       int dstride, int has_top, int has_left, int has_bottom,
                       int has_right, int width, int height) {
  int row, col;
  for (row = 0; row < height; row++) {
    for (col = 0; col < width; col++) {
      const int pos = row * sstride + col;
      const int cur = src[pos];
      const int above = has_top ? src[pos - sstride] : cur;
      const int left = has_left ? src[pos - 1] : cur;
      const int right = has_right ? src[pos + 1] : cur;
      const int below = has_bottom ? src[pos + sstride] : cur;
      const int num_higher =
          (above > cur) + (left > cur) + (right > cur) + (below > cur);
      const int num_lower =
          (above < cur) + (left < cur) + (right < cur) + (below < cur);
      int delta = 0;
      if (num_higher > 2) delta++;
      if (num_lower > 2) delta--;
      dst[row * dstride + col] = (uint8_t)(cur + delta);
    }
  }
}
#define BS (MI_SIZE * MAX_MIB_SIZE)
// Filter one superblock.
//
// Walks the mode info units of the superblock whose first unit is at
// (xpos, ypos), clipped to the frame's mi_rows x mi_cols, and runs
// clpf_block() on every unit whose mode info is not flagged as skip.
// Only plane 0 is processed unless CLPF_FILTER_ALL_PLANES is non-zero.
//
// When CLPF_ALLOW_PIXEL_PARALLELISM is non-zero the filtered pixels are first
// gathered in a local buffer and copied into the frame in a second pass;
// otherwise the destination planes are filtered in place.
static void vp10_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
                         const VP10_COMMON *cm, MACROBLOCKD *xd,
                         MODE_INFO *const *mi_8x8, int xpos, int ypos) {
  // Temporary buffer, rounded up to a 16 byte boundary (to allow SIMD
  // parallelism)
  uint8_t buf_unaligned[BS * BS + 15];
  uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
  const int num_planes = CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1;
  int plane, row, col;

  for (plane = 0; plane < num_planes; plane++) {
    // First pass: filter every non-skip unit
    for (row = 0; row < MAX_MIB_SIZE && ypos + row < cm->mi_rows; row++) {
      for (col = 0; col < MAX_MIB_SIZE && xpos + col < cm->mi_cols; col++) {
        const int mi_row = ypos + row;
        const int mi_col = xpos + col;
        const MB_MODE_INFO *mbmi =
            &mi_8x8[mi_row * cm->mi_stride + mi_col]->mbmi;
        int has_top, has_left, has_bottom, has_right;

        // Do not filter if there is no residual
        if (mbmi->skip) continue;

        // Do not filter frame edges
        has_top = mi_row > 0;
        has_left = mi_col > 0;
        has_bottom = mi_row < cm->mi_rows - 1;
        has_right = mi_col < cm->mi_cols - 1;
#if CLPF_ALLOW_BLOCK_PARALLELISM
        // Do not filter superblock edges
        has_top &= !!row;
        has_left &= !!col;
        has_bottom &= row != MAX_MIB_SIZE - 1;
        has_right &= col != MAX_MIB_SIZE - 1;
#endif
        vp10_setup_dst_planes(xd->plane, frame_buffer, mi_row, mi_col);
        clpf_block(xd->plane[plane].dst.buf,
                   CLPF_ALLOW_PIXEL_PARALLELISM
                       ? buf + row * MI_SIZE * BS + col * MI_SIZE
                       : xd->plane[plane].dst.buf,
                   xd->plane[plane].dst.stride,
                   CLPF_ALLOW_PIXEL_PARALLELISM ? BS
                                                : xd->plane[plane].dst.stride,
                   has_top, has_left, has_bottom, has_right,
                   MI_SIZE >> xd->plane[plane].subsampling_x,
                   MI_SIZE >> xd->plane[plane].subsampling_y);
      }
    }
#if CLPF_ALLOW_PIXEL_PARALLELISM
    // Second pass: copy the filtered units from the buffer into the frame
    for (row = 0; row < MAX_MIB_SIZE && ypos + row < cm->mi_rows; row++) {
      for (col = 0; col < MAX_MIB_SIZE && xpos + col < cm->mi_cols; col++) {
        const int mi_row = ypos + row;
        const int mi_col = xpos + col;
        const MB_MODE_INFO *mbmi =
            &mi_8x8[mi_row * cm->mi_stride + mi_col]->mbmi;
        int line, num_lines, line_bytes;

        // The destination planes are set up for every unit, skipped or not
        vp10_setup_dst_planes(xd->plane, frame_buffer, mi_row, mi_col);
        if (mbmi->skip) continue;

        num_lines = MI_SIZE >> xd->plane[plane].subsampling_y;
        line_bytes = MI_SIZE >> xd->plane[plane].subsampling_x;
        for (line = 0; line < num_lines; line++) {
          memcpy(xd->plane[plane].dst.buf + line * xd->plane[plane].dst.stride,
                 buf + (row * MI_SIZE + line) * BS + col * MI_SIZE, line_bytes);
        }
      }
    }
#endif
  }
}
// Filter an entire frame by visiting its superblocks row by row, left to
// right, stepping MAX_MIB_SIZE mode info units at a time.
void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
                     MACROBLOCKD *xd) {
  int mi_row, mi_col;
  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MAX_MIB_SIZE) {
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
      vp10_clpf_sb(frame, cm, xd, cm->mi_grid_visible, mi_col, mi_row);
    }
  }
}
/*
Copyright (c) 2016, Cisco Systems
(Replace with proper AOM header)
*/
#ifndef VP10_COMMON_CLPF_H_
#define VP10_COMMON_CLPF_H_
#include "vp10/common/reconinter.h"
// Compile-time configuration of the constrained low pass filter (CLPF).
//
// CLPF_ALLOW_PIXEL_PARALLELISM: when non-zero, each superblock is filtered
// into a temporary buffer and copied back to the frame afterwards; when zero,
// the frame is filtered in place.
#define CLPF_ALLOW_PIXEL_PARALLELISM \
1 // 1 = SIMD friendly (adds a buffer requirement)
// CLPF_ALLOW_BLOCK_PARALLELISM: when non-zero, neighbours across superblock
// edges are not used by the filter.
#define CLPF_ALLOW_BLOCK_PARALLELISM \
0 // 1 = MT friendly (degrades quality slightly)
// CLPF_FILTER_ALL_PLANES: selects which planes are filtered.
#define CLPF_FILTER_ALL_PLANES \
0 // 1 = filter both luma and chroma, 0 = filter only luma
// Apply the filter to every superblock of `frame`.  Blocks whose mode info
// is flagged as skip are left untouched.
void vp10_clpf_frame(const YV12_BUFFER_CONFIG *frame, const VP10_COMMON *cm,
MACROBLOCKD *xd);
#endif  // VP10_COMMON_CLPF_H_
......@@ -144,6 +144,9 @@ typedef struct VP10Common {
// Marks if we need to use 16bit frame buffers (1: yes, 0: no).
int use_highbitdepth;
#endif
#if CONFIG_CLPF
int clpf;
#endif
YV12_BUFFER_CONFIG *frame_to_show;
RefCntBuffer *prev_frame;
......@@ -214,6 +217,23 @@ typedef struct VP10Common {
int uv_ac_delta_q;
int16_t y_dequant[MAX_SEGMENTS][2];
int16_t uv_dequant[MAX_SEGMENTS][2];
#if CONFIG_AOM_QM
// Global quant matrix tables
qm_val_t *giqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES];
qm_val_t *gqmatrix[NUM_QM_LEVELS][2][2][TX_SIZES];
// Local quant matrix tables for each frame
qm_val_t *y_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
qm_val_t *uv_iqmatrix[MAX_SEGMENTS][2][TX_SIZES];
// Encoder
qm_val_t *y_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
qm_val_t *uv_qmatrix[MAX_SEGMENTS][2][TX_SIZES];
int using_qmatrix;
int min_qmlevel;
int max_qmlevel;
#endif
#if CONFIG_NEW_QUANT
dequant_val_type_nuq y_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
dequant_val_type_nuq uv_dequant_nuq[MAX_SEGMENTS][QUANT_PROFILES][COEF_BANDS];
......@@ -430,12 +450,20 @@ static INLINE void vp10_init_macroblockd(VP10_COMMON *cm, MACROBLOCKD *xd,
xd->above_context[i] = cm->above_context[i];
if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
#if CONFIG_AOM_QM
memcpy(xd->plane[i].seg_iqmatrix, cm->y_iqmatrix, sizeof(cm->y_iqmatrix));
#endif
#if CONFIG_NEW_QUANT
memcpy(xd->plane[i].seg_dequant_nuq, cm->y_dequant_nuq,
sizeof(cm->y_dequant_nuq));
#endif
} else {
memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
#if CONFIG_AOM_QM
memcpy(xd->plane[i].seg_iqmatrix, cm->uv_iqmatrix,
sizeof(cm->uv_iqmatrix));
#endif
#if CONFIG_NEW_QUANT
memcpy(xd->plane[i].seg_dequant_nuq, cm->uv_dequant_nuq,
sizeof(cm->uv_dequant_nuq));
......
This diff is collapsed.
......@@ -13,6 +13,7 @@
#include "vpx/vpx_codec.h"
#include "vp10/common/seg_common.h"
#include "vp10/common/enums.h"
#ifdef __cplusplus
extern "C" {
......@@ -22,12 +23,38 @@ extern "C" {
#define MAXQ 255
#define QINDEX_RANGE (MAXQ - MINQ + 1)
#define QINDEX_BITS 8
#if CONFIG_AOM_QM
// Total number of QM sets stored
#define QM_LEVEL_BITS 4
#define NUM_QM_LEVELS (1 << QM_LEVEL_BITS)
/* Default first and last levels in the list of QMs. The number of levels
used by default is (DEFAULT_QM_LAST - DEFAULT_QM_FIRST + 1).
A lower level implies more heavily weighted matrices. */
#define DEFAULT_QM_FIRST (NUM_QM_LEVELS / 2)
#define DEFAULT_QM_LAST (NUM_QM_LEVELS - 1)
#endif
struct VP10Common;
int16_t vp10_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
int16_t vp10_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth);
int vp10_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex);
#if CONFIG_AOM_QM
// Reduce the large number of quantizers to a smaller number of levels for which
// different matrices may be defined.
//
// qindex is scaled (with rounding) onto (last + 1 - first) steps, offset by
// first, and the result is capped at NUM_QM_LEVELS - 1.
// NOTE(review): because of the rounding term the result can reach last + 1
// for the largest qindex values - confirm whether that is intended.
static inline int aom_get_qmlevel(int qindex, int first, int last) {
  const int num_levels = last + 1 - first;
  const int step = (qindex * num_levels + QINDEX_RANGE / 2) / QINDEX_RANGE;
  return VPXMIN(step + first, NUM_QM_LEVELS - 1);
}
void aom_qm_init(struct VP10Common *cm);
qm_val_t *aom_iqmatrix(struct VP10Common *cm, int qindex, int comp,
int log2sizem2, int is_intra);
qm_val_t *aom_qmatrix(struct VP10Common *cm, int qindex, int comp,
int log2sizem2, int is_intra);
#endif
#if CONFIG_NEW_QUANT
......
......@@ -462,9 +462,36 @@ if (vpx_config("CONFIG_VP10_ENCODER") eq "yes") {
# ENCODEMB INVOKE
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # High bit depth build: the transform coefficients are held in 32-bit
  # values, so the assembler code for vp10_block_error can no longer be used.
  # The config key must be CONFIG_VP9_HIGHBITDEPTH, the name used by the other
  # high bit depth checks in this file; any other key never matches and
  # silently selects the 16-bit specializations below.
  add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
  specialize qw/vp10_block_error/;

  # Quantizers take the extra qm_ptr / iqm_ptr quantization matrix arguments.
  add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";

  add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";

  add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
  specialize qw/vp10_fdct8x8_quant/;
} else {
  # Low bit depth build: SIMD versions of the block error functions remain
  # usable; the quantizers have only C versions because of the added
  # quantization matrix arguments.
  add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
  specialize qw/vp10_block_error avx2 msa/, "$sse2_x86inc";

  add_proto qw/int64_t vp10_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
  specialize qw/vp10_block_error_fp neon/, "$sse2_x86inc";

  add_proto qw/void vp10_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";

  add_proto qw/void vp10_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";

  add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr";
}
} else {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# the transform coefficients are held in 32-bit
# values, so the assembler code for vp10_block_error can no longer be used.
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error/;
......@@ -476,7 +503,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant/;
} else {
} else {
add_proto qw/int64_t vp10_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp10_block_error sse2 avx2 msa/;
......@@ -491,6 +518,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp10_fdct8x8_quant sse2 ssse3 neon/;
}
}
# fdct functions
......@@ -817,11 +846,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/int64_t vp10_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/vp10_highbd_block_error sse2/;
if (vpx_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void vp10_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
} else {
add_proto qw/void vp10_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/vp10_highbd_quantize_fp sse4_1/;
add_proto qw/void vp10_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/vp10_highbd_quantize_b/;
}
# fdct functions
add_proto qw/void vp10_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
......
......@@ -25,6 +25,9 @@
#include "vpx_util/vpx_thread.h"
#include "vp10/common/alloccommon.h"
#if CONFIG_CLPF
#include "vp10/common/clpf.h"
#endif
#include "vp10/common/common.h"
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
......@@ -1942,6 +1945,12 @@ static void setup_loopfilter(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
}
}
#if CONFIG_CLPF
// Read the CLPF flag, coded as a one-bit literal, from the bit buffer and
// store it in cm->clpf.
static void setup_clpf(VP10_COMMON *cm, struct vpx_read_bit_buffer *rb) {
  const int clpf_flag = vpx_rb_read_literal(rb, 1);
  cm->clpf = clpf_flag;
}
#endif
// Read an optional delta value: one presence bit, followed (only when that
// bit is set) by a value read with vpx_rb_read_inv_signed_literal(rb, 6).
// Returns 0 when the presence bit is clear.
static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) {
  if (!vpx_rb_read_bit(rb)) return 0;
  return vpx_rb_read_inv_signed_literal(rb, 6);
}
......@@ -1953,16 +1962,34 @@ static void setup_quantization(VP10_COMMON *const cm,
cm->uv_dc_delta_q = read_delta_q(rb);
cm->uv_ac_delta_q = read_delta_q(rb);
cm->dequant_bit_depth = cm->bit_depth;
#if CONFIG_AOM_QM
cm->using_qmatrix = vpx_rb_read_bit(rb);
if (cm->using_qmatrix) {
cm->min_qmlevel = vpx_rb_read_literal(rb, QM_LEVEL_BITS);
cm->max_qmlevel = vpx_rb_read_literal(rb, QM_LEVEL_BITS);
} else {
cm->min_qmlevel = 0;
cm->max_qmlevel = 0;
}
#endif
}
static void setup_segmentation_dequant(VP10_COMMON *const cm) {
// Build y/uv dequant values based on segmentation.
// Build y/uv dequant values based on segmentation.
int i = 0;
#if CONFIG_AOM_QM
int lossless;
int j = 0;
int qmlevel;
int using_qm = cm->using_qmatrix;
int minqm = cm->min_qmlevel;
int maxqm = cm->max_qmlevel;
#endif
#if CONFIG_NEW_QUANT
int b;
int dq;
#endif // CONFIG_NEW_QUANT
if (cm->seg.enabled) {
int i;
for (i = 0; i < MAX_SEGMENTS; ++i) {
const int qindex = vp10_get_qindex(&cm->seg, i, cm->base_qindex);
cm->y_dequant[i][0] =
......@@ -1972,6 +1999,21 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[i][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
lossless = qindex == 0 && cm->y_dc_delta_q == 0 &&
cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
// NB: depends on base index so there is only 1 set per frame
// No quant weighting when lossless or signalled not using QM
qmlevel = (lossless || using_qm == 0)
? NUM_QM_LEVELS - 1
: aom_get_qmlevel(cm->base_qindex, minqm, maxqm);
for (j = 0; j < TX_SIZES; ++j) {
cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 0, j, 1);
cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 0, j, 0);
cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 1, j, 1);
cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 1, j, 0);
}
#endif // CONFIG_AOM_QM
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {
for (b = 0; b < COEF_BANDS; ++b) {
......@@ -1994,6 +2036,20 @@ static void setup_segmentation_dequant(VP10_COMMON *const cm) {
vp10_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth);
cm->uv_dequant[0][1] =
vp10_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth);
#if CONFIG_AOM_QM
lossless = qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 &&
cm->uv_ac_delta_q == 0;
// No quant weighting when lossless or signalled not using QM
qmlevel = (lossless || using_qm == 0)
? NUM_QM_LEVELS - 1
: aom_get_qmlevel(cm->base_qindex, minqm, maxqm);
for (j = 0; j < TX_SIZES; ++j) {
cm->y_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 0, j, 1);
cm->y_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 0, j, 0);
cm->uv_iqmatrix[i][1][j] = aom_iqmatrix(cm, qmlevel, 1, j, 1);
cm->uv_iqmatrix[i][0][j] = aom_iqmatrix(cm, qmlevel, 1, j, 0);
}
#endif
#if CONFIG_NEW_QUANT
for (dq = 0; dq < QUANT_PROFILES; dq++) {
for (b = 0; b < COEF_BANDS; ++b) {
......@@ -2646,6 +2702,10 @@ static const uint8_t *decode_tiles(VP10Decoder *pbi, const uint8_t *data,
winterface->execute(&pbi->lf_worker);
}
#endif // CONFIG_VAR_TX
#if CONFIG_CLPF
if (cm->clpf && !cm->skip_loop_filter)
vp10_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
#endif
if (cm->frame_parallel_decode)
vp10_frameworker_broadcast(pbi->cur_buf, INT_MAX);
......@@ -3179,6 +3239,9 @@ static size_t read_uncompressed_header(VP10Decoder *pbi,
#endif // CONFIG_EXT_PARTITION
setup_loopfilter(cm, rb);
#if CONFIG_CLPF
setup_clpf(cm, rb);
#endif
#if CONFIG_LOOP_RESTORATION
setup_restoration(cm, rb);
#endif // CONFIG_LOOP_RESTORATION
......
......@@ -112,6 +112,10 @@ VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
cm->setup_mi = vp10_dec_setup_mi;
vp10_loop_filter_init(cm);
#if CONFIG_AOM_QM
aom_qm_init(cm);
#endif
#if CONFIG_LOOP_RESTORATION
vp10_loop_restoration_precal();
#endif // CONFIG_LOOP_RESTORATION
......
......@@ -43,6 +43,13 @@ static INLINE int read_coeff(const vpx_prob *probs, int n, vp10_reader *r) {
return val;
}
#if CONFIG_AOM_QM
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type,
const int16_t *dq, int ctx, const int16_t *scan,
const int16_t *nb, vp10_reader *r,
const qm_val_t *iqm[2][TX_SIZES])
#else
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, TX_TYPE tx_type,
const int16_t *dq,
......@@ -50,11 +57,16 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
dequant_val_type_nuq *dq_val,
#endif // CONFIG_NEW_QUANT
int ctx, const int16_t *scan, const int16_t *nb,
vp10_reader *r) {
vp10_reader *r)
#endif
{
FRAME_COUNTS *counts = xd->counts;
const int max_eob = get_tx2d_size(tx_size);
const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
#if CONFIG_AOM_QM
const qm_val_t *iqmatrix = iqm[!ref][tx_size];
#endif
int band, c = 0;
const int tx_size_ctx = txsize_sqr_map[tx_size];
const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
......@@ -197,9 +209,14 @@ static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
}
}
#if CONFIG_NEW_QUANT
v = vp10_dequant_abscoeff_nuq(val, dqv, dqv_val);
v = dq_shift ? ROUND_POWER_OF_TWO(v, dq_shift) : v;
#else
#if CONFIG_AOM_QM
dqv = ((iqmatrix[scan[c]] * (int)dqv) + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
v = (val * dqv) >> dq_shift;
#endif // CONFIG_NEW_QUANT
......@@ -489,12 +506,18 @@ int vp10_decode_block_tokens(MACROBLOCKD *const xd, int plane,
#endif // CONFIG_NEW_QUANT
#if !CONFIG_ANS
#if CONFIG_AOM_QM
const int eob =
decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
ctx, sc->scan, sc->neighbors, r, pd->seg_iqmatrix[seg_id]);
#else
const int eob =
decode_coefs(xd, pd->plane_type, pd->dqcoeff, tx_size, tx_type, dequant,
#if CONFIG_NEW_QUANT
pd->seg_dequant_nuq[seg_id][dq],
#endif // CONFIG_NEW_QUANT
ctx, sc->scan, sc->neighbors, r);
#endif // CONFIG_AOM_QM
#else
const int eob = decode_coefs_ans(xd, pd->plane_type, pd->dqcoeff, tx_size,
tx_type, dequant,
......
......@@ -20,6 +20,9 @@
#include "vpx_ports/system_state.h"
#include "vpx_util/debug_util.h"
#if CONFIG_CLPF
#include "vp10/common/clpf.h"
#endif
#include "vp10/common/entropy.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/entropymv.h"
......@@ -2437,6 +2440,13 @@ static void encode_loopfilter(VP10_COMMON *cm,
}
}
#if CONFIG_CLPF
// Write cm->clpf to the bit buffer as a one-bit literal.
static void encode_clpf(const VP10_COMMON *cm,
                        struct vpx_write_bit_buffer *wb) {
  const int clpf_flag = cm->clpf;
  vpx_wb_write_literal(wb, clpf_flag, 1);
}
#endif
static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) {
if (delta_q != 0) {
vpx_wb_write_bit(wb, 1);
......@@ -2452,6 +2462,13 @@ static void encode_quantization(const VP10_COMMON *const cm,
write_delta_q(wb, cm->y_dc_delta_q);
write_delta_q(wb, cm->uv_dc_delta_q);
write_delta_q(wb, cm->uv_ac_delta_q);
#if CONFIG_AOM_QM
vpx_wb_write_bit(wb, cm->using_qmatrix);
if (cm->using_qmatrix) {
vpx_wb_write_literal(wb, cm->min_qmlevel, QM_LEVEL_BITS);
vpx_wb_write_literal(wb, cm->max_qmlevel, QM_LEVEL_BITS);
}
#endif
}
static void encode_segmentation(VP10_COMMON *cm, MACROBLOCKD *xd,
......@@ -3083,6 +3100,9 @@ static void write_uncompressed_header(VP10_COMP *cpi,
#endif // CONFIG_EXT_PARTITION
encode_loopfilter(cm, wb);
#if CONFIG_CLPF
encode_clpf(cm, wb);
#endif
#if CONFIG_LOOP_RESTORATION
encode_restoration(cm, wb);
#endif // CONFIG_LOOP_RESTORATION
......
......@@ -1561,7 +1561,12 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
const int16_t *quant_shift_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
const int16_t *scan, const int16_t *iscan
#if CONFIG_AOM_QM
,
const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr
#endif
) {
int eob = -1;
int i, j;
......@@ -1647,16 +1652,29 @@ void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
for (i = 0; i < n_coeffs; i++) {
const int rc = scan[i];
const int coeff = coeff_ptr[rc];
#if CONFIG_AOM_QM
const qm_val_t wt = qm_ptr[rc];
const qm_val_t iwt = iqm_ptr[rc];
const int dequant =
(dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >>
AOM_QM_BITS;
#endif
const int coeff_sign = (coeff >> 31);
const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
tmp = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
int64_t tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
int tmp32;
#if CONFIG_AOM_QM
tmp32 = (tmp * quant_ptr[rc != 0] * wt) >> (16 + AOM_QM_BITS);
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant;
#else
tmp32 = (tmp * quant_ptr[rc != 0]) >> 16;
qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
#endif
if (tmp) eob = i;
if (tmp32) eob = i;
}
}
*eob_ptr = eob + 1;
......
......@@ -88,6 +88,11 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
#if CONFIG_AOM_QM
int seg_id = xd->mi[0]->mbmi.segment_id;
int is_intra = !is_inter_block(&xd->mi[0]->mbmi);
const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][is_intra][tx_size];
#endif
const int shift = get_tx_scale(xd, tx_type, tx_size);
#if CONFIG_NEW_QUANT