Commit 5cedcd81, authored by Steinar Midtskogen, committed by Jean-Marc Valin

Avoid calculating dering direction and variance more than once

Gives an identical bitstream and about 2.5% faster encoding with
cpu-used=4.

Change-Id: Ic7faf2224e51de01bb16af050bb29540862d54ac
parent a531571a
......@@ -155,6 +155,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
unsigned char *row_dering, *prev_row_dering, *curr_row_dering;
int dering_count;
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride;
int bsize[3];
int dec[3];
......@@ -380,7 +381,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
if (threshold == 0 && clpf_strength == 0) continue;
od_dering(dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
dec[pli], dir, pli, dlist, dering_count, threshold,
dec[pli], dir, NULL, var, pli, dlist, dering_count, threshold,
clpf_strength, clpf_damping, coeff_shift);
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
......
......@@ -240,7 +240,8 @@ void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
}
void od_dering(uint16_t *y, uint16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int *dirinit,
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
dering_list *dlist, int dering_count, int threshold,
int clpf_strength, int clpf_damping, int coeff_shift) {
int bi;
......@@ -252,12 +253,18 @@ void od_dering(uint16_t *y, uint16_t *in, int xdec,
};
bsize = OD_DERING_SIZE_LOG2 - xdec;
if (pli == 0) {
if (!dirinit || !*dirinit) {
for (bi = 0; bi < dering_count; bi++) {
by = dlist[bi].by;
bx = dlist[bi].bx;
dir[by][bx] = od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
OD_FILT_BSTRIDE, &var[by][bx], coeff_shift);
}
if (dirinit) *dirinit = 1;
}
for (bi = 0; bi < dering_count; bi++) {
int32_t var;
by = dlist[bi].by;
bx = dlist[bi].bx;
dir[by][bx] = od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
OD_FILT_BSTRIDE, &var, coeff_shift);
/* Deringing orthogonal to the direction uses a tighter threshold
because we want to be conservative. We've presumably already
achieved some deringing, so the amount of change is expected
......@@ -269,7 +276,7 @@ void od_dering(uint16_t *y, uint16_t *in, int xdec,
(filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[bi << 2 * bsize], 1 << bsize,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
od_adjust_thresh(threshold, var), dir[by][bx]);
od_adjust_thresh(threshold, var[by][bx]), dir[by][bx]);
}
} else {
for (bi = 0; bi < dering_count; bi++) {
......
......@@ -47,7 +47,8 @@ void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
int bsize);
void od_dering(uint16_t *y, uint16_t *in, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int *dirinit,
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
dering_list *dlist, int dering_count, int threshold,
int clpf_strength, int clpf_damping, int coeff_shift);
int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
......
......@@ -100,6 +100,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
uint16_t *ref_coeff[3];
dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride[3];
int bsize[3];
int dec[3];
......@@ -179,6 +180,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
for (sbc = 0; sbc < nhsb; sbc++) {
int nvb, nhb;
int gi;
int dirinit = 0;
DECLARE_ALIGNED(32, uint16_t, dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
DECLARE_ALIGNED(32, uint16_t,
tmp_dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
......@@ -225,9 +227,10 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
}
}
clpf_strength = gi % CLPF_STRENGTHS;
od_dering(tmp_dst, in, dec[pli], dir, pli, dlist, dering_count,
threshold, clpf_strength + (clpf_strength == 3),
clpf_damping, coeff_shift);
od_dering(tmp_dst, in, dec[pli], dir, &dirinit, var, pli, dlist,
dering_count, threshold,
clpf_strength + (clpf_strength == 3), clpf_damping,
coeff_shift);
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[pli], tmp_dst,
dlist, dering_count, bsize[pli]);
mse[pli][sb_count][gi] = (int)compute_dist(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment