From 1579133291fcb900d20e1d3917e7cef6af9f6a55 Mon Sep 17 00:00:00 2001 From: "Nathan E. Egge" <negge@dgql.org> Date: Mon, 7 Mar 2016 13:31:40 -0500 Subject: [PATCH] Use OD_DIVU for small divisions in temporal_filter. Replaces an approximate unsigned integer division with the bit exact OD_DIVU() implementation. Removes the need to call vp10_temporal_filter_init() before calling vp10_temporal_filter_apply_c() by using a static table of constants. ntt-short-1: MEDIUM (%) HIGH (%) PSNR -0.023045 0.115705 PSNRHVS 0.023327 0.110230 SSIM -0.039964 0.083594 FASTSSIM 0.037416 -0.100936 subset1: RATE (%) DSNR (dB) PSNR 0.00000 0.00000 PSNRHVS 0.00000 0.00000 SSIM 0.00000 0.00000 FASTSSIM 0.00000 0.00000 Change-Id: I97c5817463fcd8cb557c403a143b9cfaee4f102c --- vp10/encoder/encoder.c | 1 - vp10/encoder/temporal_filter.c | 67 ++++++++++------------------------ vp10/encoder/temporal_filter.h | 1 - 3 files changed, 19 insertions(+), 50 deletions(-) diff --git a/vp10/encoder/encoder.c b/vp10/encoder/encoder.c index 01be7cc70c..a7dd1d777d 100644 --- a/vp10/encoder/encoder.c +++ b/vp10/encoder/encoder.c @@ -323,7 +323,6 @@ void vp10_initialize_enc(void) { vp10_init_me_luts(); vp10_rc_init_minq_luts(); vp10_entropy_mv_init(); - vp10_temporal_filter_init(); vp10_encode_token_init(); init_done = 1; } diff --git a/vp10/encoder/temporal_filter.c b/vp10/encoder/temporal_filter.c index a88564df0b..b19469a5e6 100644 --- a/vp10/encoder/temporal_filter.c +++ b/vp10/encoder/temporal_filter.c @@ -15,6 +15,7 @@ #include "vp10/common/onyxc_int.h" #include "vp10/common/quant_common.h" #include "vp10/common/reconinter.h" +#include "vp10/common/odintrin.h" #include "vp10/encoder/extend.h" #include "vp10/encoder/firstpass.h" #include "vp10/encoder/mcomp.h" @@ -29,8 +30,6 @@ #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" -static int fixed_divide[512]; - static void temporal_filter_predictors_mb_c( MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr, int stride, 
int uv_block_width, int uv_block_height, int mv_row, int mv_col, @@ -80,13 +79,6 @@ static void temporal_filter_predictors_mb_c( which_mv, kernel, mv_precision_uv, x, y); } -void vp10_temporal_filter_init(void) { - int i; - - fixed_divide[0] = 0; - for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i; -} - void vp10_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, @@ -379,11 +371,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, byte = mb_y_offset; for (i = 0, k = 0; i < 16; i++) { for (j = 0; j < 16; j++, k++) { - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - - dst1_16[byte] = (uint16_t)pval; + dst1_16[byte] = + (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // move to next pixel byte++; @@ -403,16 +392,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, int m = k + 256; // U - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - dst1_16[byte] = (uint16_t)pval; + dst1_16[byte] = + (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // V - pval = accumulator[m] + (count[m] >> 1); - pval *= fixed_divide[count[m]]; - pval >>= 19; - dst2_16[byte] = (uint16_t)pval; + dst2_16[byte] = + (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); // move to next pixel byte++; @@ -427,11 +412,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, byte = mb_y_offset; for (i = 0, k = 0; i < 16; i++) { for (j = 0; j < 16; j++, k++) { - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - - dst1[byte] = (uint8_t)pval; + dst1[byte] = + (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // move to next pixel byte++; @@ -448,16 +430,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, int m = k + 256; // U - unsigned int pval = accumulator[k] + (count[k] 
>> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - dst1[byte] = (uint8_t)pval; + dst1[byte] = + (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // V - pval = accumulator[m] + (count[m] >> 1); - pval *= fixed_divide[count[m]]; - pval >>= 19; - dst2[byte] = (uint8_t)pval; + dst2[byte] = + (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); // move to next pixel byte++; @@ -472,11 +450,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, byte = mb_y_offset; for (i = 0, k = 0; i < 16; i++) { for (j = 0; j < 16; j++, k++) { - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - - dst1[byte] = (uint8_t)pval; + dst1[byte] = + (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // move to next pixel byte++; @@ -493,16 +468,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, int m = k + 256; // U - unsigned int pval = accumulator[k] + (count[k] >> 1); - pval *= fixed_divide[count[k]]; - pval >>= 19; - dst1[byte] = (uint8_t)pval; + dst1[byte] = + (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); // V - pval = accumulator[m] + (count[m] >> 1); - pval *= fixed_divide[count[m]]; - pval >>= 19; - dst2[byte] = (uint8_t)pval; + dst2[byte] = + (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); // move to next pixel byte++; diff --git a/vp10/encoder/temporal_filter.h b/vp10/encoder/temporal_filter.h index 6e331e6ad0..ce5291a53d 100644 --- a/vp10/encoder/temporal_filter.h +++ b/vp10/encoder/temporal_filter.h @@ -15,7 +15,6 @@ extern "C" { #endif -void vp10_temporal_filter_init(void); void vp10_temporal_filter(VP10_COMP *cpi, int distance); #ifdef __cplusplus -- GitLab