Commit 15791332 authored by Nathan Egge

Use OD_DIVU for small divisions in temporal_filter.

Replaces an approximate unsigned integer division with the bit-exact
OD_DIVU() implementation.
Removes the need to call vp10_temporal_filter_init() before calling
 vp10_temporal_filter_apply_c() by using a static table of constants.

ntt-short-1:

          MEDIUM (%) HIGH (%)
    PSNR -0.023045   0.115705
 PSNRHVS  0.023327   0.110230
    SSIM -0.039964   0.083594
FASTSSIM  0.037416  -0.100936

subset1:

         RATE (%) DSNR (dB)
    PSNR 0.00000  0.00000
 PSNRHVS 0.00000  0.00000
    SSIM 0.00000  0.00000
FASTSSIM 0.00000  0.00000

Change-Id: I97c5817463fcd8cb557c403a143b9cfaee4f102c
parent 03122298
...@@ -323,7 +323,6 @@ void vp10_initialize_enc(void) { ...@@ -323,7 +323,6 @@ void vp10_initialize_enc(void) {
vp10_init_me_luts(); vp10_init_me_luts();
vp10_rc_init_minq_luts(); vp10_rc_init_minq_luts();
vp10_entropy_mv_init(); vp10_entropy_mv_init();
vp10_temporal_filter_init();
vp10_encode_token_init(); vp10_encode_token_init();
init_done = 1; init_done = 1;
} }
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "vp10/common/onyxc_int.h" #include "vp10/common/onyxc_int.h"
#include "vp10/common/quant_common.h" #include "vp10/common/quant_common.h"
#include "vp10/common/reconinter.h" #include "vp10/common/reconinter.h"
#include "vp10/common/odintrin.h"
#include "vp10/encoder/extend.h" #include "vp10/encoder/extend.h"
#include "vp10/encoder/firstpass.h" #include "vp10/encoder/firstpass.h"
#include "vp10/encoder/mcomp.h" #include "vp10/encoder/mcomp.h"
...@@ -29,8 +30,6 @@ ...@@ -29,8 +30,6 @@
#include "vpx_ports/vpx_timer.h" #include "vpx_ports/vpx_timer.h"
#include "vpx_scale/vpx_scale.h" #include "vpx_scale/vpx_scale.h"
static int fixed_divide[512];
static void temporal_filter_predictors_mb_c( static void temporal_filter_predictors_mb_c(
MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr, MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col, int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
...@@ -80,13 +79,6 @@ static void temporal_filter_predictors_mb_c( ...@@ -80,13 +79,6 @@ static void temporal_filter_predictors_mb_c(
which_mv, kernel, mv_precision_uv, x, y); which_mv, kernel, mv_precision_uv, x, y);
} }
void vp10_temporal_filter_init(void) {
int i;
fixed_divide[0] = 0;
for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
}
void vp10_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, void vp10_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
uint8_t *frame2, unsigned int block_width, uint8_t *frame2, unsigned int block_width,
unsigned int block_height, int strength, unsigned int block_height, int strength,
...@@ -379,11 +371,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, ...@@ -379,11 +371,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
byte = mb_y_offset; byte = mb_y_offset;
for (i = 0, k = 0; i < 16; i++) { for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) { for (j = 0; j < 16; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1); dst1_16[byte] =
pval *= fixed_divide[count[k]]; (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
pval >>= 19;
dst1_16[byte] = (uint16_t)pval;
// move to next pixel // move to next pixel
byte++; byte++;
...@@ -403,16 +392,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, ...@@ -403,16 +392,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
int m = k + 256; int m = k + 256;
// U // U
unsigned int pval = accumulator[k] + (count[k] >> 1); dst1_16[byte] =
pval *= fixed_divide[count[k]]; (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
pval >>= 19;
dst1_16[byte] = (uint16_t)pval;
// V // V
pval = accumulator[m] + (count[m] >> 1); dst2_16[byte] =
pval *= fixed_divide[count[m]]; (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
pval >>= 19;
dst2_16[byte] = (uint16_t)pval;
// move to next pixel // move to next pixel
byte++; byte++;
...@@ -427,11 +412,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, ...@@ -427,11 +412,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
byte = mb_y_offset; byte = mb_y_offset;
for (i = 0, k = 0; i < 16; i++) { for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) { for (j = 0; j < 16; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1); dst1[byte] =
pval *= fixed_divide[count[k]]; (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
pval >>= 19;
dst1[byte] = (uint8_t)pval;
// move to next pixel // move to next pixel
byte++; byte++;
...@@ -448,16 +430,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, ...@@ -448,16 +430,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
int m = k + 256; int m = k + 256;
// U // U
unsigned int pval = accumulator[k] + (count[k] >> 1); dst1[byte] =
pval *= fixed_divide[count[k]]; (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
pval >>= 19;
dst1[byte] = (uint8_t)pval;
// V // V
pval = accumulator[m] + (count[m] >> 1); dst2[byte] =
pval *= fixed_divide[count[m]]; (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
pval >>= 19;
dst2[byte] = (uint8_t)pval;
// move to next pixel // move to next pixel
byte++; byte++;
...@@ -472,11 +450,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, ...@@ -472,11 +450,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
byte = mb_y_offset; byte = mb_y_offset;
for (i = 0, k = 0; i < 16; i++) { for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) { for (j = 0; j < 16; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1); dst1[byte] =
pval *= fixed_divide[count[k]]; (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
pval >>= 19;
dst1[byte] = (uint8_t)pval;
// move to next pixel // move to next pixel
byte++; byte++;
...@@ -493,16 +468,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi, ...@@ -493,16 +468,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
int m = k + 256; int m = k + 256;
// U // U
unsigned int pval = accumulator[k] + (count[k] >> 1); dst1[byte] =
pval *= fixed_divide[count[k]]; (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
pval >>= 19;
dst1[byte] = (uint8_t)pval;
// V // V
pval = accumulator[m] + (count[m] >> 1); dst2[byte] =
pval *= fixed_divide[count[m]]; (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
pval >>= 19;
dst2[byte] = (uint8_t)pval;
// move to next pixel // move to next pixel
byte++; byte++;
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
extern "C" { extern "C" {
#endif #endif
void vp10_temporal_filter_init(void);
void vp10_temporal_filter(VP10_COMP *cpi, int distance); void vp10_temporal_filter(VP10_COMP *cpi, int distance);
#ifdef __cplusplus #ifdef __cplusplus
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment