Commit 15791332 authored by Nathan Egge

Use OD_DIVU for small divisions in temporal_filter.

Replaces an approximate unsigned integer division with the bit-exact
OD_DIVU() implementation.
Removes the need to call vp10_temporal_filter_init() before calling
 vp10_temporal_filter_apply_c() by using a static table of constants.

ntt-short-1:

          MEDIUM (%) HIGH (%)
    PSNR -0.023045   0.115705
 PSNRHVS  0.023327   0.110230
    SSIM -0.039964   0.083594
FASTSSIM  0.037416  -0.100936

subset1:

         RATE (%) DSNR (dB)
    PSNR 0.00000  0.00000
 PSNRHVS 0.00000  0.00000
    SSIM 0.00000  0.00000
FASTSSIM 0.00000  0.00000

Change-Id: I97c5817463fcd8cb557c403a143b9cfaee4f102c
parent 03122298
......@@ -323,7 +323,6 @@ void vp10_initialize_enc(void) {
vp10_init_me_luts();
vp10_rc_init_minq_luts();
vp10_entropy_mv_init();
vp10_temporal_filter_init();
vp10_encode_token_init();
init_done = 1;
}
......
......@@ -15,6 +15,7 @@
#include "vp10/common/onyxc_int.h"
#include "vp10/common/quant_common.h"
#include "vp10/common/reconinter.h"
#include "vp10/common/odintrin.h"
#include "vp10/encoder/extend.h"
#include "vp10/encoder/firstpass.h"
#include "vp10/encoder/mcomp.h"
......@@ -29,8 +30,6 @@
#include "vpx_ports/vpx_timer.h"
#include "vpx_scale/vpx_scale.h"
static int fixed_divide[512];
static void temporal_filter_predictors_mb_c(
MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
......@@ -80,13 +79,6 @@ static void temporal_filter_predictors_mb_c(
which_mv, kernel, mv_precision_uv, x, y);
}
void vp10_temporal_filter_init(void) {
int i;
fixed_divide[0] = 0;
for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
}
void vp10_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride,
uint8_t *frame2, unsigned int block_width,
unsigned int block_height, int strength,
......@@ -379,11 +371,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
byte = mb_y_offset;
for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
dst1_16[byte] = (uint16_t)pval;
dst1_16[byte] =
(uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
// move to next pixel
byte++;
......@@ -403,16 +392,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
int m = k + 256;
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
dst1_16[byte] = (uint16_t)pval;
dst1_16[byte] =
(uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
// V
pval = accumulator[m] + (count[m] >> 1);
pval *= fixed_divide[count[m]];
pval >>= 19;
dst2_16[byte] = (uint16_t)pval;
dst2_16[byte] =
(uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
// move to next pixel
byte++;
......@@ -427,11 +412,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
byte = mb_y_offset;
for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (uint8_t)pval;
dst1[byte] =
(uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
// move to next pixel
byte++;
......@@ -448,16 +430,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
int m = k + 256;
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (uint8_t)pval;
dst1[byte] =
(uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
// V
pval = accumulator[m] + (count[m] >> 1);
pval *= fixed_divide[count[m]];
pval >>= 19;
dst2[byte] = (uint8_t)pval;
dst2[byte] =
(uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
// move to next pixel
byte++;
......@@ -472,11 +450,8 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
byte = mb_y_offset;
for (i = 0, k = 0; i < 16; i++) {
for (j = 0; j < 16; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (uint8_t)pval;
dst1[byte] =
(uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
// move to next pixel
byte++;
......@@ -493,16 +468,12 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,
int m = k + 256;
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
dst1[byte] = (uint8_t)pval;
dst1[byte] =
(uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]);
// V
pval = accumulator[m] + (count[m] >> 1);
pval *= fixed_divide[count[m]];
pval >>= 19;
dst2[byte] = (uint8_t)pval;
dst2[byte] =
(uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]);
// move to next pixel
byte++;
......
......@@ -15,7 +15,6 @@
extern "C" {
#endif
void vp10_temporal_filter_init(void);
void vp10_temporal_filter(VP10_COMP *cpi, int distance);
#ifdef __cplusplus
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment