### Removing divisions from od_dir_find8()

```
Instead of dividing the squared partial sums by the number n of pixels in the
line, we multiply by 840/n, where 840=3*5*7*8. This not only avoids the
divisions, but it also makes the optimization exact as there is no more
rounding.

ntt-short1 results:
MEDIUM (%) HIGH (%)
PSNR -0.012070 -0.059644
PSNRHVS -0.016845 -0.020871
SSIM -0.026984 -0.031257
FASTSSIM -0.026078 0.414901

Change-Id: Ie553d5e3a545dee860a00879d724ecfc00f0a974
```
parent 0950ed82
 ... ... @@ -61,15 +61,21 @@ const double OD_DERING_GAIN_TABLE[OD_DERING_LEVELS] = { static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var, int coeff_shift) { int i; int cost = {0}; int32_t cost = {0}; int partial = {{0}}; int best_cost = 0; int32_t best_cost = 0; int best_dir = 0; /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n. The output is then 840 times larger, but we don't care for finding the max. */ static const int div_table[] = {0, 840, 420, 280, 210, 168, 140, 120, 105}; for (i = 0; i < 8; i++) { int j; for (j = 0; j < 8; j++) { int x; x = img[i*stride + j] >> coeff_shift; /* We subtract 128 here to reduce the maximum range of the squared partial sums. */ x = (img[i*stride + j] >> coeff_shift) - 128; partial[i + j] += x; partial[i + j/2] += x; partial[i] += x; ... ... @@ -81,25 +87,28 @@ static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var, } } for (i = 0; i < 8; i++) { cost += partial[i]*partial[i] >> 3; cost += partial[i]*partial[i] >> 3; cost += partial[i]*partial[i]; cost += partial[i]*partial[i]; } cost *= div_table; cost *= div_table; for (i = 0; i < 7; i++) { cost += OD_DIVU_SMALL(partial[i]*partial[i], i + 1) + OD_DIVU_SMALL(partial[14 - i]*partial[14 - i], i + 1); cost += OD_DIVU_SMALL(partial[i]*partial[i], i + 1) + OD_DIVU_SMALL(partial[14 - i]*partial[14 - i], i + 1); cost += (partial[i]*partial[i] + partial[14 - i]*partial[14 - i])*div_table[i + 1]; cost += (partial[i]*partial[i] + partial[14 - i]*partial[14 - i])*div_table[i + 1]; } cost += partial*partial[8 - 1] >> 3; cost += partial*partial[8 - 1] >> 3; cost += partial*partial*div_table; cost += partial*partial*div_table; for (i = 1; i < 8; i += 2) { int j; for (j = 0; j < 4 + 1; j++) { cost[i] += partial[i][3 + j]*partial[i][3 + j] >> 3; cost[i] += partial[i][3 + j]*partial[i][3 + j]; } cost[i] *= div_table; for (j = 0; j < 4 - 1; j++) { cost[i] += OD_DIVU_SMALL(partial[i][j]*partial[i][j], 2*j + 2) + 
OD_DIVU_SMALL(partial[i][10 - j]*partial[i][10 - j], 2*j + 2); cost[i] += (partial[i][j]*partial[i][j] + partial[i][10 - j]*partial[i][10 - j])*div_table[2*j + 2]; } } for (i = 0; i < 8; i++) { ... ... @@ -111,6 +120,9 @@ static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var, /* Difference between the optimal variance and the variance along the orthogonal direction. Again, the sum(x^2) terms cancel out. */ *var = best_cost - cost[(best_dir + 4) & 7]; /* We'd normally divide by 840, but dividing by 1024 is close enough for what we're going to do with this. */ *var >>= 10; return best_dir; } ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!