od_filter_dering_direction() no longer returns the total change

Change-Id: Ibe19f4422324680b0d3132eab79bdc035911d79d
parent be26e03d
...@@ -626,8 +626,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") { ...@@ -626,8 +626,8 @@ if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd"; add_proto qw/void aom_clpf_block/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd"; add_proto qw/void aom_clpf_hblock/, "uint8_t *dst, const uint16_t *src, int dstride, int sstride, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift"; add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
add_proto qw/int od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir"; add_proto qw/void od_filter_dering_direction_4x4/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
add_proto qw/int od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir"; add_proto qw/void od_filter_dering_direction_8x8/, "uint16_t *y, int ystride, const uint16_t *in, int threshold, int dir";
add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride"; add_proto qw/void copy_8x8_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride"; add_proto qw/void copy_4x4_16bit_to_8bit/, "uint8_t *dst, int dstride, const uint16_t *src, int sstride";
......
...@@ -113,14 +113,13 @@ int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var, ...@@ -113,14 +113,13 @@ int od_dir_find8_c(const uint16_t *img, int stride, int32_t *var,
} }
/* Smooth in the direction detected. */ /* Smooth in the direction detected. */
int od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, void od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold, const uint16_t *in, int threshold,
int dir) { int dir) {
int i; int i;
int j; int j;
int k; int k;
static const int taps[3] = { 3, 2, 1 }; static const int taps[3] = { 3, 2, 1 };
int total_abs = 0;
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) { for (j = 0; j < 8; j++) {
int16_t sum; int16_t sum;
...@@ -139,23 +138,20 @@ int od_filter_dering_direction_8x8_c(uint16_t *y, int ystride, ...@@ -139,23 +138,20 @@ int od_filter_dering_direction_8x8_c(uint16_t *y, int ystride,
if (abs(p1) < threshold) sum += taps[k] * p1; if (abs(p1) < threshold) sum += taps[k] * p1;
} }
sum = (sum + 8) >> 4; sum = (sum + 8) >> 4;
total_abs += abs(sum);
yy = xx + sum; yy = xx + sum;
y[i * ystride + j] = yy; y[i * ystride + j] = yy;
} }
} }
return (total_abs + 8) >> 4;
} }
/* Smooth in the direction detected. */ /* Smooth in the direction detected. */
int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, void od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
const uint16_t *in, int threshold, const uint16_t *in, int threshold,
int dir) { int dir) {
int i; int i;
int j; int j;
int k; int k;
static const int taps[2] = { 4, 1 }; static const int taps[2] = { 4, 1 };
int total_abs = 0;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) { for (j = 0; j < 4; j++) {
int16_t sum; int16_t sum;
...@@ -174,12 +170,10 @@ int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride, ...@@ -174,12 +170,10 @@ int od_filter_dering_direction_4x4_c(uint16_t *y, int ystride,
if (abs(p1) < threshold) sum += taps[k] * p1; if (abs(p1) < threshold) sum += taps[k] * p1;
} }
sum = (sum + 8) >> 4; sum = (sum + 8) >> 4;
total_abs += abs(sum);
yy = xx + sum; yy = xx + sum;
y[i * ystride + j] = yy; y[i * ystride + j] = yy;
} }
} }
return (total_abs + 2) >> 2;
} }
/* This table approximates x^0.16 with the index being log2(x). It is clamped /* This table approximates x^0.16 with the index being log2(x). It is clamped
......
...@@ -39,7 +39,7 @@ typedef struct { ...@@ -39,7 +39,7 @@ typedef struct {
unsigned char bx; unsigned char bx;
} dering_list; } dering_list;
typedef int (*od_filter_dering_direction_func)(uint16_t *y, int ystride, typedef void (*od_filter_dering_direction_func)(uint16_t *y, int ystride,
const uint16_t *in, const uint16_t *in,
int threshold, int dir); int threshold, int dir);
void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src, void copy_dering_16bit_to_16bit(uint16_t *dst, int dstride, uint16_t *src,
......
...@@ -214,9 +214,9 @@ static INLINE v128 od_cmplt_abs_epi16(v128 in, v128 threshold) { ...@@ -214,9 +214,9 @@ static INLINE v128 od_cmplt_abs_epi16(v128 in, v128 threshold) {
return v128_cmplt_s16(v128_abs_s16(in), threshold); return v128_cmplt_s16(v128_abs_s16(in), threshold);
} }
int SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride, void SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
const uint16_t *in, int threshold, const uint16_t *in,
int dir) { int threshold, int dir) {
int i; int i;
v128 sum; v128 sum;
v128 p; v128 p;
...@@ -225,11 +225,9 @@ int SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride, ...@@ -225,11 +225,9 @@ int SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
v128 res; v128 res;
v128 tmp; v128 tmp;
v128 thresh; v128 thresh;
v128 total_abs;
int off1, off2; int off1, off2;
off1 = OD_DIRECTION_OFFSETS_TABLE[dir][0]; off1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
off2 = OD_DIRECTION_OFFSETS_TABLE[dir][1]; off2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
total_abs = v128_zero();
thresh = v128_dup_16(threshold); thresh = v128_dup_16(threshold);
for (i = 0; i < 4; i += 2) { for (i = 0; i < 4; i += 2) {
sum = v128_zero(); sum = v128_zero();
...@@ -275,17 +273,15 @@ int SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride, ...@@ -275,17 +273,15 @@ int SIMD_FUNC(od_filter_dering_direction_4x4)(uint16_t *y, int ystride,
/*res = row + ((sum + 8) >> 4)*/ /*res = row + ((sum + 8) >> 4)*/
res = v128_add_16(sum, v128_dup_16(8)); res = v128_add_16(sum, v128_dup_16(8));
res = v128_shr_n_s16(res, 4); res = v128_shr_n_s16(res, 4);
total_abs = v128_add_16(total_abs, v128_abs_s16(res));
res = v128_add_16(row, res); res = v128_add_16(row, res);
v64_store_aligned(&y[i * ystride], v128_low_v64(res)); v64_store_aligned(&y[i * ystride], v128_low_v64(res));
v64_store_aligned(&y[(i + 1) * ystride], v128_high_v64(res)); v64_store_aligned(&y[(i + 1) * ystride], v128_high_v64(res));
} }
return (int)((v128_dotp_s16(total_abs, v128_dup_16(1)) + 2) >> 2);
} }
int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride, void SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
const uint16_t *in, int threshold, const uint16_t *in,
int dir) { int threshold, int dir) {
int i; int i;
v128 sum; v128 sum;
v128 p0, p1; v128 p0, p1;
...@@ -293,12 +289,10 @@ int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride, ...@@ -293,12 +289,10 @@ int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
v128 row; v128 row;
v128 res; v128 res;
v128 thresh; v128 thresh;
v128 total_abs;
int off1, off2, off3; int off1, off2, off3;
off1 = OD_DIRECTION_OFFSETS_TABLE[dir][0]; off1 = OD_DIRECTION_OFFSETS_TABLE[dir][0];
off2 = OD_DIRECTION_OFFSETS_TABLE[dir][1]; off2 = OD_DIRECTION_OFFSETS_TABLE[dir][1];
off3 = OD_DIRECTION_OFFSETS_TABLE[dir][2]; off3 = OD_DIRECTION_OFFSETS_TABLE[dir][2];
total_abs = v128_zero();
thresh = v128_dup_16(threshold); thresh = v128_dup_16(threshold);
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
sum = v128_zero(); sum = v128_zero();
...@@ -353,11 +347,9 @@ int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride, ...@@ -353,11 +347,9 @@ int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
/*res = row + ((sum + 8) >> 4)*/ /*res = row + ((sum + 8) >> 4)*/
res = v128_add_16(sum, v128_dup_16(8)); res = v128_add_16(sum, v128_dup_16(8));
res = v128_shr_n_s16(res, 4); res = v128_shr_n_s16(res, 4);
total_abs = v128_add_16(total_abs, v128_abs_s16(res));
res = v128_add_16(row, res); res = v128_add_16(row, res);
v128_store_unaligned(&y[i * ystride], res); v128_store_unaligned(&y[i * ystride], res);
} }
return (int)((v128_dotp_s16(total_abs, v128_dup_16(1)) + 8) >> 4);
} }
void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride, void SIMD_FUNC(copy_8x8_16bit_to_8bit)(uint8_t *dst, int dstride,
......
...@@ -27,7 +27,7 @@ using libaom_test::ACMRandom; ...@@ -27,7 +27,7 @@ using libaom_test::ACMRandom;
namespace { namespace {
typedef int (*dering_dir_t)(uint16_t *y, int ystride, const uint16_t *in, typedef void (*dering_dir_t)(uint16_t *y, int ystride, const uint16_t *in,
int threshold, int dir); int threshold, int dir);
typedef std::tr1::tuple<dering_dir_t, dering_dir_t, int> dering_dir_param_t; typedef std::tr1::tuple<dering_dir_t, dering_dir_t, int> dering_dir_param_t;
...@@ -52,10 +52,11 @@ class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> { ...@@ -52,10 +52,11 @@ class CDEFDeringDirTest : public ::testing::TestWithParam<dering_dir_param_t> {
typedef CDEFDeringDirTest CDEFDeringSpeedTest; typedef CDEFDeringDirTest CDEFDeringSpeedTest;
void test_dering(int bsize, int iterations, void test_dering(int bsize, int iterations,
int (*dering)(uint16_t *y, int ystride, const uint16_t *in, void (*dering)(uint16_t *y, int ystride, const uint16_t *in,
int threshold, int dir), int threshold, int dir),
int (*ref_dering)(uint16_t *y, int ystride, const uint16_t *in, void (*ref_dering)(uint16_t *y, int ystride,
int threshold, int dir)) { const uint16_t *in, int threshold,
int dir)) {
const int size = 8; const int size = 8;
const int ysize = size + 2 * OD_FILT_VBORDER; const int ysize = size + 2 * OD_FILT_VBORDER;
ACMRandom rnd(ACMRandom::DeterministicSeed()); ACMRandom rnd(ACMRandom::DeterministicSeed());
...@@ -69,7 +70,6 @@ void test_dering(int bsize, int iterations, ...@@ -69,7 +70,6 @@ void test_dering(int bsize, int iterations,
int boundary, depth, bits, level, count, errdepth = 0, errthreshold = 0, int boundary, depth, bits, level, count, errdepth = 0, errthreshold = 0,
errboundary = 0; errboundary = 0;
unsigned int pos = 0; unsigned int pos = 0;
int ref_res = 0, res = 0;
for (boundary = 0; boundary < 16; boundary++) { for (boundary = 0; boundary < 16; boundary++) {
for (depth = 8; depth <= 12; depth += 2) { for (depth = 8; depth <= 12; depth += 2) {
...@@ -105,17 +105,15 @@ void test_dering(int bsize, int iterations, ...@@ -105,17 +105,15 @@ void test_dering(int bsize, int iterations,
for (dir = 0; dir < 8; dir++) { for (dir = 0; dir < 8; dir++) {
for (threshold = 0; threshold < 64 << (depth - 8) && !error; for (threshold = 0; threshold < 64 << (depth - 8) && !error;
threshold += !error << (depth - 8)) { threshold += !error << (depth - 8)) {
ref_res = ref_dering( ref_dering(ref_d, size, s + OD_FILT_HBORDER +
ref_d, size, OD_FILT_VBORDER * OD_FILT_BSTRIDE,
s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE,
threshold, dir); threshold, dir);
// If dering and ref_dering are the same, we're just testing // If dering and ref_dering are the same, we're just testing
// speed // speed
if (dering != ref_dering) if (dering != ref_dering)
ASM_REGISTER_STATE_CHECK( ASM_REGISTER_STATE_CHECK(dering(
res = d, size,
dering(d, size, s + OD_FILT_HBORDER + s + OD_FILT_HBORDER + OD_FILT_VBORDER * OD_FILT_BSTRIDE,
OD_FILT_VBORDER * OD_FILT_BSTRIDE,
threshold, dir)); threshold, dir));
if (ref_dering != dering) { if (ref_dering != dering) {
for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error; pos++) { for (pos = 0; pos < sizeof(d) / sizeof(*d) && !error; pos++) {
...@@ -124,7 +122,6 @@ void test_dering(int bsize, int iterations, ...@@ -124,7 +122,6 @@ void test_dering(int bsize, int iterations,
errthreshold = threshold; errthreshold = threshold;
errboundary = boundary; errboundary = boundary;
} }
error |= res != ref_res;
} }
} }
} }
...@@ -140,7 +137,6 @@ void test_dering(int bsize, int iterations, ...@@ -140,7 +137,6 @@ void test_dering(int bsize, int iterations,
<< "First error at " << pos % size << "," << pos / size << "First error at " << pos % size << "," << pos / size
<< " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos] << " (" << (int16_t)ref_d[pos] << " : " << (int16_t)d[pos]
<< ") " << std::endl << ") " << std::endl
<< "return: " << res << " : " << ref_res << std::endl
<< "threshold: " << errthreshold << std::endl << "threshold: " << errthreshold << std::endl
<< "depth: " << errdepth << std::endl << "depth: " << errdepth << std::endl
<< "size: " << bsize << std::endl << "size: " << bsize << std::endl
...@@ -149,10 +145,10 @@ void test_dering(int bsize, int iterations, ...@@ -149,10 +145,10 @@ void test_dering(int bsize, int iterations,
} }
void test_dering_speed(int bsize, int iterations, void test_dering_speed(int bsize, int iterations,
int (*dering)(uint16_t *y, int ystride, void (*dering)(uint16_t *y, int ystride,
const uint16_t *in, int threshold, const uint16_t *in, int threshold,
int dir), int dir),
int (*ref_dering)(uint16_t *y, int ystride, void (*ref_dering)(uint16_t *y, int ystride,
const uint16_t *in, int threshold, const uint16_t *in, int threshold,
int dir)) { int dir)) {
aom_usec_timer ref_timer; aom_usec_timer ref_timer;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment