Commit 21498529 authored by Dmitry Kovalev's avatar Dmitry Kovalev Committed by Gerrit Code Review
Browse files

Merge "Removing simple loopfilter and code duplication from loopfilter code." into experimental

parents 089ed30d 1c43e643
......@@ -49,9 +49,6 @@ struct loop_filter_info {
void sym(uint8_t *y, uint8_t *u, uint8_t *v, \
int ystride, int uv_stride, struct loop_filter_info *lfi)
#define prototype_simple_loopfilter(sym) \
void sym(uint8_t *y, int ystride, const unsigned char *blimit)
#if ARCH_X86 || ARCH_X86_64
#include "x86/vp9_loopfilter_x86.h"
#endif
......
......@@ -8,15 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "vpx_config.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
static INLINE int8_t signed_char_clamp(int t) {
t = (t < -128 ? -128 : t);
t = (t > 127 ? 127 : t);
return (int8_t) t;
return (int8_t)clamp(t, -128, 127);
}
// should we apply any filter at all: 11111111 yes, 00000000 no
......@@ -36,7 +34,7 @@ static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit,
return ~mask;
}
// is there high variance internal edge: 11111111 yes, 00000000 no
// is there high edge variance internal edge: 11111111 yes, 00000000 no
static INLINE int8_t hevmask(uint8_t thresh, uint8_t p1, uint8_t p0,
uint8_t q0, uint8_t q1) {
int8_t hev = 0;
......@@ -68,12 +66,9 @@ static INLINE void filter(int8_t mask, uint8_t hev, uint8_t *op1,
*oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
*op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
filter = filter1;
// outer tap adjustments
filter += 1;
filter >>= 1;
filter &= ~hev;
filter = ((filter1 + 1) >> 1) & ~hev;
*oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
......@@ -84,23 +79,19 @@ void vp9_loop_filter_horizontal_edge_c(uint8_t *s, int p /* pitch */,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
int i = 0;
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
do {
const int8_t mask = filter_mask(limit[0], blimit[0],
s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
s[0 * p], s[1 * p], s[2 * p], s[3 * p]);
// high edge variance
const int8_t hev = hevmask(thresh[0],
s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
for (i = 0; i < 8 * count; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
} while (++i < count * 8);
}
}
void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
......@@ -108,21 +99,21 @@ void vp9_loop_filter_vertical_edge_c(uint8_t *s, int pitch,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
int i = 0;
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
do {
const int8_t mask = filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1],
s[0], s[1], s[2], s[3]);
// high edge variance
const int8_t hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
for (i = 0; i < 8 * count; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
filter(mask, hev, s - 2, s - 1, s, s + 1);
s += pitch;
} while (++i < count * 8);
}
}
static INLINE int8_t flatmask4(uint8_t thresh,
uint8_t p3, uint8_t p2,
uint8_t p1, uint8_t p0,
......@@ -157,14 +148,8 @@ static INLINE void mbfilter(int8_t mask, uint8_t hev, uint8_t flat,
uint8_t *oq2, uint8_t *oq3) {
// use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line
if (flat && mask) {
const uint8_t p3 = *op3;
const uint8_t p2 = *op2;
const uint8_t p1 = *op1;
const uint8_t p0 = *op0;
const uint8_t q0 = *oq0;
const uint8_t q1 = *oq1;
const uint8_t q2 = *oq2;
const uint8_t q3 = *oq3;
const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
*op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3);
*op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3);
......@@ -173,33 +158,7 @@ static INLINE void mbfilter(int8_t mask, uint8_t hev, uint8_t flat,
*oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3);
*oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3);
} else {
int8_t filter1, filter2;
const int8_t ps1 = (int8_t) *op1 ^ 0x80;
const int8_t ps0 = (int8_t) *op0 ^ 0x80;
const int8_t qs0 = (int8_t) *oq0 ^ 0x80;
const int8_t qs1 = (int8_t) *oq1 ^ 0x80;
// add outer taps if we have high edge variance
int8_t filter = signed_char_clamp(ps1 - qs1) & hev;
// inner taps
filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
filter1 = signed_char_clamp(filter + 4) >> 3;
filter2 = signed_char_clamp(filter + 3) >> 3;
*oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
*op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
filter = filter1;
// outer tap adjustments
filter += 1;
filter >>= 1;
filter &= ~hev;
*oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
filter(mask, hev, op1, op0, oq0, oq1);
}
}
......@@ -208,28 +167,23 @@ void vp9_mbloop_filter_horizontal_edge_c(uint8_t *s, int p,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
int i = 0;
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
do {
const int8_t mask = filter_mask(limit[0], blimit[0],
s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]);
const int8_t hev = hevmask(thresh[0],
s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
const int8_t flat = flatmask4(1,
s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]);
for (i = 0; i < 8 * count; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
mbfilter(mask, hev, flat,
s - 4 * p, s - 3 * p, s - 2 * p, s - 1 * p,
s, s + 1 * p, s + 2 * p, s + 3 * p);
++s;
} while (++i < count * 8);
}
}
void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
......@@ -237,49 +191,19 @@ void vp9_mbloop_filter_vertical_edge_c(uint8_t *s, int pitch,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
int i = 0;
do {
const int8_t mask = filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1],
s[0], s[1], s[2], s[3]);
const int8_t hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
const int8_t flat = flatmask4(1, s[-4], s[-3], s[-2], s[-1],
s[ 0], s[ 1], s[ 2], s[ 3]);
int i;
for (i = 0; i < 8 * count; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hevmask(thresh[0], p1, p0, q0, q1);
const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
mbfilter(mask, hev, flat, s - 4, s - 3, s - 2, s - 1,
s, s + 1, s + 2, s + 3);
s += pitch;
} while (++i < count * 8);
}
// should we apply any filter at all: 11111111 yes, 00000000 no
static INLINE int8_t simple_filter_mask(uint8_t blimit,
uint8_t p1, uint8_t p0,
uint8_t q0, uint8_t q1) {
return (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
}
static INLINE void simple_filter(int8_t mask,
uint8_t *op1, uint8_t *op0,
uint8_t *oq0, uint8_t *oq1) {
int8_t filter1, filter2;
const int8_t p1 = (int8_t) *op1 ^ 0x80;
const int8_t p0 = (int8_t) *op0 ^ 0x80;
const int8_t q0 = (int8_t) *oq0 ^ 0x80;
const int8_t q1 = (int8_t) *oq1 ^ 0x80;
int8_t filter = signed_char_clamp(p1 - q1);
filter = signed_char_clamp(filter + 3 * (q0 - p0));
filter &= mask;
// save bottom 3 bits so that we round one side +4 and the other +3
filter1 = signed_char_clamp(filter + 4) >> 3;
*oq0 = signed_char_clamp(q0 - filter1) ^ 0x80;
filter2 = signed_char_clamp(filter + 3) >> 3;
*op0 = signed_char_clamp(p0 + filter2) ^ 0x80;
}
}
/* Vertical MB Filtering */
......@@ -395,22 +319,11 @@ static INLINE void wide_mbfilter(int8_t mask, uint8_t hev,
uint8_t *oq7) {
// use a 15 tap filter [1,1,1,1,1,1,1,2,1,1,1,1,1,1,1] for flat line
if (flat2 && flat && mask) {
const uint8_t p7 = *op7;
const uint8_t p6 = *op6;
const uint8_t p5 = *op5;
const uint8_t p4 = *op4;
const uint8_t p3 = *op3;
const uint8_t p2 = *op2;
const uint8_t p1 = *op1;
const uint8_t p0 = *op0;
const uint8_t q0 = *oq0;
const uint8_t q1 = *oq1;
const uint8_t q2 = *oq2;
const uint8_t q3 = *oq3;
const uint8_t q4 = *oq4;
const uint8_t q5 = *oq5;
const uint8_t q6 = *oq6;
const uint8_t q7 = *oq7;
const uint8_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4,
p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3,
q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7;
*op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 +
q0, 4);
......@@ -440,49 +353,8 @@ static INLINE void wide_mbfilter(int8_t mask, uint8_t hev,
q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4);
*oq6 = ROUND_POWER_OF_TWO(p0 +
q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4);
} else if (flat && mask) {
const uint8_t p3 = *op3;
const uint8_t p2 = *op2;
const uint8_t p1 = *op1;
const uint8_t p0 = *op0;
const uint8_t q0 = *oq0;
const uint8_t q1 = *oq1;
const uint8_t q2 = *oq2;
const uint8_t q3 = *oq3;
*op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3);
*op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3);
*op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3);
*oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3);
*oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3);
*oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3);
} else {
int8_t filter1, filter2;
const int8_t ps1 = (int8_t) * op1 ^ 0x80;
const int8_t ps0 = (int8_t) * op0 ^ 0x80;
const int8_t qs0 = (int8_t) * oq0 ^ 0x80;
const int8_t qs1 = (int8_t) * oq1 ^ 0x80;
// add outer taps if we have high edge variance
int8_t filter = signed_char_clamp(ps1 - qs1) & hev;
// inner taps
filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
filter1 = signed_char_clamp(filter + 4) >> 3;
filter2 = signed_char_clamp(filter + 3) >> 3;
*oq0 = signed_char_clamp(qs0 - filter1) ^ 0x80;
*op0 = signed_char_clamp(ps0 + filter2) ^ 0x80;
filter = filter1;
// outer tap adjustments
filter += 1;
filter >>= 1;
filter &= ~hev;
*oq1 = signed_char_clamp(qs1 - filter) ^ 0x80;
*op1 = signed_char_clamp(ps1 + filter) ^ 0x80;
mbfilter(mask, hev, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3);
}
}
......@@ -491,25 +363,20 @@ void vp9_mb_lpf_horizontal_edge_w(uint8_t *s, int p,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
int i = 0;
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
do {
const int8_t mask = filter_mask(limit[0], blimit[0],
s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]);
const int8_t hev = hevmask(thresh[0],
s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]);
const int8_t flat = flatmask4(1,
s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p],
s[ 0 * p], s[ 1 * p], s[ 2 * p], s[ 3 * p]);
for (i = 0; i < 8 * count; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t flat2 = flatmask5(1,
s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], s[-1 * p],
s[ 0 * p], s[ 4 * p], s[ 5 * p], s[ 6 * p], s[ 7 * p]);
s[-8 * p], s[-7 * p], s[-6 * p], s[-5 * p], p0,
q0, s[4 * p], s[5 * p], s[6 * p], s[7 * p]);
wide_mbfilter(mask, hev, flat, flat2,
s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p,
......@@ -518,33 +385,31 @@ void vp9_mb_lpf_horizontal_edge_w(uint8_t *s, int p,
s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p);
++s;
} while (++i < count * 8);
}
}
void vp9_mb_lpf_vertical_edge_w(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
int i = 0;
do {
const int8_t mask = filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1],
s[0], s[1], s[2], s[3]);
const int8_t hev = hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
const int8_t flat = flatmask4(1, s[-4], s[-3], s[-2], s[-1],
s[ 0], s[ 1], s[ 2], s[ 3]);
const int8_t flat2 = flatmask5(1, s[-8], s[-7], s[-6], s[-5], s[-1],
s[ 0], s[ 4], s[ 5], s[ 6], s[ 7]);
int i;
for (i = 0; i < 8 * count; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t hev = hevmask(*thresh, p1, p0, q0, q1);
const int8_t flat = flatmask4(1, p3, p2, p1, p0, q0, q1, q2, q3);
const int8_t flat2 = flatmask5(1, s[-8], s[-7], s[-6], s[-5], p0,
q0, s[4], s[5], s[6], s[7]);
wide_mbfilter(mask, hev, flat, flat2,
s - 8, s - 7, s - 6, s - 5,
s - 4, s - 3, s - 2, s - 1,
s, s + 1, s + 2, s + 3,
s + 4, s + 5, s + 6, s + 7);
s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1,
s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7);
s += p;
} while (++i < count * 8);
}
}
void vp9_lpf_mbv_w_c(uint8_t *y, uint8_t *u, uint8_t *v,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment