Commit 64f7a4d8 authored by John Koleszar, committed by Jim Bankoski
Browse files

Wide loopfilter 16 pix at a time

Where possible, do the 16 pixel wide filter while doing the horizontal
filtering pass. The same approach can be taken for the mbloop_filter
when that's implemented. Doing so on the vertical pass is a little more
involved, but possible.

Change-Id: I010cb505e623464247ae8f67fa25a0cdac091320
parent 7494bba6
......@@ -186,14 +186,22 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
int only_4x4_1,
const struct loop_filter_info *lfi) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
mask; mask >>= count) {
count =1;
if (mask & 1) {
if (!only_4x4_1) {
if (mask_16x16 & 1) {
vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
if ((mask_16x16 & 3) == 3) {
vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 2);
count = 2;
} else {
vp9_mb_lpf_horizontal_edge_w(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, 1);
}
assert(!(mask_8x8 & 1));
assert(!(mask_4x4 & 1));
assert(!(mask_4x4_int & 1));
......@@ -214,12 +222,12 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
vp9_loop_filter_horizontal_edge(s + 4 * pitch, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, 1);
}
s += 8;
lfi++;
mask_16x16 >>= 1;
mask_8x8 >>= 1;
mask_4x4 >>= 1;
mask_4x4_int >>= 1;
s += 8 * count;
lfi += count;
mask_16x16 >>= count;
mask_8x8 >>= count;
mask_4x4 >>= count;
mask_4x4_int >>= count;
}
}
......
......@@ -258,12 +258,13 @@ static INLINE void wide_mbfilter(int8_t mask, uint8_t hev,
void vp9_mb_lpf_horizontal_edge_w_c(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh) {
const uint8_t *thresh,
int count) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
for (i = 0; i < 8; ++i) {
for (i = 0; i < 8 * count; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask = filter_mask(*limit, *blimit,
......
......@@ -233,7 +233,7 @@ specialize vp9_mbloop_filter_vertical_edge sse2 neon
prototype void vp9_loop_filter_vertical_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
specialize vp9_loop_filter_vertical_edge mmx neon
prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"
prototype void vp9_mb_lpf_horizontal_edge_w "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
specialize vp9_mb_lpf_horizontal_edge_w sse2
prototype void vp9_mbloop_filter_horizontal_edge "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment