Commit e0a80519 authored by Scott LaVarnway's avatar Scott LaVarnway
Browse files

loopfilter improvements

Local variable offsets are now consistent for the functions,
removed unused parameters, reworked the assembly to eliminate
stalls/instructions.

Change-Id: Iaa37668f8a9bb8754df435f6a51c3a08d547f879
parent d9ca5245
......@@ -210,6 +210,8 @@ void vp8_loop_filter_frame
int mb_row;
int mb_col;
int mb_rows = cm->mb_rows;
int mb_cols = cm->mb_cols;
int filter_level;
......@@ -217,6 +219,8 @@ void vp8_loop_filter_frame
/* Point at base of Mb MODE_INFO list */
const MODE_INFO *mode_info_context = cm->mi;
int post_y_stride = post->y_stride;
int post_uv_stride = post->uv_stride;
/* Initialize the loop filter for this frame. */
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
......@@ -227,23 +231,23 @@ void vp8_loop_filter_frame
v_ptr = post->v_buffer;
/* vp8_filter each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
if (cm->filter_type == NORMAL_LOOPFILTER)
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
for (mb_row = 0; mb_row < mb_rows; mb_row++)
{
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
for (mb_col = 0; mb_col < mb_cols; mb_col++)
{
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{
if (cm->filter_type == NORMAL_LOOPFILTER)
if (filter_level)
{
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
lfi.mblim = lfi_n->mblim[filter_level];
......@@ -253,54 +257,87 @@ void vp8_loop_filter_frame
if (mb_col > 0)
vp8_loop_filter_mbv
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
if (!skip_lf)
vp8_loop_filter_bv
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
/* don't apply across umv border */
if (mb_row > 0)
vp8_loop_filter_mbh
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
if (!skip_lf)
vp8_loop_filter_bh
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
}
else
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
mode_info_context++; /* step to next MB */
}
y_ptr += post_y_stride * 16 - post->y_width;
u_ptr += post_uv_stride * 8 - post->uv_width;
v_ptr += post_uv_stride * 8 - post->uv_width;
mode_info_context++; /* Skip border mb */
}
}
else /* SIMPLE_LOOPFILTER */
{
for (mb_row = 0; mb_row < mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < mb_cols; mb_col++)
{
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{
const unsigned char * mblim = lfi_n->mblim[filter_level];
const unsigned char * blim = lfi_n->blim[filter_level];
if (mb_col > 0)
vp8_loop_filter_simple_mbv
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
(y_ptr, post_y_stride, mblim);
if (!skip_lf)
vp8_loop_filter_simple_bv
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
(y_ptr, post_y_stride, blim);
/* don't apply across umv border */
if (mb_row > 0)
vp8_loop_filter_simple_mbh
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
(y_ptr, post_y_stride, mblim);
if (!skip_lf)
vp8_loop_filter_simple_bh
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
(y_ptr, post_y_stride, blim);
}
}
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
mode_info_context++; /* step to next MB */
}
mode_info_context++; /* step to next MB */
}
y_ptr += post_y_stride * 16 - post->y_width;
u_ptr += post_uv_stride * 8 - post->uv_width;
v_ptr += post_uv_stride * 8 - post->uv_width;
y_ptr += post->y_stride * 16 - post->y_width;
u_ptr += post->uv_stride * 8 - post->uv_width;
v_ptr += post->uv_stride * 8 - post->uv_width;
mode_info_context++; /* Skip border mb */
mode_info_context++; /* Skip border mb */
}
}
}
......
This diff is collapsed.
......@@ -16,6 +16,10 @@
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh, int count)
#define prototype_loopfilter_nc(sym) \
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh)
#define prototype_simple_loopfilter(sym) \
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
......@@ -30,11 +34,11 @@ prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_bv_y_sse2);
prototype_loopfilter(vp8_loop_filter_bh_y_sse2);
#else
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2);
#endif
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2);
prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
......@@ -124,7 +128,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
......@@ -135,7 +139,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
......@@ -149,9 +153,9 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
#if ARCH_X86_64
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
#else
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
#endif
if (u_ptr)
......@@ -174,9 +178,9 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
#if ARCH_X86_64
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
#else
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
#endif
if (u_ptr)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment