Skip to content
Snippets Groups Projects
Commit e0a80519 authored by Scott LaVarnway
Browse files

loopfilter improvements

Local variable offsets are now consistent for the functions,
removed unused parameters, reworked the assembly to eliminate
stalls/instructions.

Change-Id: Iaa37668f8a9bb8754df435f6a51c3a08d547f879
parent d9ca5245
No related branches found
No related tags found
No related merge requests found
...@@ -210,6 +210,8 @@ void vp8_loop_filter_frame ...@@ -210,6 +210,8 @@ void vp8_loop_filter_frame
int mb_row; int mb_row;
int mb_col; int mb_col;
int mb_rows = cm->mb_rows;
int mb_cols = cm->mb_cols;
int filter_level; int filter_level;
...@@ -217,6 +219,8 @@ void vp8_loop_filter_frame ...@@ -217,6 +219,8 @@ void vp8_loop_filter_frame
/* Point at base of Mb MODE_INFO list */ /* Point at base of Mb MODE_INFO list */
const MODE_INFO *mode_info_context = cm->mi; const MODE_INFO *mode_info_context = cm->mi;
int post_y_stride = post->y_stride;
int post_uv_stride = post->uv_stride;
/* Initialize the loop filter for this frame. */ /* Initialize the loop filter for this frame. */
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level); vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
...@@ -227,23 +231,23 @@ void vp8_loop_filter_frame ...@@ -227,23 +231,23 @@ void vp8_loop_filter_frame
v_ptr = post->v_buffer; v_ptr = post->v_buffer;
/* vp8_filter each macro block */ /* vp8_filter each macro block */
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) if (cm->filter_type == NORMAL_LOOPFILTER)
{ {
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) for (mb_row = 0; mb_row < mb_rows; mb_row++)
{ {
int skip_lf = (mode_info_context->mbmi.mode != B_PRED && for (mb_col = 0; mb_col < mb_cols; mb_col++)
mode_info_context->mbmi.mode != SPLITMV && {
mode_info_context->mbmi.mb_skip_coeff); int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id; const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame; const int ref_frame = mode_info_context->mbmi.ref_frame;
filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level) if (filter_level)
{
if (cm->filter_type == NORMAL_LOOPFILTER)
{ {
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
lfi.mblim = lfi_n->mblim[filter_level]; lfi.mblim = lfi_n->mblim[filter_level];
...@@ -253,54 +257,87 @@ void vp8_loop_filter_frame ...@@ -253,54 +257,87 @@ void vp8_loop_filter_frame
if (mb_col > 0) if (mb_col > 0)
vp8_loop_filter_mbv vp8_loop_filter_mbv
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
if (!skip_lf) if (!skip_lf)
vp8_loop_filter_bv vp8_loop_filter_bv
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
/* don't apply across umv border */ /* don't apply across umv border */
if (mb_row > 0) if (mb_row > 0)
vp8_loop_filter_mbh vp8_loop_filter_mbh
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
if (!skip_lf) if (!skip_lf)
vp8_loop_filter_bh vp8_loop_filter_bh
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi); (y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
} }
else
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
mode_info_context++; /* step to next MB */
}
y_ptr += post_y_stride * 16 - post->y_width;
u_ptr += post_uv_stride * 8 - post->uv_width;
v_ptr += post_uv_stride * 8 - post->uv_width;
mode_info_context++; /* Skip border mb */
}
}
else /* SIMPLE_LOOPFILTER */
{
for (mb_row = 0; mb_row < mb_rows; mb_row++)
{
for (mb_col = 0; mb_col < mb_cols; mb_col++)
{
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{ {
const unsigned char * mblim = lfi_n->mblim[filter_level];
const unsigned char * blim = lfi_n->blim[filter_level];
if (mb_col > 0) if (mb_col > 0)
vp8_loop_filter_simple_mbv vp8_loop_filter_simple_mbv
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); (y_ptr, post_y_stride, mblim);
if (!skip_lf) if (!skip_lf)
vp8_loop_filter_simple_bv vp8_loop_filter_simple_bv
(y_ptr, post->y_stride, lfi_n->blim[filter_level]); (y_ptr, post_y_stride, blim);
/* don't apply across umv border */ /* don't apply across umv border */
if (mb_row > 0) if (mb_row > 0)
vp8_loop_filter_simple_mbh vp8_loop_filter_simple_mbh
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); (y_ptr, post_y_stride, mblim);
if (!skip_lf) if (!skip_lf)
vp8_loop_filter_simple_bh vp8_loop_filter_simple_bh
(y_ptr, post->y_stride, lfi_n->blim[filter_level]); (y_ptr, post_y_stride, blim);
} }
}
y_ptr += 16; y_ptr += 16;
u_ptr += 8; u_ptr += 8;
v_ptr += 8; v_ptr += 8;
mode_info_context++; /* step to next MB */ mode_info_context++; /* step to next MB */
} }
y_ptr += post_y_stride * 16 - post->y_width;
u_ptr += post_uv_stride * 8 - post->uv_width;
v_ptr += post_uv_stride * 8 - post->uv_width;
y_ptr += post->y_stride * 16 - post->y_width; mode_info_context++; /* Skip border mb */
u_ptr += post->uv_stride * 8 - post->uv_width;
v_ptr += post->uv_stride * 8 - post->uv_width;
mode_info_context++; /* Skip border mb */ }
} }
} }
......
This diff is collapsed.
...@@ -16,6 +16,10 @@ ...@@ -16,6 +16,10 @@
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\ void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh, int count) const unsigned char *limit, const unsigned char *thresh, int count)
#define prototype_loopfilter_nc(sym) \
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh)
#define prototype_simple_loopfilter(sym) \ #define prototype_simple_loopfilter(sym) \
void sym(unsigned char *y, int ystride, const unsigned char *blimit) void sym(unsigned char *y, int ystride, const unsigned char *blimit)
...@@ -30,11 +34,11 @@ prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx); ...@@ -30,11 +34,11 @@ prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_bv_y_sse2); prototype_loopfilter(vp8_loop_filter_bv_y_sse2);
prototype_loopfilter(vp8_loop_filter_bh_y_sse2); prototype_loopfilter(vp8_loop_filter_bh_y_sse2);
#else #else
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2); prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2); prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2);
#endif #endif
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2); prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2); prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
...@@ -124,7 +128,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned ...@@ -124,7 +128,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi) int y_stride, int uv_stride, loop_filter_info *lfi)
{ {
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
if (u_ptr) if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
...@@ -135,7 +139,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign ...@@ -135,7 +139,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi) int y_stride, int uv_stride, loop_filter_info *lfi)
{ {
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
if (u_ptr) if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
...@@ -149,9 +153,9 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne ...@@ -149,9 +153,9 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
#if ARCH_X86_64 #if ARCH_X86_64
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
#else #else
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
#endif #endif
if (u_ptr) if (u_ptr)
...@@ -174,9 +178,9 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne ...@@ -174,9 +178,9 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
#if ARCH_X86_64 #if ARCH_X86_64
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
#else #else
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
#endif #endif
if (u_ptr) if (u_ptr)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment