Commit ae6e6bc1 authored by Maxym Dmytrychenko

SSE2 optimizations for _6/_16 lowbd lpf functions

Includes vertical and horizontal implementations,
and fixes 5/13-tap and parallel deblocking support.

Reworks the internals of the filters for better
reuse across different sizes.

Tests are enabled.

Performance changes, SSE2 over C:
Horizontal methods: up to    3-4x
Vertical   methods: up to 1.5x-2x

Change-Id: I2e36035355d8c23c1d4b0d59d0e23f598e9d0e3f
parent 29d2f21e
......@@ -444,6 +444,11 @@ if (aom_config("CONFIG_PARALLEL_DEBLOCKING") ne "yes") {
$aom_lpf_vertical_16_dual_neon_asm=aom_lpf_vertical_16_dual_neon;
}
add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_vertical_6 sse2/;
}
add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_vertical_8 sse2/;
......@@ -485,6 +490,11 @@ if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
$aom_lpf_horizontal_16_dual_neon_asm=aom_lpf_horizontal_16_dual_neon;
}
add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_horizontal_6 sse2/;
}
add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
if (aom_config("CONFIG_PARALLEL_DEBLOCKING") eq "yes") {
specialize qw/aom_lpf_horizontal_8 sse2/;
......
This diff is collapsed.
......@@ -42,12 +42,6 @@ static const int delta_lf_id_lut[MAX_MB_PLANE][2] = {
#endif
#if PARALLEL_DEBLOCKING_5_TAP_CHROMA
extern void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh);
extern void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh);
extern void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
......@@ -2233,8 +2227,8 @@ static void av1_filter_block_plane_vert(
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
else
aom_lpf_vertical_6_c(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
#endif
// apply 8-tap filtering
......@@ -2261,13 +2255,8 @@ static void av1_filter_block_plane_vert(
cm->bit_depth);
#endif
else
#if CONFIG_DEBLOCK_13TAP
aom_lpf_vertical_16_c(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
#else
aom_lpf_vertical_16(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
#endif
break;
// no filtering
default: break;
......@@ -2335,8 +2324,8 @@ static void av1_filter_block_plane_horz(
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
else
aom_lpf_horizontal_6_c(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
#endif
// apply 8-tap filtering
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment