Commit bd5a5bb5 authored by James Zern
Browse files

vpx_lpf_horizontal_8: remove unused count param

Change-Id: I48741e167a7b09b7c9ad3bfc1c4b88ef1029ae46
parent 109a47b3
......@@ -520,7 +520,8 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_sse2>,
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_sse2>,
......@@ -604,8 +605,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&wrapper_nc<vpx_lpf_vertical_16_dual_neon>,
&wrapper_nc<vpx_lpf_vertical_16_dual_c>, 8, 1),
#endif // HAVE_NEON_ASM
make_tuple(&vpx_lpf_horizontal_8_neon,
&vpx_lpf_horizontal_8_c, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_neon>,
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_neon>,
&wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
make_tuple(&vpx_lpf_horizontal_4_neon,
......@@ -633,7 +634,8 @@ INSTANTIATE_TEST_CASE_P(
DSPR2, Loop8Test6Param,
::testing::Values(
make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_dspr2>,
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_dspr2,
&vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_dspr2,
......@@ -665,7 +667,8 @@ INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_horizontal_8_msa>,
&wrapper_nc<vpx_lpf_horizontal_8_c>, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&wrapper_nc<vpx_lpf_vertical_4_msa>,
......
......@@ -542,7 +542,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
}
count = 2;
} else {
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
......
......@@ -542,7 +542,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
}
count = 2;
} else {
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
......
......@@ -16,35 +16,26 @@
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
;
; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
; const uint8_t *thresh,
; int count)
; const uint8_t *thresh)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
; sp+4 int count
|vpx_lpf_horizontal_8_neon| PROC
push {r4-r5, lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #16] ; load count
ldr r2, [sp, #12] ; load thresh
add r1, r1, r1 ; double pitch
cmp r12, #0
beq end_vpx_mblf_h_edge
vld1.8 {d1[]}, [r3] ; duplicate *limit
vld1.8 {d2[]}, [r2] ; duplicate *thresh
count_mblf_h_loop
sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r2, r3, r1, lsr #1 ; set to 3 lines down
......@@ -69,11 +60,6 @@ count_mblf_h_loop
vst1.u8 {d4}, [r2@64], r1 ; store oq1
vst1.u8 {d5}, [r3@64], r1 ; store oq2
add r0, r0, #8
subs r12, r12, #1
bne count_mblf_h_loop
end_vpx_mblf_h_edge
pop {r4-r5, pc}
ENDP ; |vpx_lpf_horizontal_8_neon|
......
......@@ -268,23 +268,19 @@ void vpx_lpf_horizontal_8_neon(
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
const uint8_t *thresh) {
int i;
uint8_t *s, *psrc;
uint8x8_t dblimit, dlimit, dthresh;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
uint8x8_t d16u8, d17u8, d18u8;
if (count == 0) // end_vpx_mblf_h_edge
return;
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
psrc = src - (pitch << 2);
for (i = 0; i < count; i++) {
for (i = 0; i < 1; i++) {
s = psrc + i * 8;
d3u8 = vld1_u8(s);
......
......@@ -33,8 +33,8 @@ void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p /* pitch */,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);
vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
......
......@@ -188,13 +188,12 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
}
void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
int count) {
const uint8_t *limit, const uint8_t *thresh) {
int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
for (i = 0; i < 8 * count; ++i) {
for (i = 0; i < 8; ++i) {
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
......@@ -211,8 +210,8 @@ void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) {
vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
......
......@@ -13,8 +13,7 @@
void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
const uint8_t *thresh_ptr,
int32_t count) {
const uint8_t *thresh_ptr) {
uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
v16u8 mask, hev, flat, thresh, b_limit, limit;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
......@@ -23,8 +22,6 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
v16i8 zero = { 0 };
(void)count;
/* load vector elements */
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
......
......@@ -323,8 +323,8 @@ void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
const uint8_t *blimit1,
const uint8_t *limit1,
const uint8_t *thresh1) {
vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0);
vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
......
......@@ -23,8 +23,7 @@ void vpx_lpf_horizontal_8_dspr2(unsigned char *s,
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
int count) {
const uint8_t *thresh) {
uint32_t mask;
uint32_t hev, flat;
uint8_t i;
......
......@@ -552,7 +552,7 @@ add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t
specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
......
......@@ -730,7 +730,7 @@ void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh, int count) {
const unsigned char *_thresh) {
DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);
DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
......@@ -745,8 +745,6 @@ void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
__m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0;
(void)count;
q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)),
_mm_loadl_epi64((__m128i *)(s + 3 * p)));
q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)),
......@@ -1504,7 +1502,7 @@ void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
transpose(src, p, dst, 8, 1);
// Loop filtering
vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh);
src[0] = t_dst;
dst[0] = s - 4;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment