Commit 109a47b3 authored by James Zern's avatar James Zern
Browse files

vpx_lpf_vertical_4: remove unused count param

Change-Id: I43a191cb3d42e51e7bca266adfa11c6239a8064c
parent 37225744
...@@ -459,7 +459,8 @@ INSTANTIATE_TEST_CASE_P( ...@@ -459,7 +459,8 @@ INSTANTIATE_TEST_CASE_P(
MMX, Loop8Test6Param, MMX, Loop8Test6Param,
::testing::Values( ::testing::Values(
make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1), make_tuple(&vpx_lpf_horizontal_4_mmx, &vpx_lpf_horizontal_4_c, 8, 1),
make_tuple(&vpx_lpf_vertical_4_mmx, &vpx_lpf_vertical_4_c, 8, 1))); make_tuple(&wrapper_nc<vpx_lpf_vertical_4_mmx>,
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
#endif // HAVE_MMX #endif // HAVE_MMX
#if HAVE_SSE2 #if HAVE_SSE2
...@@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P( ...@@ -609,8 +610,8 @@ INSTANTIATE_TEST_CASE_P(
&wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1), &wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
make_tuple(&vpx_lpf_horizontal_4_neon, make_tuple(&vpx_lpf_horizontal_4_neon,
&vpx_lpf_horizontal_4_c, 8, 1), &vpx_lpf_horizontal_4_c, 8, 1),
make_tuple(&vpx_lpf_vertical_4_neon, make_tuple(&wrapper_nc<vpx_lpf_vertical_4_neon>,
&vpx_lpf_vertical_4_c, 8, 1))); &wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1)));
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param, NEON, Loop8Test9Param,
::testing::Values( ::testing::Values(
...@@ -637,7 +638,8 @@ INSTANTIATE_TEST_CASE_P( ...@@ -637,7 +638,8 @@ INSTANTIATE_TEST_CASE_P(
&vpx_lpf_horizontal_16_c, 8, 1), &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_dspr2, make_tuple(&vpx_lpf_horizontal_16_dspr2,
&vpx_lpf_horizontal_16_c, 8, 2), &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8, 1), make_tuple(&wrapper_nc<vpx_lpf_vertical_4_dspr2>,
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_dspr2>, make_tuple(&wrapper_nc<vpx_lpf_vertical_8_dspr2>,
&wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1), &wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_vertical_16_dspr2>, make_tuple(&wrapper_nc<vpx_lpf_vertical_16_dspr2>,
...@@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P( ...@@ -666,7 +668,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1), make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8, 1), make_tuple(&wrapper_nc<vpx_lpf_vertical_4_msa>,
&wrapper_nc<vpx_lpf_vertical_4_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_vertical_8_msa>, make_tuple(&wrapper_nc<vpx_lpf_vertical_8_msa>,
&wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1), &wrapper_nc<vpx_lpf_vertical_8_c>, 8, 1),
make_tuple(&wrapper_nc<vpx_lpf_vertical_16_msa>, make_tuple(&wrapper_nc<vpx_lpf_vertical_16_msa>,
......
...@@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, ...@@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr); lfi1->hev_thr);
} else if (mask_4x4_0 & 1) { } else if (mask_4x4_0 & 1) {
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
1);
} else { } else {
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1); lfi1->hev_thr);
} }
} }
...@@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, ...@@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
lfi1->hev_thr); lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) { } else if (mask_4x4_int_0 & 1) {
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1); lfi0->hev_thr);
} else { } else {
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1); lfi1->hev_thr);
} }
} }
} }
...@@ -1128,11 +1127,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch, ...@@ -1128,11 +1127,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
} else if (mask_8x8 & 1) { } else if (mask_8x8 & 1) {
vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_4x4 & 1) { } else if (mask_4x4 & 1) {
vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} }
} }
if (mask_4x4_int & 1) if (mask_4x4_int & 1)
vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
s += 8; s += 8;
lfl += 1; lfl += 1;
mask_16x16 >>= 1; mask_16x16 >>= 1;
......
...@@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, ...@@ -358,11 +358,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
lfi0->hev_thr, lfi1->mblim, lfi1->lim, lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr); lfi1->hev_thr);
} else if (mask_4x4_0 & 1) { } else if (mask_4x4_0 & 1) {
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr, vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
1);
} else { } else {
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim, vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1); lfi1->hev_thr);
} }
} }
...@@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor, ...@@ -373,10 +372,10 @@ static void filter_selectively_vert_row2(int subsampling_factor,
lfi1->hev_thr); lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) { } else if (mask_4x4_int_0 & 1) {
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim, vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, 1); lfi0->hev_thr);
} else { } else {
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim, vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, 1); lfi1->hev_thr);
} }
} }
} }
...@@ -1103,11 +1102,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch, ...@@ -1103,11 +1102,11 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
} else if (mask_8x8 & 1) { } else if (mask_8x8 & 1) {
vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} else if (mask_4x4 & 1) { } else if (mask_4x4 & 1) {
vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
} }
} }
if (mask_4x4_int & 1) if (mask_4x4_int & 1)
vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1); vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
s += 8; s += 8;
lfl += 1; lfl += 1;
mask_16x16 >>= 1; mask_16x16 >>= 1;
......
...@@ -79,37 +79,29 @@ end_vpx_lf_h_edge ...@@ -79,37 +79,29 @@ end_vpx_lf_h_edge
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter ; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time. ; works on 16 iterations at a time.
; TODO(fgalligan): See about removing the count code as this function is only
; called with a count of 1.
; ;
; void vpx_lpf_vertical_4_neon(uint8_t *s, ; void vpx_lpf_vertical_4_neon(uint8_t *s,
; int p /* pitch */, ; int p /* pitch */,
; const uint8_t *blimit, ; const uint8_t *blimit,
; const uint8_t *limit, ; const uint8_t *limit,
; const uint8_t *thresh, ; const uint8_t *thresh)
; int count)
; ;
; r0 uint8_t *s, ; r0 uint8_t *s,
; r1 int p, /* pitch */ ; r1 int p, /* pitch */
; r2 const uint8_t *blimit, ; r2 const uint8_t *blimit,
; r3 const uint8_t *limit, ; r3 const uint8_t *limit,
; sp const uint8_t *thresh, ; sp const uint8_t *thresh,
; sp+4 int count
|vpx_lpf_vertical_4_neon| PROC |vpx_lpf_vertical_4_neon| PROC
push {lr} push {lr}
vld1.8 {d0[]}, [r2] ; duplicate *blimit vld1.8 {d0[]}, [r2] ; duplicate *blimit
ldr r12, [sp, #8] ; load count
vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d1[]}, [r3] ; duplicate *limit
ldr r3, [sp, #4] ; load thresh ldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns sub r2, r0, #4 ; move s pointer down by 4 columns
cmp r12, #0
beq end_vpx_lf_v_edge
vld1.8 {d2[]}, [r3] ; duplicate *thresh vld1.8 {d2[]}, [r3] ; duplicate *thresh
count_lf_v_loop
vld1.u8 {d3}, [r2], r1 ; load s data vld1.u8 {d3}, [r2], r1 ; load s data
vld1.u8 {d4}, [r2], r1 vld1.u8 {d4}, [r2], r1
vld1.u8 {d5}, [r2], r1 vld1.u8 {d5}, [r2], r1
...@@ -149,12 +141,6 @@ count_lf_v_loop ...@@ -149,12 +141,6 @@ count_lf_v_loop
vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1 vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1
vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0] vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0]
add r0, r0, r1, lsl #3 ; s += pitch * 8
subs r12, r12, #1
subne r2, r0, #4 ; move s pointer down by 4 columns
bne count_lf_v_loop
end_vpx_lf_v_edge
pop {pc} pop {pc}
ENDP ; |vpx_lpf_vertical_4_neon| ENDP ; |vpx_lpf_vertical_4_neon|
......
...@@ -170,8 +170,7 @@ void vpx_lpf_vertical_4_neon( ...@@ -170,8 +170,7 @@ void vpx_lpf_vertical_4_neon(
int pitch, int pitch,
const uint8_t *blimit, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *limit,
const uint8_t *thresh, const uint8_t *thresh) {
int count) {
int i, pitch8; int i, pitch8;
uint8_t *s; uint8_t *s;
uint8x8_t dblimit, dlimit, dthresh; uint8x8_t dblimit, dlimit, dthresh;
...@@ -181,15 +180,12 @@ void vpx_lpf_vertical_4_neon( ...@@ -181,15 +180,12 @@ void vpx_lpf_vertical_4_neon(
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11; uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
uint8x8x4_t d4Result; uint8x8x4_t d4Result;
if (count == 0) // end_vpx_lf_h_edge
return;
dblimit = vld1_u8(blimit); dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit); dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh); dthresh = vld1_u8(thresh);
pitch8 = pitch * 8; pitch8 = pitch * 8;
for (i = 0; i < count; i++, src += pitch8) { for (i = 0; i < 1; i++, src += pitch8) {
s = src - (i + 1) * 4; s = src - (i + 1) * 4;
d3u8 = vld1_u8(s); d3u8 = vld1_u8(s);
......
...@@ -21,8 +21,8 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, ...@@ -21,8 +21,8 @@ void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p,
const uint8_t *blimit1, const uint8_t *blimit1,
const uint8_t *limit1, const uint8_t *limit1,
const uint8_t *thresh1) { const uint8_t *thresh1) {
vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1); vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1); vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
} }
#if HAVE_NEON_ASM #if HAVE_NEON_ASM
......
...@@ -143,13 +143,12 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, ...@@ -143,13 +143,12 @@ void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
} }
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh, const uint8_t *limit, const uint8_t *thresh) {
int count) {
int i; int i;
// loop filter designed to work using chars so that we can make maximum use // loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions. // of 8 bit simd instructions.
for (i = 0; i < 8 * count; ++i) { for (i = 0; i < 8; ++i) {
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit, const int8_t mask = filter_mask(*limit, *blimit,
...@@ -163,9 +162,8 @@ void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, ...@@ -163,9 +162,8 @@ void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *blimit1, const uint8_t *limit1,
const uint8_t *thresh1) { const uint8_t *thresh1) {
vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1); vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
thresh1, 1);
} }
static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
......
...@@ -74,14 +74,11 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, ...@@ -74,14 +74,11 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr, const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr, const uint8_t *limit_ptr,
const uint8_t *thresh_ptr, const uint8_t *thresh_ptr) {
int32_t count) {
v16u8 mask, hev, flat, limit, thresh, b_limit; v16u8 mask, hev, flat, limit, thresh, b_limit;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v8i16 vec0, vec1, vec2, vec3; v8i16 vec0, vec1, vec2, vec3;
(void)count;
LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3); LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
thresh = (v16u8)__msa_fill_b(*thresh_ptr); thresh = (v16u8)__msa_fill_b(*thresh_ptr);
......
...@@ -117,8 +117,7 @@ void vpx_lpf_vertical_4_dspr2(unsigned char *s, ...@@ -117,8 +117,7 @@ void vpx_lpf_vertical_4_dspr2(unsigned char *s,
int pitch, int pitch,
const uint8_t *blimit, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *limit,
const uint8_t *thresh, const uint8_t *thresh) {
int count) {
uint8_t i; uint8_t i;
uint32_t mask, hev; uint32_t mask, hev;
uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
...@@ -335,8 +334,8 @@ void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, ...@@ -335,8 +334,8 @@ void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
const uint8_t *blimit1, const uint8_t *blimit1,
const uint8_t *limit1, const uint8_t *limit1,
const uint8_t *thresh1) { const uint8_t *thresh1) {
vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1); vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1); vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
} }
void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
......
...@@ -542,7 +542,7 @@ add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_ ...@@ -542,7 +542,7 @@ add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/; specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon; $vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count"; add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/; specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
......
...@@ -230,14 +230,13 @@ sym(vpx_lpf_horizontal_4_mmx): ...@@ -230,14 +230,13 @@ sym(vpx_lpf_horizontal_4_mmx):
; int src_pixel_step, ; int src_pixel_step,
; const char *blimit, ; const char *blimit,
; const char *limit, ; const char *limit,
; const char *thresh, ; const char *thresh
; int count
;) ;)
global sym(vpx_lpf_vertical_4_mmx) PRIVATE global sym(vpx_lpf_vertical_4_mmx) PRIVATE
sym(vpx_lpf_vertical_4_mmx): sym(vpx_lpf_vertical_4_mmx):
push rbp push rbp
mov rbp, rsp mov rbp, rsp
SHADOW_ARGS_TO_STACK 6 SHADOW_ARGS_TO_STACK 5
GET_GOT rbx GET_GOT rbx
push rsi push rsi
push rdi push rdi
...@@ -254,8 +253,6 @@ sym(vpx_lpf_vertical_4_mmx): ...@@ -254,8 +253,6 @@ sym(vpx_lpf_vertical_4_mmx):
lea rsi, [rsi + rax*4 - 4] lea rsi, [rsi + rax*4 - 4]
movsxd rcx, dword ptr arg(5) ;count
.next8_v:
mov rdi, rsi ; rdi points to row +1 for indirect addressing mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax add rdi, rax
...@@ -579,10 +576,6 @@ sym(vpx_lpf_vertical_4_mmx): ...@@ -579,10 +576,6 @@ sym(vpx_lpf_vertical_4_mmx):
movd [rdi+rax*2+2], mm5 movd [rdi+rax*2+2], mm5
lea rsi, [rsi+rax*8]
dec rcx
jnz .next8_v
add rsp, 64 add rsp, 64
pop rsp pop rsp
; begin epilog ; begin epilog
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment