Commit 361717d2 authored by Yaowu Xu

remove one set of 16x16 variance functions

Calls to this set of functions are replaced by var16x16.

Change-Id: I5ff1effc6c1358ea06cda1517b88ec28ef551b0d
parent af49c112
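
For context, the removed vp8_get16x16pred_error functions returned the variance of the 16x16 prediction residual, i.e. SSE minus (Sum*Sum)/256, which is the same value the 16x16 variance path computes; the variance hook additionally reports the raw SSE through a pointer argument, which is why the call sites below gain an extra &sse argument. Below is a minimal standalone C sketch of that equivalence; the names pred_error_16x16, var_16x16 and the test harness are illustrative only, not the library's exact signatures.

#include <stdio.h>

/* Illustrative version of the removed helper: variance of the 16x16
 * prediction error, i.e. SSE - (Sum*Sum)/256. */
static unsigned int pred_error_16x16(const unsigned char *src, int src_stride,
                                     const unsigned char *ref, int ref_stride)
{
    int i, j, sum = 0;
    unsigned int sse = 0;

    for (i = 0; i < 16; i++)
    {
        for (j = 0; j < 16; j++)
        {
            int diff = src[j] - ref[j];
            sum += diff;
            sse += diff * diff;
        }
        src += src_stride;
        ref += ref_stride;
    }
    /* unsigned multiply avoids signed overflow for large sums */
    return sse - (((unsigned int)sum * (unsigned int)sum) >> 8);
}

/* Sketch of a var16x16-style function: same arithmetic, but the raw SSE
 * is also written through *sse_out. */
static unsigned int var_16x16(const unsigned char *src, int src_stride,
                              const unsigned char *ref, int ref_stride,
                              unsigned int *sse_out)
{
    int i, j, sum = 0;
    unsigned int sse = 0;

    for (i = 0; i < 16; i++)
    {
        for (j = 0; j < 16; j++)
        {
            int diff = src[j] - ref[j];
            sum += diff;
            sse += diff * diff;
        }
        src += src_stride;
        ref += ref_stride;
    }
    *sse_out = sse;
    return sse - (((unsigned int)sum * (unsigned int)sum) >> 8);
}

int main(void)
{
    unsigned char src[16 * 16], ref[16 * 16];
    unsigned int sse;
    int i;

    for (i = 0; i < 16 * 16; i++)    /* arbitrary test pattern */
    {
        src[i] = (unsigned char)(i & 0xff);
        ref[i] = (unsigned char)((i * 3) & 0xff);
    }
    /* Both calls yield the same distortion value, so callers can switch. */
    printf("%u %u\n", pred_error_16x16(src, 16, ref, 16),
                      var_16x16(src, 16, ref, 16, &sse));
    return 0;
}

Because the return values match, the RD cost computed at the changed call sites is unchanged; only the extra &sse output is new.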
@@ -53,8 +53,7 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.variance.mse16x16 = vp8_mse16x16_armv6;
        /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
-       /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
-       cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
+       /*cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/
        /*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
        cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;*/

@@ -101,7 +100,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
        cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
        /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
-       cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon;
        cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon;
        cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon;
......
@@ -10,7 +10,6 @@
    EXPORT  |vp8_mse16x16_neon|
-   EXPORT  |vp8_get16x16pred_error_neon|
    EXPORT  |vp8_get4x4sse_cs_neon|
    ARM

@@ -76,62 +75,6 @@ mse16x16_neon_loop
    ENDP

-;============================
-; r0    unsigned char *src_ptr
-; r1    int src_stride
-; r2    unsigned char *ref_ptr
-; r3    int ref_stride
-|vp8_get16x16pred_error_neon| PROC
-    vmov.i8         q8, #0      ;q8 - sum
-    vmov.i8         q9, #0      ;q9, q10 - pred_error
-    vmov.i8         q10, #0
-    mov             r12, #8
-get16x16pred_error_neon_loop
-    vld1.8          {q0}, [r0], r1      ;Load up source and reference
-    vld1.8          {q2}, [r2], r3
-    vld1.8          {q1}, [r0], r1
-    vld1.8          {q3}, [r2], r3
-    vsubl.u8        q11, d0, d4
-    vsubl.u8        q12, d1, d5
-    vsubl.u8        q13, d2, d6
-    vsubl.u8        q14, d3, d7
-    vpadal.s16      q8, q11
-    vmlal.s16       q9, d22, d22
-    vmlal.s16       q10, d23, d23
-    subs            r12, r12, #1
-    vpadal.s16      q8, q12
-    vmlal.s16       q9, d24, d24
-    vmlal.s16       q10, d25, d25
-    vpadal.s16      q8, q13
-    vmlal.s16       q9, d26, d26
-    vmlal.s16       q10, d27, d27
-    vpadal.s16      q8, q14
-    vmlal.s16       q9, d28, d28
-    vmlal.s16       q10, d29, d29
-    bne             get16x16pred_error_neon_loop
-    vadd.u32        q10, q9, q10
-    vpaddl.s32      q0, q8
-    vpaddl.u32      q1, q10
-    vadd.s64        d0, d0, d1
-    vadd.u64        d1, d2, d3
-    vmull.s32       q5, d0, d0
-    vshr.s32        d10, d10, #8
-    vsub.s32        d0, d1, d10
-    vmov.32         r0, d0[0]
-    bx              lr
-    ENDP

;=============================
; r0    unsigned char *src_ptr,
......
@@ -83,7 +83,6 @@ extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
//extern prototype_getmbss(vp8_get_mb_ss_c);
extern prototype_variance(vp8_mse16x16_neon);
-extern prototype_get16x16prederror(vp8_get16x16pred_error_neon);
extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);
#if !CONFIG_RUNTIME_CPU_DETECT

@@ -147,9 +146,6 @@ extern prototype_get16x16prederror(vp8_get4x4sse_cs_neon);
#undef vp8_variance_mse16x16
#define vp8_variance_mse16x16 vp8_mse16x16_neon
-#undef vp8_variance_get16x16prederror
-#define vp8_variance_get16x16prederror vp8_get16x16pred_error_neon
#undef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
#endif
......
@@ -67,7 +67,6 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
    cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
    cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
-   cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
    cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;
    cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
......
@@ -43,7 +43,6 @@ extern const MV_REFERENCE_FRAME vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
-extern unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
extern unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
extern int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *, int *, int *, int, int *mvcost[2], int, int fullpixel);
extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);

@@ -98,37 +97,6 @@ static int get_inter_mbpred_error(MACROBLOCK *mb,
}

-unsigned int vp8_get16x16pred_error_c
-(
-    const unsigned char *src_ptr,
-    int src_stride,
-    const unsigned char *ref_ptr,
-    int ref_stride
-)
-{
-    unsigned pred_error = 0;
-    int i, j;
-    int sum = 0;
-
-    for (i = 0; i < 16; i++)
-    {
-        int diff;
-
-        for (j = 0; j < 16; j++)
-        {
-            diff = src_ptr[j] - ref_ptr[j];
-            sum += diff;
-            pred_error += diff * diff;
-        }
-
-        src_ptr += src_stride;
-        ref_ptr += ref_stride;
-    }
-
-    pred_error -= sum * sum / 256;
-    return pred_error;
-}
-
unsigned int vp8_get4x4sse_cs_c
(

@@ -669,9 +637,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
            {
                rate2 += rate;
                distortion2 = VARIANCE_INVOKE
-                               (&cpi->rtcd.variance, get16x16prederror)(
+                               (&cpi->rtcd.variance, var16x16)(
                                    x->src.y_buffer, x->src.y_stride,
-                                   x->e_mbd.predictor, 16);
+                                   x->e_mbd.predictor, 16, &sse);
                this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

                if (this_rd < best_intra_rd)

@@ -694,7 +662,9 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
        case TM_PRED:
            RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
                (&x->e_mbd);
-           distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16);
+           distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+                             (x->src.y_buffer, x->src.y_stride,
+                              x->e_mbd.predictor, 16, &sse);
            rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
            this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);

@@ -960,6 +930,7 @@ void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
    int rate, best_rate = 0, distortion, best_distortion;
    MB_PREDICTION_MODE mode, best_mode = DC_PRED;
    int this_rd;
+   unsigned int sse;

    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

@@ -970,8 +941,8 @@ void vp8_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate_)
        x->e_mbd.mode_info_context->mbmi.mode = mode;
        RECON_INVOKE(&cpi->common.rtcd.recon, build_intra_predictors_mby)
            (&x->e_mbd);
-       distortion = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)
-           (x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16);
+       distortion = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+           (x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, &sse);
        rate = x->mbmode_cost[x->e_mbd.frame_type][mode];
        this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
......
@@ -48,7 +48,6 @@ void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, i
void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
-unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
// c imports

@@ -86,7 +85,6 @@ extern sub_pixel_variance_function sub_pixel_variance16x8_c;
extern sub_pixel_variance_function sub_pixel_variance16x16_c;
extern unsigned int vp8_get_mb_ss_c(short *);
-extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride);
// ppc

@@ -145,7 +143,6 @@ void vp8_cmachine_specific_config(void)
    vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_ppc;
    vp8_get_mb_ss = vp8_get_mb_ss_c;
-   vp8_get16x16pred_error = vp8_get16x16pred_error_c;
    vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
    vp8_sad16x16 = vp8_sad16x16_ppc;
......
@@ -308,11 +308,6 @@ extern prototype_getmbss(vp8_variance_getmbss);
#endif
extern prototype_variance(vp8_variance_mse16x16);

-#ifndef vp8_variance_get16x16prederror
-#define vp8_variance_get16x16prederror vp8_get16x16pred_error_c
-#endif
-extern prototype_get16x16prederror(vp8_variance_get16x16prederror);
-
#ifndef vp8_variance_get4x4sse_cs
#define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_c
#endif

@@ -366,7 +361,6 @@ typedef struct
    vp8_getmbss_fn_t getmbss;
    vp8_variance_fn_t mse16x16;
-   vp8_get16x16prederror_fn_t get16x16prederror;
    vp8_get16x16prederror_fn_t get4x4sse_cs;
    vp8_sad_multi_fn_t sad16x16x3;
......
@@ -843,136 +843,6 @@ filter_block2d_bil_var_mmx_loop:
    pop rbp
    ret

-;unsigned int vp8_get16x16pred_error_mmx
-;(
-;   unsigned char *src_ptr,
-;   int src_stride,
-;   unsigned char *ref_ptr,
-;   int ref_stride
-;)
-global sym(vp8_get16x16pred_error_mmx)
-sym(vp8_get16x16pred_error_mmx):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 4
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    sub         rsp, 16
-    ; end prolog
-    mov         rsi, arg(0)             ;DWORD PTR [src_ptr]
-    mov         rdi, arg(2)             ;DWORD PTR [ref_ptr]
-    movsxd      rax, DWORD PTR arg(1)   ;[src_stride]
-    movsxd      rdx, DWORD PTR arg(3)   ;[ref_stride]
-    pxor        mm0, mm0                ; clear xmm0 for unpack
-    pxor        mm7, mm7                ; clear xmm7 for accumulating diffs
-    pxor        mm6, mm6                ; clear xmm6 for accumulating sse
-    mov         rcx, 16
-var16loop:
-    movq        mm1, [rsi]
-    movq        mm2, [rdi]
-    movq        mm3, mm1
-    movq        mm4, mm2
-    punpcklbw   mm1, mm0
-    punpckhbw   mm3, mm0
-    punpcklbw   mm2, mm0
-    punpckhbw   mm4, mm0
-    psubw       mm1, mm2
-    psubw       mm3, mm4
-    paddw       mm7, mm1
-    pmaddwd     mm1, mm1
-    paddw       mm7, mm3
-    pmaddwd     mm3, mm3
-    paddd       mm6, mm1
-    paddd       mm6, mm3
-    movq        mm1, [rsi+8]
-    movq        mm2, [rdi+8]
-    movq        mm3, mm1
-    movq        mm4, mm2
-    punpcklbw   mm1, mm0
-    punpckhbw   mm3, mm0
-    punpcklbw   mm2, mm0
-    punpckhbw   mm4, mm0
-    psubw       mm1, mm2
-    psubw       mm3, mm4
-    paddw       mm7, mm1
-    pmaddwd     mm1, mm1
-    paddw       mm7, mm3
-    pmaddwd     mm3, mm3
-    paddd       mm6, mm1
-    paddd       mm6, mm3
-    add         rsi, rax
-    add         rdi, rdx
-    sub         rcx, 1
-    jnz         var16loop
-    movq        mm1, mm6
-    pxor        mm6, mm6
-    pxor        mm5, mm5
-    punpcklwd   mm6, mm7
-    punpckhwd   mm5, mm7
-    psrad       mm5, 16
-    psrad       mm6, 16
-    paddd       mm6, mm5
-    movq        mm2, mm1
-    psrlq       mm1, 32
-    paddd       mm2, mm1
-    movq        mm7, mm6
-    psrlq       mm6, 32
-    paddd       mm6, mm7
-    movd DWORD PTR [rsp], mm6           ;Sum
-    movd DWORD PTR [rsp+4], mm2         ;SSE
-    ; return (SSE-((Sum*Sum)>>8));
-    movsxd      rdx, dword ptr [rsp]
-    imul        rdx, rdx
-    sar         rdx, 8
-    movsxd      rax, dword ptr [rsp + 4]
-    sub         rax, rdx
-    ; begin epilog
-    add rsp, 16
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop rbp
-    ret

SECTION_RODATA
;short mmx_bi_rd[4] = { 64, 64, 64, 64};
......
@@ -213,122 +213,6 @@ var16loop:
    ret

-;unsigned int vp8_get16x16pred_error_sse2
-;(
-;   unsigned char *src_ptr,
-;   int src_stride,
-;   unsigned char *ref_ptr,
-;   int ref_stride
-;)
-global sym(vp8_get16x16pred_error_sse2)
-sym(vp8_get16x16pred_error_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 4
-    SAVE_XMM 7
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    sub         rsp, 16
-    ; end prolog
-    mov         rsi, arg(0)             ;[src_ptr]
-    mov         rdi, arg(2)             ;[ref_ptr]
-    movsxd      rax, DWORD PTR arg(1)   ;[src_stride]
-    movsxd      rdx, DWORD PTR arg(3)   ;[ref_stride]
-    pxor        xmm0, xmm0              ; clear xmm0 for unpack
-    pxor        xmm7, xmm7              ; clear xmm7 for accumulating diffs
-    pxor        xmm6, xmm6              ; clear xmm6 for accumulating sse
-    mov         rcx, 16
-var16peloop:
-    movdqu      xmm1, XMMWORD PTR [rsi]
-    movdqu      xmm2, XMMWORD PTR [rdi]
-    movdqa      xmm3, xmm1
-    movdqa      xmm4, xmm2
-    punpcklbw   xmm1, xmm0
-    punpckhbw   xmm3, xmm0
-    punpcklbw   xmm2, xmm0
-    punpckhbw   xmm4, xmm0
-    psubw       xmm1, xmm2
-    psubw       xmm3, xmm4
-    paddw       xmm7, xmm1
-    pmaddwd     xmm1, xmm1
-    paddw       xmm7, xmm3
-    pmaddwd     xmm3, xmm3
-    paddd       xmm6, xmm1
-    paddd       xmm6, xmm3
-    add         rsi, rax
-    add         rdi, rdx
-    sub         rcx, 1
-    jnz         var16peloop
-    movdqa      xmm1, xmm6
-    pxor        xmm6, xmm6
-    pxor        xmm5, xmm5
-    punpcklwd   xmm6, xmm7
-    punpckhwd   xmm5, xmm7
-    psrad       xmm5, 16
-    psrad       xmm6, 16
-    paddd       xmm6, xmm5
-    movdqa      xmm2, xmm1
-    punpckldq   xmm1, xmm0
-    punpckhdq   xmm2, xmm0
-    movdqa      xmm7, xmm6
-    paddd       xmm1, xmm2
-    punpckldq   xmm6, xmm0
-    punpckhdq   xmm7, xmm0
-    paddd       xmm6, xmm7
-    movdqa      xmm2, xmm1
-    movdqa      xmm7, xmm6
-    psrldq      xmm1, 8
-    psrldq      xmm6, 8
-    paddd       xmm7, xmm6
-    paddd       xmm1, xmm2
-    movd DWORD PTR [rsp], xmm7          ;Sum
-    movd DWORD PTR [rsp+4], xmm1        ;SSE
-    ; return (SSE-((Sum*Sum)>>8));
-    movsxd      rdx, dword ptr [rsp]
-    imul        rdx, rdx
-    sar         rdx, 8
-    movsxd      rax, dword ptr [rsp + 4]
-    sub         rax, rdx
-    ; begin epilog
-    add rsp, 16
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    RESTORE_XMM
-    UNSHADOW_ARGS
-    pop rbp
-    ret

;unsigned int vp8_get8x8var_sse2
......
@@ -76,13 +76,6 @@ extern void vp8_filter_block2d_bil_var_mmx
    int *sum,
    unsigned int *sumsquared