Commit ccea000c authored by Scott LaVarnway

Updated vp8_build_intra_predictors_mbuv_s (sse2/ssse3) to work with the latest code.

Change-Id: Ie382bb55d00ea5929bdadba859eea15f696d4cd9
parent 403966ae
......@@ -126,7 +126,7 @@ prototype void vp8_build_intra_predictors_mby_s "struct macroblockd *x, unsigned
#TODO: fix assembly --- specialize vp8_build_intra_predictors_mby_s sse2 ssse3 neon
prototype void vp8_build_intra_predictors_mbuv_s "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride"
#TODO: fix assembly --- specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
specialize vp8_build_intra_predictors_mbuv_s sse2 ssse3
prototype void vp8_intra4x4_predict "unsigned char *src, int src_stride, int b_mode, unsigned char *dst, int dst_stride"
specialize vp8_intra4x4_predict media
......
......@@ -119,35 +119,39 @@ sym(vp8_copy_mem16x16_sse2):
;void vp8_intra_pred_uv_dc_mmx2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride,
; )
global sym(vp8_intra_pred_uv_dc_mmx2)
sym(vp8_intra_pred_uv_dc_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
; from top
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
sub rsi, rax
mov rsi, arg(2) ;above;
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
; from left
dec rsi
mov rsi, arg(3) ;left;
movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
movzx ecx, byte [rsi+rax]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax*1]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
add ecx, edx
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
......@@ -156,8 +160,6 @@ sym(vp8_intra_pred_uv_dc_mmx2):
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
movzx edx, byte [rsi+rax*4]
add ecx, edx
; add up
pextrw edx, mm1, 0x0
......@@ -192,23 +194,24 @@ sym(vp8_intra_pred_uv_dc_mmx2):
;void vp8_intra_pred_uv_dctop_mmx2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride,
; )
global sym(vp8_intra_pred_uv_dctop_mmx2)
sym(vp8_intra_pred_uv_dctop_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
;arg(3), arg(4) not used
; from top
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
sub rsi, rax
mov rsi, arg(2) ;above;
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
......@@ -245,22 +248,24 @@ sym(vp8_intra_pred_uv_dctop_mmx2):
;void vp8_intra_pred_uv_dcleft_mmx2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride,
; )
global sym(vp8_intra_pred_uv_dcleft_mmx2)
sym(vp8_intra_pred_uv_dcleft_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
;arg(2) not used
; from left
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
dec rsi
mov rsi, arg(3) ;left;
movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
......@@ -310,17 +315,20 @@ sym(vp8_intra_pred_uv_dcleft_mmx2):
;void vp8_intra_pred_uv_dc128_mmx(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride,
; )
global sym(vp8_intra_pred_uv_dc128_mmx)
sym(vp8_intra_pred_uv_dc128_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
; end prolog
;arg(2), arg(3), arg(4) not used
; write out
movq mm1, [GLOBAL(dc_128)]
mov rax, arg(0) ;dst;
......@@ -346,15 +354,16 @@ sym(vp8_intra_pred_uv_dc128_mmx):
;void vp8_intra_pred_uv_tm_sse2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride,
; )
%macro vp8_intra_pred_uv_tm 1
global sym(vp8_intra_pred_uv_tm_%1)
sym(vp8_intra_pred_uv_tm_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
......@@ -362,9 +371,8 @@ sym(vp8_intra_pred_uv_tm_%1):
; read top row
mov edx, 4
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
sub rsi, rax
mov rsi, arg(2) ;above
movsxd rax, dword ptr arg(4) ;left_stride;
pxor xmm0, xmm0
%ifidn %1, ssse3
movdqa xmm2, [GLOBAL(dc_1024)]
......@@ -374,7 +382,7 @@ sym(vp8_intra_pred_uv_tm_%1):
; set up left ptrs and subtract topleft
movd xmm3, [rsi-1]
lea rsi, [rsi+rax-1]
mov rsi, arg(3) ;left;
%ifidn %1, sse2
punpcklbw xmm3, xmm0
pshuflw xmm3, xmm3, 0x0
......@@ -427,20 +435,22 @@ vp8_intra_pred_uv_tm ssse3
;void vp8_intra_pred_uv_ve_mmx(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride,
; )
global sym(vp8_intra_pred_uv_ve_mmx)
sym(vp8_intra_pred_uv_ve_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SHADOW_ARGS_TO_STACK 5
; end prolog
; arg(3), arg(4) not used
; read from top
mov rax, arg(2) ;src;
movsxd rdx, dword ptr arg(3) ;src_stride;
sub rax, rdx
movq mm1, [rax]
; write out
......@@ -466,15 +476,16 @@ sym(vp8_intra_pred_uv_ve_mmx):
;void vp8_intra_pred_uv_ho_mmx2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; unsigned char *above,
; unsigned char *left,
; int left_stride,
; )
%macro vp8_intra_pred_uv_ho 1
global sym(vp8_intra_pred_uv_ho_%1)
sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
%ifidn %1, ssse3
......@@ -485,12 +496,14 @@ sym(vp8_intra_pred_uv_ho_%1):
%endif
; end prolog
;arg(2) not used
; read from left and write out
%ifidn %1, mmx2
mov edx, 4
%endif
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
mov rsi, arg(3) ;left
movsxd rax, dword ptr arg(4) ;left_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
%ifidn %1, ssse3
......@@ -498,7 +511,7 @@ sym(vp8_intra_pred_uv_ho_%1):
movdqa xmm2, [GLOBAL(dc_00001111)]
lea rbx, [rax*3]
%endif
dec rsi
%ifidn %1, mmx2
.vp8_intra_pred_uv_ho_%1_loop:
movd mm0, [rsi]
......
......@@ -15,7 +15,8 @@
#define build_intra_predictors_mbuv_prototype(sym) \
void sym(unsigned char *dst, int dst_stride, \
const unsigned char *src, int src_stride)
const unsigned char *above, \
const unsigned char *left, int left_stride)
typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
......@@ -29,15 +30,19 @@ extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_stride,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
build_intra_predictors_mbuv_fn_t tm_func,
build_intra_predictors_mbuv_fn_t ho_func)
{
int mode = x->mode_info_context->mbmi.uv_mode;
build_intra_predictors_mbuv_fn_t fn;
int src_stride = x->dst.uv_stride;
switch (mode) {
case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
......@@ -59,38 +64,48 @@ static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
default: return;
}
fn(dst_u, dst_stride, x->dst.u_buffer, src_stride);
fn(dst_v, dst_stride, x->dst.v_buffer, src_stride);
fn(dst_u, dst_stride, uabove_row, uleft, left_stride);
fn(dst_v, dst_stride, vabove_row, vleft, left_stride);
}
void vp8_build_intra_predictors_mbuv_sse2(MACROBLOCKD *x)
void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride)
{
vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
&x->predictor[320], 8,
vp8_build_intra_predictors_mbuv_x86(x,
uabove_row, vabove_row,
upred_ptr,
vpred_ptr, pred_stride,
uleft,
vleft,
left_stride,
vp8_intra_pred_uv_tm_sse2,
vp8_intra_pred_uv_ho_mmx2);
}
void vp8_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *x)
void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x,
unsigned char * uabove_row,
unsigned char * vabove_row,
unsigned char * uleft,
unsigned char * vleft,
int left_stride,
unsigned char * upred_ptr,
unsigned char * vpred_ptr,
int pred_stride)
{
vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
&x->predictor[320], 8,
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
x->dst.v_buffer, x->dst.uv_stride,
vp8_intra_pred_uv_tm_sse2,
vp8_intra_pred_uv_ho_mmx2);
}
void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
x->dst.v_buffer, x->dst.uv_stride,
vp8_build_intra_predictors_mbuv_x86(x,
uabove_row, vabove_row,
upred_ptr,
vpred_ptr, pred_stride,
uleft,
vleft,
left_stride,
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
......@@ -132,22 +147,10 @@ static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x,
default: return;
}
fn(dst_y, dst_stride, x->dst.y_buffer, src_stride);
// fn(dst_y, dst_stride, x->dst.y_buffer, src_stride);
return;
}
void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
vp8_intra_pred_y_tm_sse2);
}
void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
vp8_intra_pred_y_tm_ssse3);
}
void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment