Commit 01433c50 authored by Johann's avatar Johann
Browse files

update x86 asm for loopfilter

Change-Id: I1ed739522db7c00c189851c7095c1b64ef6412ce
parent 62295844
......@@ -16,7 +16,7 @@
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -122,12 +122,10 @@ next8_h:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
mov rdx, arg(2) ;flimit ; get flimit
movq mm2, [rdx] ; flimit mm2
paddb mm2, mm2 ; flimit*2 (less than 255)
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
mov rdx, arg(2) ;blimit ; get blimit
movq mm7, [rdx] ; blimit
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm5
pxor mm5, mm5
pcmpeqb mm1, mm5 ; mask mm1
......@@ -230,7 +228,7 @@ next8_h:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -406,9 +404,9 @@ next8_v:
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
mov rdx, arg(2) ;flimit ;
mov rdx, arg(2) ;blimit ;
movq mm2, [rdx] ;flimit mm2
movq mm4, [rdx] ;blimit
movq mm1, mm3 ; mm1=mm3=p0
movq mm7, mm6 ; mm7=mm6=q0
......@@ -419,10 +417,7 @@ next8_v:
paddusb mm1, mm1 ; abs(q0-p0)*2
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
paddb mm2, mm2 ; flimit*2 (less than 255)
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm0; ; mask
pxor mm0, mm0
......@@ -603,7 +598,7 @@ next8_v:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -719,17 +714,15 @@ next8_mbh:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
mov rdx, arg(2) ;flimit ; get flimit
movq mm2, [rdx] ; flimit mm2
paddb mm2, mm2 ; flimit*2 (less than 255)
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
mov rdx, arg(2) ;blimit ; get blimit
movq mm7, [rdx] ; blimit
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm5
pxor mm5, mm5
pcmpeqb mm1, mm5 ; mask mm1
; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
; mm6 = p0,
; calculate high edge variance
......@@ -922,7 +915,7 @@ next8_mbh:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -1108,9 +1101,9 @@ next8_mbv:
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
mov rdx, arg(2) ;flimit ;
mov rdx, arg(2) ;blimit ;
movq mm2, [rdx] ;flimit mm2
movq mm4, [rdx] ;blimit
movq mm1, mm3 ; mm1=mm3=p0
movq mm7, mm6 ; mm7=mm6=q0
......@@ -1121,10 +1114,7 @@ next8_mbv:
paddusb mm1, mm1 ; abs(q0-p0)*2
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
paddb mm2, mm2 ; flimit*2 (less than 255)
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm0; ; mask
pxor mm0, mm0
......@@ -1392,16 +1382,13 @@ next8_mbv:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
; int count
; const char *blimit
;)
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
......@@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
movsxd rcx, dword ptr arg(5) ;count
mov rcx, 2 ; count
nexts8_h:
mov rdx, arg(3) ;limit
movq mm7, [rdx]
mov rdx, arg(2) ;flimit ; get flimit
mov rdx, arg(2) ;blimit ; get blimit
movq mm3, [rdx] ;
paddb mm3, mm3 ; flimit*2 (less than 255)
paddb mm3, mm7 ; flimit * 2 + limit (less than 255)
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
......@@ -1445,7 +1428,7 @@ nexts8_h:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor mm3, mm3
pcmpeqb mm5, mm3
......@@ -1515,16 +1498,13 @@ nexts8_h:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
; int count
; const char *blimit
;)
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
sym(vp8_loop_filter_simple_vertical_edge_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
......@@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx):
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
lea rsi, [rsi + rax*4- 2]; ;
movsxd rcx, dword ptr arg(5) ;count
mov rcx, 2 ; count
nexts8_v:
lea rdi, [rsi + rax];
......@@ -1602,14 +1582,10 @@ nexts8_v:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
mov rdx, arg(2) ;flimit ; get flimit
mov rdx, arg(2) ;blimit ; get blimit
movq mm7, [rdx]
mov rdx, arg(3) ; get limit
movq mm6, [rdx]
paddb mm7, mm7 ; flimit*2 (less than 255)
paddb mm7, mm6 ; flimit * 2 + limit (less than 255)
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor mm7, mm7
pcmpeqb mm5, mm7 ; mm5 = mask
......
......@@ -110,7 +110,7 @@
psubusb xmm6, xmm5 ; p1-=p0
por xmm6, xmm4 ; abs(p1 - p0)
mov rdx, arg(2) ; get flimit
mov rdx, arg(2) ; get blimit
movdqa t1, xmm6 ; save to t1
......@@ -123,7 +123,7 @@
psubusb xmm1, xmm7
por xmm2, xmm3 ; abs(p1-q1)
movdqa xmm4, XMMWORD PTR [rdx] ; flimit
movdqa xmm7, XMMWORD PTR [rdx] ; blimit
movdqa xmm3, xmm0 ; q0
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
......@@ -134,13 +134,11 @@
psrlw xmm2, 1 ; abs(p1-q1)/2
psubusb xmm5, xmm3 ; p0-=q0
paddb xmm4, xmm4 ; flimit*2 (less than 255)
psubusb xmm3, xmm6 ; q0-=p0
por xmm5, xmm3 ; abs(p0 - q0)
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
movdqa xmm4, t0 ; hev get abs (q1 - q0)
......@@ -150,7 +148,7 @@
movdqa xmm2, XMMWORD PTR [rdx] ; hev
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
psubusb xmm4, xmm2 ; hev
psubusb xmm3, xmm2 ; hev
......@@ -278,7 +276,7 @@
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
......@@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
movdqa xmm4, XMMWORD PTR [rdx]; limit
pmaxub xmm0, xmm7
mov rdx, arg(2) ; flimit
mov rdx, arg(2) ; blimit
psubusb xmm0, xmm4
movdqa xmm5, xmm2 ; q1
......@@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
psrlw xmm5, 1 ; abs(p1-q1)/2
psubusb xmm6, xmm3 ; q0-p0
movdqa xmm2, XMMWORD PTR [rdx]; flimit
movdqa xmm4, XMMWORD PTR [rdx]; blimit
mov rdx, arg(4) ; get thresh
por xmm1, xmm6 ; abs(q0-p0)
paddb xmm2, xmm2 ; flimit*2 (less than 255)
movdqa xmm6, t0 ; get abs (q1 - q0)
......@@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
por xmm1, xmm0 ; mask
......@@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
......@@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
......@@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
......@@ -1376,16 +1372,13 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
; int count
; const char *blimit,
;)
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx
push rsi
......@@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
mov rdx, arg(2) ;flimit ; get flimit
mov rdx, arg(2) ;blimit
movdqa xmm3, XMMWORD PTR [rdx]
mov rdx, arg(3) ;limit
movdqa xmm7, XMMWORD PTR [rdx]
paddb xmm3, xmm3 ; flimit*2 (less than 255)
paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255)
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
......@@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm3, xmm3
pcmpeqb xmm5, xmm3
......@@ -1493,16 +1481,13 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
; int count
; const char *blimit,
;)
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6
SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx ; save callee-saved reg
push rsi
......@@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
mov rdx, arg(2) ;flimit
mov rdx, arg(2) ;blimit
movdqa xmm7, XMMWORD PTR [rdx]
mov rdx, arg(3) ; get limit
movdqa xmm6, XMMWORD PTR [rdx]
paddb xmm7, xmm7 ; flimit*2 (less than 255)
paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255)
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm7, xmm7
pcmpeqb xmm5, xmm7 ; mm5 = mask
......
......@@ -9,30 +9,18 @@
*/
#include "vpx_ports/config.h"
#include "vpx_config.h"
#include "vp8/common/loopfilter.h"
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
......@@ -44,23 +32,13 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
}
void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
......@@ -68,23 +46,13 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, 1);
}
void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
......@@ -92,27 +60,23 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
}
......@@ -120,27 +84,23 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->flim, lfi->lim, lfi->thr, 1);
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
}
#endif
......@@ -150,20 +110,10 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mbflim, lfi->lim, lfi->thr, v_ptr);
}
void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
}
......@@ -171,20 +121,10 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi)
{
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->thr, 2);
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);