Commit 0bb49c4e authored by Ronald S. Bultje, committed by Gerrit Code Review
Browse files

Merge "Add SSE2 versions for rectangular sad and sad4d functions." into experimental

parents 642ac924 0c481f4d
......@@ -376,7 +376,8 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then
# variance
[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2
#if CONFIG_SBSEGMENT
if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
prototype unsigned int vp9_variance32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance32x16
......@@ -388,7 +389,8 @@ specialize vp9_variance64x32
prototype unsigned int vp9_variance32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance32x64
#endif
fi
prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_variance32x32
......@@ -424,7 +426,8 @@ vp9_variance4x4_mmx=vp9_variance4x4_mmx
prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance64x64 sse2
#if CONFIG_SBSEGMENT
if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
prototype unsigned int vp9_sub_pixel_variance32x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x64
......@@ -436,7 +439,8 @@ specialize vp9_sub_pixel_variance32x16
prototype unsigned int vp9_sub_pixel_variance16x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance16x32
#endif
fi
prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"
specialize vp9_sub_pixel_variance32x32 sse2
......@@ -464,19 +468,21 @@ vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt
prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad64x64 sse2
#if CONFIG_SBSEGMENT
if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
prototype unsigned int vp9_sad32x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad32x64
specialize vp9_sad32x64 sse2
prototype unsigned int vp9_sad64x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad64x32
specialize vp9_sad64x32 sse2
prototype unsigned int vp9_sad32x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad32x16
specialize vp9_sad32x16 sse2
prototype unsigned int vp9_sad16x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad16x32
#endif
specialize vp9_sad16x32 sse2
fi
prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"
specialize vp9_sad32x32 sse2
......@@ -571,19 +577,21 @@ specialize vp9_sad4x4x8 sse4
prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad64x64x4d sse2
#if CONFIG_SBSEGMENT
if [ "$CONFIG_SBSEGMENT" = "yes" ]; then
prototype void vp9_sad32x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad32x64x4d
specialize vp9_sad32x64x4d sse2
prototype void vp9_sad64x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad64x32x4d
specialize vp9_sad64x32x4d sse2
prototype void vp9_sad32x16x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad32x16x4d
specialize vp9_sad32x16x4d sse2
prototype void vp9_sad16x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad16x32x4d
#endif
specialize vp9_sad16x32x4d sse2
fi
prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], int ref_stride, unsigned int *sad_array"
specialize vp9_sad32x32x4d sse2
......
......@@ -215,7 +215,11 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
INIT_XMM sse2
SADNXN4D 64, 64
SADNXN4D 64, 32
SADNXN4D 32, 64
SADNXN4D 32, 32
SADNXN4D 32, 16
SADNXN4D 16, 32
SADNXN4D 16, 16
SADNXN4D 16, 8
SADNXN4D 8, 16
......
......@@ -14,11 +14,11 @@ SECTION .text
; unsigned int vp9_sad64x{32,64}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
INIT_XMM sse2
cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
%macro SAD64XN 1
cglobal sad64x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
movsxdifnidn src_strideq, src_strided
movsxdifnidn ref_strideq, ref_strided
mov n_rowsd, 64
mov n_rowsd, %1
pxor m0, m0
.loop:
movu m1, [refq]
......@@ -42,14 +42,19 @@ cglobal sad64x64, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
paddd m0, m1
movd eax, m0
RET
%endmacro
INIT_XMM sse2
SAD64XN 64 ; sad64x64_sse2
SAD64XN 32 ; sad64x32_sse2
; unsigned int vp9_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
INIT_XMM sse2
cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
%macro SAD32XN 1
cglobal sad32x%1, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
movsxdifnidn src_strideq, src_strided
movsxdifnidn ref_strideq, ref_strided
mov n_rowsd, 16
mov n_rowsd, %1/2
pxor m0, m0
.loop:
......@@ -74,6 +79,12 @@ cglobal sad32x32, 4, 5, 5, src, src_stride, ref, ref_stride, n_rows
paddd m0, m1
movd eax, m0
RET
%endmacro
INIT_XMM sse2
SAD32XN 64 ; sad32x64_sse2
SAD32XN 32 ; sad32x32_sse2
SAD32XN 16 ; sad32x16_sse2
; unsigned int vp9_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
......@@ -112,6 +123,7 @@ cglobal sad16x%1, 4, 7, 5, src, src_stride, ref, ref_stride, \
%endmacro
INIT_XMM sse2
SAD16XN 32 ; sad16x32_sse2
SAD16XN 16 ; sad16x16_sse2
SAD16XN 8 ; sad16x8_sse2
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment