Commit 27b390e1 authored by Scott LaVarnway
Browse files

d153 intra prediction ssse3 using bytes

Byte version of Ronald's d153 SSSE3 optimizations for
4x4 and 8x8.
(commit: fc91a2a112238a1aee568f3b840585de4e928fca)

Change-Id: Iec4426032311483f615fd9e0dceba3ee85ddebd7
parent 35830879
......@@ -65,7 +65,7 @@ prototype void vp9_d135_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const u
specialize vp9_d135_predictor_4x4
prototype void vp9_d153_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_d153_predictor_4x4
specialize vp9_d153_predictor_4x4 $ssse3_x86inc
prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_v_predictor_4x4 $sse_x86inc
......@@ -104,7 +104,7 @@ prototype void vp9_d135_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const u
specialize vp9_d135_predictor_8x8
prototype void vp9_d153_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_d153_predictor_8x8
specialize vp9_d153_predictor_8x8 $ssse3_x86inc
prototype void vp9_v_predictor_8x8 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"
specialize vp9_v_predictor_8x8 $sse_x86inc
......
......@@ -13,9 +13,6 @@
; Read-only constant tables. Naming as it appears from the data below:
; pb_ = table of bytes, pw_ = table of 16-bit words, sh_b = byte index table.
; The code that consumes most of these lies outside this view.
SECTION_RODATA
; 16 bytes, each equal to 1.
pb_1: times 16 db 1
; 8 words, each equal to 2.
pw_2: times 8 dw 2
; The byte pair (7, -1) repeated 8 times (16 bytes total).
pb_7m1: times 8 db 7, -1
; 16 bytes, each equal to 15.
pb_15: times 16 db 15
; Byte index tables; presumably pshufb control masks (no user visible here).
; Low 8 entries follow the digits in the label; the high 8 entries are 0.
sh_b01234577: db 0, 1, 2, 3, 4, 5, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
......@@ -23,18 +20,17 @@ sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0
; Byte index tables, 16 entries each. No user of these tables is visible in
; this view; presumably they are pshufb control masks for other predictors.
;
; Full 16-byte patterns: a window starting at index 0/1/2 whose tail repeats
; index 7.
sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7
sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
sh_b2345677777777777: db 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
; "b2w" tables: every source index is followed by -1. When used as a pshufb
; control, an index byte with its top bit set (such as -1) writes zero, so
; each selected byte would be zero-extended to a 16-bit word.
; NOTE(review): intent inferred from the data layout - confirm against users.
sh_b2w01234577: db 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 7, -1, 7, -1
sh_b2w12345677: db 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 7, -1
sh_b2w23456777: db 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 7, -1, 7, -1
sh_b2w01234567: db 0, -1, 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1
sh_b2w12345678: db 1, -1, 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 8, -1
sh_b2w23456789: db 2, -1, 3, -1, 4, -1, 5, -1, 6, -1, 7, -1, 8, -1, 9, -1
sh_b2w89abcdef: db 8, -1, 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1
sh_b2w9abcdeff: db 9, -1, 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1, 15, -1
sh_b2wabcdefff: db 10, -1, 11, -1, 12, -1, 13, -1, 14, -1, 15, -1, 15, -1, 15, -1
; 16-byte windows starting at index 1 and 2; the tail repeats index 15.
sh_b123456789abcdeff: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15
sh_b23456789abcdefff: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15
; pshufb control masks used by the d153 predictors in this file. A trailing
; index of 0 is padding: it selects source byte 0 (it does not zero the lane),
; and those output bytes are never stored by the d153 code.
;
; d153_predictor_4x4: reverse bytes 0-3, keep bytes 4-7 in place.
sh_b32104567: db 3, 2, 1, 0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0
; d153_predictor_4x4: interleave bytes 8-11 with bytes 0-3 (8,0,9,1,...),
; then append bytes 4 and 5.
sh_b8091a2b345: db 8, 0, 9, 1, 10, 2, 11, 3, 4, 5, 0, 0, 0, 0, 0, 0
; d153_predictor_8x8: reverse the low 8 bytes.
sh_b76543210: db 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0
; d153_predictor_8x8: bytes 6..0 reversed, followed by byte 8.
sh_b65432108: db 6, 5, 4, 3, 2, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0
; d153_predictor_8x8: bytes 5..0 reversed, followed by bytes 8 and 9.
sh_b54321089: db 5, 4, 3, 2, 1, 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0
; d153_predictor_8x8: move the high 8 bytes (8-15) down to bytes 0-7.
sh_b89abcdef: db 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
SECTION .text
INIT_MMX ssse3
......@@ -455,3 +451,84 @@ cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, line, goffset
jnz .loop
RESTORE_GOT
REP_RET
;------------------------------------------------------------------------------
; d153_predictor_4x4 (SSSE3, byte lanes)
;
; Writes a 4x4 block of predicted pixels to dst, one 4-byte row per store.
;
; Args:  dst    - output block, rows are strideq bytes apart
;        stride - row pitch of dst in bytes
;        above  - row above the block; 4 bytes are read starting at above-1
;                 (tl, t1, t2, t3)
;        left   - column left of the block; 4 bytes are read (l1..l4)
; cglobal "4, 5, 4": 4 arguments, 5 general-purpose registers (the fifth is
; goffset), 4 XMM registers (m0-m3), per the x86inc cglobal convention.
;
; GET_GOT / RESTORE_GOT / GLOBAL() and X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 are macros
; defined outside this view. NOTE(review): going by its name and the original
; "3-tap avg" comments, the latter is taken to compute
; (x + 2*y + z + 2) >> 2 per byte of its first three operands into the fourth.
;------------------------------------------------------------------------------
INIT_XMM ssse3
cglobal d153_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset
GET_GOT goffsetq
movd m0, [leftq] ; l1, l2, l3, l4
movd m1, [aboveq-1] ; tl, t1, t2, t3
punpckldq m0, m1 ; l1, l2, l3, l4, tl, t1, t2, t3
; Reverse the left column so the edge reads as one run, bottom-left to top-right.
pshufb m0, [GLOBAL(sh_b32104567)]; l4, l3, l2, l1, tl, t1, t2, t3
; m1/m2 are the same run advanced by one and two pixels (the neighbor taps).
psrldq m1, m0, 1 ; l3, l2, l1, tl, t1, t2, t3
psrldq m2, m0, 2 ; l2, l1, tl, t1, t2, t3
; comments below are for a predictor like this
; A1 B1 C1 D1
; A2 B2 A1 B1
; A3 B3 A2 B2
; A4 B4 A3 B3
; Result lands in m3. The pavgb that follows still needs m0 and m1 intact,
; i.e. the macro must not clobber its first two operands.
X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; 3-tap avg B4 B3 B2 B1 C1 D1
pavgb m1, m0 ; 2-tap avg A4 A3 A2 A1
; Pack both results into one register: 3-tap values low, 2-tap values high.
punpcklqdq m3, m1 ; B4 B3 B2 B1 C1 D1 x x A4 A3 A2 A1 ..
; above/left are no longer needed: re-alias the argument registers so the
; third slot (previously "above") can hold 3*stride.
DEFINE_ARGS dst, stride, stride3
lea stride3q, [strideq*3]
; Interleave A/B pairs bottom row first and append C1 D1. Each row is then the
; 4-byte window two bytes further along, so rows are stored bottom to top.
pshufb m3, [GLOBAL(sh_b8091a2b345)] ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 ..
movd [dstq+stride3q ], m3 ; row 3
psrldq m3, 2 ; A3 B3 A2 B2 A1 B1 C1 D1 ..
movd [dstq+strideq*2], m3 ; row 2
psrldq m3, 2 ; A2 B2 A1 B1 C1 D1 ..
movd [dstq+strideq ], m3 ; row 1
psrldq m3, 2 ; A1 B1 C1 D1 ..
movd [dstq ], m3 ; row 0
RESTORE_GOT
RET
;------------------------------------------------------------------------------
; d153_predictor_8x8 (SSSE3, byte lanes)
;
; Writes an 8x8 block of predicted pixels to dst, one 8-byte row per store.
;
; Args:  dst    - output block, rows are strideq bytes apart
;        stride - row pitch of dst in bytes
;        above  - row above the block; 8 bytes are read starting at above-1
;                 (tl, t1..t7)
;        left   - column left of the block; 8 bytes are read (l1..l8)
; cglobal "4, 5, 8": 4 arguments, 5 general-purpose registers (the fifth is
; goffset), 8 XMM registers (m0-m7), per the x86inc cglobal convention.
;
; GET_GOT / RESTORE_GOT / GLOBAL() and X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 are macros
; defined outside this view. NOTE(review): going by its name and the original
; "3-tap avg" comments, the latter is taken to compute
; (x + 2*y + z + 2) >> 2 per byte of its first three operands into the fourth.
;
; All lanes here are bytes. Only the low 8 bytes of m1-m5 carry meaningful
; data; the shuffle masks fill the upper bytes with copies of source byte 0,
; and nothing derived from those upper bytes is ever stored.
;------------------------------------------------------------------------------
INIT_XMM ssse3
cglobal d153_predictor_8x8, 4, 5, 8, dst, stride, above, left, goffset
GET_GOT goffsetq
movq m0, [leftq] ; [0- 7] l1-8 [byte]
movhps m0, [aboveq-1] ; [8-15] tl, t1-7 [byte]
; Three views of the reversed left column, each advanced one pixel further
; toward the top-left corner (the taps for the A and B columns).
pshufb m1, m0, [GLOBAL(sh_b76543210)] ; l8-1 [byte]
pshufb m2, m0, [GLOBAL(sh_b65432108)] ; l7-1,tl [byte]
pshufb m3, m0, [GLOBAL(sh_b54321089)] ; l6-1,tl,t1 [byte]
; Three views of the top row (the taps for C1..H1).
pshufb m0, [GLOBAL(sh_b89abcdef)] ; tl,t1-7 [byte]
psrldq m4, m0, 1 ; t1-7 [byte]
psrldq m5, m0, 2 ; t2-7 [byte]
; comments below are for a predictor like this
; A1 B1 C1 D1 E1 F1 G1 H1
; A2 B2 A1 B1 C1 D1 E1 F1
; A3 B3 A2 B2 A1 B1 C1 D1
; A4 B4 A3 B3 A2 B2 A1 B1
; A5 B5 A4 B4 A3 B3 A2 B2
; A6 B6 A5 B5 A4 B4 A3 B3
; A7 B7 A6 B6 A5 B5 A4 B4
; A8 B8 A7 B7 A6 B6 A5 B5
pavgb m6, m1, m2 ; 2-tap avg A8-A1
; Only bytes 0-5 of m7 (C1..H1) are ever stored.
X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m4, m5, m7 ; 3-tap avg C-H1
; m0 was consumed by the previous macro call, so it is reused as the output.
X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m2, m3, m0 ; 3-tap avg B8-1
; m6 = A8 B8 A7 B7 ... A1 B1 (16 bytes); every row is an 8-byte window of it.
punpcklbw m6, m0 ; A-B8, A-B7 ... A-B2, A-B1
; above/left are no longer needed: re-alias the argument registers so the
; third slot (previously "above") can hold 3*stride.
DEFINE_ARGS dst, stride, stride3
lea stride3q, [strideq*3]
; Row 3 is exactly the high half of m6.
movhps [dstq+stride3q], m6 ; A-B4, A-B3, A-B2, A-B1
; Rows 2..0 run past the end of m6 into the top-row values: take bytes 10-15
; of m6 followed by the bytes of m7, then slide down two bytes per row.
palignr m0, m7, m6, 10 ; A-B3, A-B2, A-B1, C-H1
movq [dstq+strideq*2], m0 ; row 2
psrldq m0, 2 ; A-B2, A-B1, C-H1
movq [dstq+strideq ], m0 ; row 1
psrldq m0, 2 ; A-H1
movq [dstq ], m0 ; row 0
; Advance dst by four rows; rows 7..4 are windows of m6 alone, stored bottom
; to top while sliding two bytes per row.
lea dstq, [dstq+strideq*4]
movq [dstq+stride3q ], m6 ; A-B8, A-B7, A-B6, A-B5
psrldq m6, 2 ; A-B7, A-B6, A-B5, A-B4
movq [dstq+strideq*2], m6 ; row 6
psrldq m6, 2 ; A-B6, A-B5, A-B4, A-B3
movq [dstq+strideq ], m6 ; row 5
psrldq m6, 2 ; A-B5, A-B4, A-B3, A-B2
movq [dstq ], m6 ; row 4
RESTORE_GOT
RET
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment