Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
98fcccfe
Commit
98fcccfe
authored
Jun 03, 2010
by
Jeff Muizelaar
Committed by
Fritz Koenig
Jul 23, 2010
Browse files
Change the x86 idct functions to do reconstruction at the same time
Change-Id: I896fe6f9664e6849c7cee2cc6bb4e045eb42540f
parent
b2fa74ac
Changes
6
Hide whitespace changes
Inline
Side-by-side
vp8/common/x86/idct_x86.h
View file @
98fcccfe
...
...
@@ -22,6 +22,7 @@
#if HAVE_MMX
extern
prototype_idct
(
vp8_short_idct4x4llm_1_mmx
);
extern
prototype_idct
(
vp8_short_idct4x4llm_mmx
);
extern
prototype_idct_scalar_add
(
vp8_dc_only_idct_add_mmx
);
extern
prototype_second_order
(
vp8_short_inv_walsh4x4_mmx
);
extern
prototype_second_order
(
vp8_short_inv_walsh4x4_1_mmx
);
...
...
@@ -33,6 +34,9 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
#undef vp8_idct_idct16
#define vp8_idct_idct16 vp8_short_idct4x4llm_mmx
#undef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_mmx
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
...
...
vp8/common/x86/idctllm_mmx.asm
View file @
98fcccfe
...
...
@@ -220,35 +220,61 @@ sym(vp8_short_idct4x4llm_1_mmx):
pop
rbp
ret
;void dc_only_idct_mmx(short input_dc,
short *output, int pitch
)
global
sym
(
vp8_dc_only_idct_mmx
)
sym
(
vp8_dc_only_idct_mmx
):
;void
vp8_
dc_only_idct_
add_
mmx(short input_dc,
unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride
)
global
sym
(
vp8_dc_only_idct_
add_
mmx
)
sym
(
vp8_dc_only_idct_
add_
mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
3
SHADOW_ARGS_TO_STACK
5
GET_GOT
rbx
push
rsi
push
rdi
; end prolog
movd
mm0
,
arg
(
0
)
;input_dc
mov
rsi
,
arg
(
1
)
;s -- prediction
mov
rdi
,
arg
(
2
)
;d -- destination
movsxd
rax
,
dword
ptr
arg
(
4
)
;stride
movsxd
rdx
,
dword
ptr
arg
(
3
)
;pitch
pxor
mm0
,
mm0
paddw
mm0
,
[
fours
GLOBAL
]
mov
rdx
,
arg
(
1
)
;output
movd
mm5
,
arg
(
0
)
;input_dc
psraw
mm0
,
3
movsxd
rax
,
dword
ptr
arg
(
2
)
;pitch
paddw
mm5
,
[
fours
GLOBAL
]
punpcklwd
mm0
,
mm0
punpckldq
mm0
,
mm0
psraw
mm5
,
3
movq
[
rdx
],
mm
0
movq
[
rdx
+
rax
],
mm
0
punpcklwd
mm5
,
mm
5
punpckldq
mm5
,
mm
5
movq
[
rdx
+
rax
*
2
],
mm0
add
rdx
,
rax
movd
mm1
,
[
rsi
]
punpcklbw
mm1
,
mm0
paddsw
mm1
,
mm5
packuswb
mm1
,
mm0
; pack and unpack to saturate
movd
[
rdi
],
mm1
movq
[
rdx
+
rax
*
2
],
mm0
movd
mm2
,
[
rsi
+
rdx
]
punpcklbw
mm2
,
mm0
paddsw
mm2
,
mm5
packuswb
mm2
,
mm0
; pack and unpack to saturate
movd
[
rdi
+
rax
],
mm2
movd
mm3
,
[
rsi
+
2
*
rdx
]
punpcklbw
mm3
,
mm0
paddsw
mm3
,
mm5
packuswb
mm3
,
mm0
; pack and unpack to saturate
movd
[
rdi
+
2
*
rax
],
mm3
add
rdi
,
rax
add
rsi
,
rdx
movd
mm4
,
[
rsi
+
2
*
rdx
]
punpcklbw
mm4
,
mm0
paddsw
mm4
,
mm5
packuswb
mm4
,
mm0
; pack and unpack to saturate
movd
[
rdi
+
2
*
rax
],
mm4
; begin epilog
pop
rdi
pop
rsi
REST
ORE_GOT
UNSHADOW_ARGS
pop
rbp
...
...
vp8/common/x86/x86_systemdependent.c
View file @
98fcccfe
...
...
@@ -42,6 +42,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
{
rtcd
->
idct
.
idct1
=
vp8_short_idct4x4llm_1_mmx
;
rtcd
->
idct
.
idct16
=
vp8_short_idct4x4llm_mmx
;
rtcd
->
idct
.
idct1_scalar_add
=
vp8_dc_only_idct_add_mmx
;
rtcd
->
idct
.
iwalsh16
=
vp8_short_inv_walsh4x4_mmx
;
rtcd
->
idct
.
iwalsh1
=
vp8_short_inv_walsh4x4_1_mmx
;
...
...
vp8/decoder/x86/dequantize_mmx.asm
View file @
98fcccfe
...
...
@@ -50,12 +50,12 @@ sym(vp8_dequantize_b_impl_mmx):
ret
;void dequant_idct_mmx(short *input, short *dq,
short *output, int pitch
)
global
sym
(
vp8_dequant_idct_mmx
)
sym
(
vp8_dequant_idct_mmx
):
;void dequant_idct_
add_
mmx(short *input, short *dq,
unsigned char *pred, unsigned char *dest, int pitch, int stride
)
global
sym
(
vp8_dequant_idct_
add_
mmx
)
sym
(
vp8_dequant_idct_
add_
mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
4
SHADOW_ARGS_TO_STACK
6
GET_GOT
rbx
push
rsi
push
rdi
...
...
@@ -77,7 +77,8 @@ sym(vp8_dequant_idct_mmx):
movq
mm3
,
[
rax
+
24
]
pmullw
mm3
,
[
rdx
+
24
]
mov
rdx
,
arg
(
2
)
;output
mov
rdx
,
arg
(
3
)
;dest
mov
rsi
,
arg
(
2
)
;pred
pxor
mm7
,
mm7
...
...
@@ -88,7 +89,8 @@ sym(vp8_dequant_idct_mmx):
movq
[
rax
+
24
],
mm7
movsxd
rax
,
dword
ptr
arg
(
3
)
;pitch
movsxd
rax
,
dword
ptr
arg
(
4
)
;pitch
movsxd
rdi
,
dword
ptr
arg
(
5
)
;stride
psubw
mm0
,
mm2
; b1= 0-2
paddw
mm2
,
mm2
;
...
...
@@ -207,13 +209,34 @@ sym(vp8_dequant_idct_mmx):
punpckldq
mm2
,
mm4
; 32 22 12 02
punpckhdq
mm5
,
mm4
; 33 23 13 03
movq
[
rdx
],
mm
0
pxor
mm7
,
mm
7
movq
[
rdx
+
rax
],
mm1
movq
[
rdx
+
rax
*
2
],
mm2
movd
mm4
,
[
rsi
]
punpcklbw
mm4
,
mm7
paddsw
mm0
,
mm4
packuswb
mm0
,
mm7
movd
[
rdx
],
mm0
add
rdx
,
rax
movq
[
rdx
+
rax
*
2
],
mm5
movd
mm4
,
[
rsi
+
rax
]
punpcklbw
mm4
,
mm7
paddsw
mm1
,
mm4
packuswb
mm1
,
mm7
movd
[
rdx
+
rdi
],
mm1
movd
mm4
,
[
rsi
+
2
*
rax
]
punpcklbw
mm4
,
mm7
paddsw
mm2
,
mm4
packuswb
mm2
,
mm7
movd
[
rdx
+
rdi
*
2
],
mm2
add
rdx
,
rdi
add
rsi
,
rax
movd
mm4
,
[
rsi
+
2
*
rax
]
punpcklbw
mm4
,
mm7
paddsw
mm5
,
mm4
packuswb
mm5
,
mm7
movd
[
rdx
+
rdi
*
2
],
mm5
; begin epilog
pop
rdi
...
...
@@ -224,12 +247,12 @@ sym(vp8_dequant_idct_mmx):
ret
;void dequant_dc_idct_mmx(short *input, short *dq,
short *output, int pitch
, int Dc)
global
sym
(
vp8_dequant_dc_idct_mmx
)
sym
(
vp8_dequant_dc_idct_mmx
):
;void dequant_dc_idct_
add_
mmx(short *input, short *dq,
unsigned char *pred, unsigned char *dest, int pitch, int stride
, int Dc)
global
sym
(
vp8_dequant_dc_idct_
add_
mmx
)
sym
(
vp8_dequant_dc_idct_
add_
mmx
):
push
rbp
mov
rbp
,
rsp
SHADOW_ARGS_TO_STACK
5
SHADOW_ARGS_TO_STACK
7
GET_GOT
rbx
push
rsi
push
rdi
...
...
@@ -238,7 +261,7 @@ sym(vp8_dequant_dc_idct_mmx):
mov
rax
,
arg
(
0
)
;input
mov
rdx
,
arg
(
1
)
;dq
movsxd
rcx
,
dword
ptr
arg
(
4
)
;Dc
movsxd
rcx
,
dword
ptr
arg
(
6
)
;Dc
movq
mm0
,
[
rax
]
pmullw
mm0
,
[
rdx
]
...
...
@@ -252,7 +275,8 @@ sym(vp8_dequant_dc_idct_mmx):
movq
mm3
,
[
rax
+
24
]
pmullw
mm3
,
[
rdx
+
24
]
mov
rdx
,
arg
(
2
)
;output
mov
rdx
,
arg
(
3
)
;dest
mov
rsi
,
arg
(
2
)
;pred
pxor
mm7
,
mm7
...
...
@@ -262,8 +286,10 @@ sym(vp8_dequant_dc_idct_mmx):
movq
[
rax
+
16
],
mm7
movq
[
rax
+
24
],
mm7
pinsrw
mm0
,
rcx
,
0
movsxd
rax
,
dword
ptr
arg
(
3
)
;pitch
movsxd
rax
,
dword
ptr
arg
(
4
)
;pitch
movsxd
rdi
,
dword
ptr
arg
(
5
)
;stride
psubw
mm0
,
mm2
; b1= 0-2
paddw
mm2
,
mm2
;
...
...
@@ -382,13 +408,34 @@ sym(vp8_dequant_dc_idct_mmx):
punpckldq
mm2
,
mm4
; 32 22 12 02
punpckhdq
mm5
,
mm4
; 33 23 13 03
movq
[
rdx
],
mm0
movq
[
rdx
+
rax
],
mm1
movq
[
rdx
+
rax
*
2
],
mm2
add
rdx
,
rax
movq
[
rdx
+
rax
*
2
],
mm5
pxor
mm7
,
mm7
movd
mm4
,
[
rsi
]
punpcklbw
mm4
,
mm7
paddsw
mm0
,
mm4
packuswb
mm0
,
mm7
movd
[
rdx
],
mm0
movd
mm4
,
[
rsi
+
rax
]
punpcklbw
mm4
,
mm7
paddsw
mm1
,
mm4
packuswb
mm1
,
mm7
movd
[
rdx
+
rdi
],
mm1
movd
mm4
,
[
rsi
+
2
*
rax
]
punpcklbw
mm4
,
mm7
paddsw
mm2
,
mm4
packuswb
mm2
,
mm7
movd
[
rdx
+
rdi
*
2
],
mm2
add
rdx
,
rdi
add
rsi
,
rax
movd
mm4
,
[
rsi
+
2
*
rax
]
punpcklbw
mm4
,
mm7
paddsw
mm5
,
mm4
packuswb
mm5
,
mm7
movd
[
rdx
+
rdi
*
2
],
mm5
; begin epilog
pop
rdi
...
...
vp8/decoder/x86/dequantize_x86.h
View file @
98fcccfe
...
...
@@ -21,12 +21,20 @@
*/
#if HAVE_MMX
extern
prototype_dequant_block
(
vp8_dequantize_b_mmx
);
extern
prototype_dequant_idct_add
(
vp8_dequant_idct_add_mmx
);
extern
prototype_dequant_idct_dc_add
(
vp8_dequant_dc_idct_add_mmx
);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_dequant_block
#define vp8_dequant_block vp8_dequantize_b_mmx
#undef vp8_dequant_idct_add
#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
#undef vp8_dequant_idct_dc
#define vp8_dequant_idct_add_dc vp8_dequant_dc_idct_add_mmx
#endif
#endif
...
...
vp8/decoder/x86/x86_dsystemdependent.c
View file @
98fcccfe
...
...
@@ -43,6 +43,8 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
if
(
flags
&
HAS_MMX
)
{
pbi
->
dequant
.
block
=
vp8_dequantize_b_mmx
;
pbi
->
dequant
.
idct_add
=
vp8_dequant_idct_add_mmx
;
pbi
->
dequant
.
idct_dc_add
=
vp8_dequant_dc_idct_add_mmx
;
}
#endif
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment