Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
5ef694cf
Commit
5ef694cf
authored
Feb 27, 2013
by
Yunqing Wang
Browse files
Remove unused file
Removed vp9_idctllm_mmx.asm Change-Id: I7152756f23a5a09ed69e8fb40edb2ab3237290fe
parent
4446af78
Changes
1
Hide whitespace changes
Inline
Side-by-side
vp9/common/x86/vp9_idctllm_mmx.asm
deleted
100644 → 0
View file @
4446af78
;
; Copyright (c) 2012 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "third_party/x86inc/x86inc.asm"
SECTION_RODATA

; Fixed-point multipliers for pmulhw; each is replicated across the four
; 16-bit lanes of an MMX register.

align 16
x_s1sqr2:                               ; sqrt(2) * sin(pi/8) in 16-bit fixed point (35468)
    times 4 dw 0x8A8C

align 16
x_c1sqr2less1:                          ; sqrt(2) * cos(pi/8) - 1 in 16-bit fixed point (20091)
    times 4 dw 0x4E7B

align 16
pw_16:                                  ; rounding term added before the ">> 5" shifts
    times 4 dw 16

SECTION .text
; /****************************************************************************
; * Notes:
; *
; * This implementation makes use of 16 bit fixed point version of two multiply
; * constants:
; * 1. sqrt(2) * cos (pi/8)
; * 2. sqrt(2) * sin (pi/8)
; * Because the first constant is bigger than 1, to maintain the same 16 bit
; * fixed point precision as the second one, we use a trick of
; * x * a = x + x*(a-1)
; * so
; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
; *
; * The 16 bit version of the second constant is 35468, which is bigger than
; * 32767, so a signed 16 bit multiply (pmulhw) sees it as the negative
; * number 35468 - 65536. Adding x back after the multiply compensates:
; * (x * (unsigned)35468 >> 16) = (x * (signed)35468 >> 16) + x
; *
; **************************************************************************/
INIT_MMX
;-----------------------------------------------------------------------------
; IDCT4X4LLM_MMX_TERMS
;   One 1-D pass, first half. Works on four lanes at once.
;   In:    m0..m3 = the four input vectors in0..in3
;   Out:   m2 = a1 = in0 + in2
;          m0 = b1 = in0 - in2
;          m7 = t  = in3 * sqrt(2)*cos(pi/8) - in1 * sqrt(2)*sin(pi/8)
;          m3 = d1 = in1 * sqrt(2)*cos(pi/8) + in3 * sqrt(2)*sin(pi/8)
;   Clobb: m1 (preserved), m4, m5
;-----------------------------------------------------------------------------
%macro IDCT4X4LLM_MMX_TERMS 0
    psubw       m0, m2                  ; b1 = in0 - in2
    paddw       m2, m2
    paddw       m2, m0                  ; a1 = 2*in2 + (in0 - in2) = in0 + in2

    mova        m5, m1
    pmulhw      m5, [x_s1sqr2]
    paddw       m5, m1                  ; in1 * sin(pi/8) * sqrt(2)  (add-back, see notes)
    mova        m7, m3
    pmulhw      m7, [x_c1sqr2less1]
    paddw       m7, m3                  ; in3 * cos(pi/8) * sqrt(2)  (x + x*(a-1))
    psubw       m7, m5                  ; t

    mova        m5, m1
    mova        m4, m3
    pmulhw      m5, [x_c1sqr2less1]
    paddw       m5, m1                  ; in1 * cos(pi/8) * sqrt(2)
    pmulhw      m3, [x_s1sqr2]
    paddw       m3, m4                  ; in3 * sin(pi/8) * sqrt(2)
    paddw       m3, m5                  ; d1
%endmacro

;-----------------------------------------------------------------------------
; IDCT4X4LLM_MMX_OUTPUTS
;   One 1-D pass, second half (butterflies).
;   In:    m2 = a1, m0 = b1, m7 = t, m3 = d1
;   Out:   m2 = a1 + d1   (row 0 for the transpose)
;          m0 = b1 - t    (row 1)
;          m4 = b1 + t    (row 2)
;          m6 = a1 - d1   (row 3)
;-----------------------------------------------------------------------------
%macro IDCT4X4LLM_MMX_OUTPUTS 0
    mova        m6, m2                  ; a1
    mova        m4, m0                  ; b1
    paddw       m2, m3                  ; row 0
    paddw       m4, m7                  ; row 2
    psubw       m0, m7                  ; row 1
    psubw       m6, m3                  ; row 3
%endmacro

;-----------------------------------------------------------------------------
; IDCT4X4LLM_MMX_TRANSPOSE
;   4x4 transpose of 16-bit words.
;   In:    rows 0..3 in m2, m0, m4, m6
;   Out:   rows 0..3 in m0, m1, m2, m5
;   Clobb: m3, m4
;-----------------------------------------------------------------------------
%macro IDCT4X4LLM_MMX_TRANSPOSE 0
    mova        m1, m2                  ; 03 02 01 00
    mova        m3, m4                  ; 23 22 21 20
    punpcklwd   m1, m0                  ; 11 01 10 00
    punpckhwd   m2, m0                  ; 13 03 12 02
    punpcklwd   m3, m6                  ; 31 21 30 20
    punpckhwd   m4, m6                  ; 33 23 32 22

    mova        m0, m1                  ; 11 01 10 00
    mova        m5, m2                  ; 13 03 12 02
    punpckldq   m0, m3                  ; 30 20 10 00
    punpckhdq   m1, m3                  ; 31 21 11 01
    punpckldq   m2, m4                  ; 32 22 12 02
    punpckhdq   m5, m4                  ; 33 23 13 03
%endmacro

;-----------------------------------------------------------------------------
; void short_idct4x4llm_mmx(short *input, short *output, int pitch)
;
; Full 4x4 inverse transform: two 1-D passes, each followed by a transpose.
; The second pass rounds with +16 and shifts right by 5.
;
; In:    inp = 16 contiguous 16-bit coefficients (four 8-byte rows)
;        out = destination; one 8-byte row is written at out + k*pit, k = 0..3
;        pit = distance between output rows, in bytes
; Clobb: m0-m7, out
; NOTE(review): no emms is executed here; presumably the caller restores the
;               x87/MMX state - confirm against the call sites.
;-----------------------------------------------------------------------------
cglobal short_idct4x4llm_mmx, 3, 3, 0, inp, out, pit
%if ARCH_X86_64
    ; pitch arrives as a 32-bit int; widen it before using it in addresses.
    movsxd      pitq, pitd
%endif

    mova        m0, [inpq +  0]
    mova        m1, [inpq +  8]
    mova        m2, [inpq + 16]
    mova        m3, [inpq + 24]

    ; ---- first pass ----
    IDCT4X4LLM_MMX_TERMS
    IDCT4X4LLM_MMX_OUTPUTS
    IDCT4X4LLM_MMX_TRANSPOSE
    mova        m3, m5                  ; second pass expects its inputs in m0..m3

    ; ---- second pass ----
    IDCT4X4LLM_MMX_TERMS
    paddw       m0, [pw_16]             ; fold the rounding term into b1 ...
    paddw       m2, [pw_16]             ; ... and a1, so every output gets +16
    IDCT4X4LLM_MMX_OUTPUTS
    psraw       m2, 5
    psraw       m0, 5
    psraw       m4, 5
    psraw       m6, 5
    IDCT4X4LLM_MMX_TRANSPOSE

    mova        [outq], m0
    mova        [outq + pitq], m1
    mova        [outq + pitq*2], m2
    add         outq, pitq              ; out + pit + 2*pit = fourth row
    mova        [outq + pitq*2], m5
    RET
;-----------------------------------------------------------------------------
; void short_idct4x4llm_1_mmx(short *input, short *output, int pitch)
;
; DC-only variant: computes (input[0] + 16) >> 5 and writes that value to all
; 16 positions of the 4x4 output block.
;
; In:    inp = coefficient block; only the first 16-bit value ends up in the
;              result (the broadcast below keeps just the low word)
;        out = destination; one 8-byte row is written at out + k*pit, k = 0..3
;        pit = distance between output rows, in bytes
; Clobb: m0, out
;-----------------------------------------------------------------------------
cglobal short_idct4x4llm_1_mmx, 3, 3, 0, inp, out, pit
%if ARCH_X86_64
    ; pitch arrives as a 32-bit int; widen it before using it in addresses.
    movsxd      pitq, pitd
%endif

    movh        m0, [inpq]
    paddw       m0, [pw_16]             ; round
    psraw       m0, 5
    punpcklwd   m0, m0                  ; low word -> words 0 and 1
    punpckldq   m0, m0                  ; ... -> all four words

    mova        [outq], m0
    mova        [outq + pitq], m0
    mova        [outq + pitq*2], m0
    add         outq, pitq              ; out + pit + 2*pit = fourth row
    mova        [outq + pitq*2], m0
    RET
;-----------------------------------------------------------------------------
; DC_ONLY_IDCT_ADD_ROW_MMX scratch, pred_addr, dst_addr
;   Adds the broadcast DC value to one row of four prediction bytes and
;   stores the result, clamped to 0..255.
;   In:    m0 = zero, m5 = DC value in all four 16-bit lanes
;   Clobb: %1
;-----------------------------------------------------------------------------
%macro DC_ONLY_IDCT_ADD_ROW_MMX 3
    movh        %1, [%2]
    punpcklbw   %1, m0                  ; widen bytes to words
    paddsw      %1, m5
    packuswb    %1, m0                  ; narrow back to bytes with unsigned saturation
    movh        [%3], %1
%endmacro

;-----------------------------------------------------------------------------
; void dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr,
;                           unsigned char *dst_ptr, int pitch, int stride)
;
; Computes dc = (input_dc + 16) >> 5 and writes pred + dc, saturated to
; unsigned bytes, for a 4x4 block.
;
; In:    in_dc  = DC coefficient (only the low 16 bits reach the result)
;        pred   = prediction block, rows pit bytes apart
;        dst    = destination block, rows stride bytes apart
;        pit    = prediction row distance in bytes
;        stride = destination row distance in bytes (fifth argument, fetched
;                 manually below because cglobal is told to load only four)
; Clobb: m0-m5, pred, dst, stride
;-----------------------------------------------------------------------------
cglobal dc_only_idct_add_mmx, 4, 5, 0, in_dc, pred, dst, pit, stride
%if ARCH_X86_64
    movsxd      strideq, dword stridem
    ; pitch arrives as a 32-bit int; widen it before using it in addresses.
    movsxd      pitq, pitd
%else
    mov         strideq, stridem
%endif

    pxor        m0, m0                  ; zero, used for unpack/pack

    movh        m5, in_dcq              ; dc
    paddw       m5, [pw_16]             ; round
    psraw       m5, 5
    punpcklwd   m5, m5                  ; low word -> words 0 and 1
    punpckldq   m5, m5                  ; ... -> all four words

    DC_ONLY_IDCT_ADD_ROW_MMX m1, predq,          dstq
    DC_ONLY_IDCT_ADD_ROW_MMX m2, predq + pitq,   dstq + strideq
    DC_ONLY_IDCT_ADD_ROW_MMX m3, predq + 2*pitq, dstq + 2*strideq

    ; Step both pointers one row so base + 2*step addresses the fourth row.
    add         dstq, strideq
    add         predq, pitq
    DC_ONLY_IDCT_ADD_ROW_MMX m4, predq + 2*pitq, dstq + 2*strideq
    RET
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment