Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
ecf392a1
Commit
ecf392a1
authored
Feb 14, 2014
by
Yaowu Xu
Committed by
Gerrit Code Review
Feb 14, 2014
Browse files
Merge "minor spelling cleanup in comments"
parents
323a7120
549c31f8
Changes
12
Hide whitespace changes
Inline
Side-by-side
vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
View file @
ecf392a1
...
...
@@ -53,7 +53,7 @@ loop
orr
r6
,
r6
,
r7
; differences of all 4 pixels
; calculate total sum
adds
r8
,
r8
,
r4
; add positive differences to sum
subs
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
subs
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -77,7 +77,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -101,7 +101,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -127,7 +127,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
View file @
ecf392a1
...
...
@@ -51,7 +51,7 @@ loop
orr
r8
,
r8
,
r10
; differences of all 4 pixels
; calculate total sum
add
r4
,
r4
,
r6
; add positive differences to sum
sub
r4
,
r4
,
r7
; sub
s
tract negative differences from sum
sub
r4
,
r4
,
r7
; subtract negative differences from sum
; calculate sse
uxtb16
r7
,
r8
; byte (two pixels) to halfwords
...
...
@@ -77,7 +77,7 @@ loop
; calculate total sum
add
r4
,
r4
,
r6
; add positive differences to sum
sub
r4
,
r4
,
r7
; sub
s
tract negative differences from sum
sub
r4
,
r4
,
r7
; subtract negative differences from sum
; calculate sse
uxtb16
r7
,
r8
; byte (two pixels) to halfwords
...
...
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
View file @
ecf392a1
...
...
@@ -58,7 +58,7 @@ loop
orr
r6
,
r6
,
r7
; differences of all 4 pixels
; calculate total sum
adds
r8
,
r8
,
r4
; add positive differences to sum
subs
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
subs
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -89,7 +89,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -120,7 +120,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -153,7 +153,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
View file @
ecf392a1
...
...
@@ -69,7 +69,7 @@ loop
orr
r6
,
r6
,
r7
; differences of all 4 pixels
; calculate total sum
adds
r8
,
r8
,
r4
; add positive differences to sum
subs
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
subs
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -111,7 +111,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -153,7 +153,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -195,7 +195,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
View file @
ecf392a1
...
...
@@ -59,7 +59,7 @@ loop
orr
r6
,
r6
,
r7
; differences of all 4 pixels
; calculate total sum
adds
r8
,
r8
,
r4
; add positive differences to sum
subs
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
subs
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -90,7 +90,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -121,7 +121,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -154,7 +154,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
vp8/common/x86/loopfilter_mmx.asm
View file @
ecf392a1
...
...
@@ -527,7 +527,7 @@ sym(vp8_loop_filter_vertical_edge_mmx):
pxor
mm7
,
[
GLOBAL
(
t80
)]
; unoffset
; mm7 = q1
; tranpose and write back
; tran
s
pose and write back
; mm1 = 72 62 52 42 32 22 12 02
; mm6 = 73 63 53 43 33 23 13 03
; mm3 = 74 64 54 44 34 24 14 04
...
...
@@ -1289,7 +1289,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx):
pxor
mm6
,
[
GLOBAL
(
t80
)]
; mm6 = 71 61 51 41 31 21 11 01
pxor
mm3
,
[
GLOBAL
(
t80
)]
; mm3 = 76 66 56 46 36 26 15 06
; tranpose and write back
; tran
s
pose and write back
movq
mm0
,
[
rdx
]
; mm0 = 70 60 50 40 30 20 10 00
movq
mm1
,
mm0
; mm0 = 70 60 50 40 30 20 10 00
...
...
vp8/common/x86/loopfilter_sse2.asm
View file @
ecf392a1
...
...
@@ -958,7 +958,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
; start work on filters
B_FILTER
2
; tranpose and write back - only work on q1, q0, p0, p1
; tran
s
pose and write back - only work on q1, q0, p0, p1
BV_TRANSPOSE
; store 16-line result
...
...
@@ -1023,7 +1023,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
; start work on filters
B_FILTER
2
; tranpose and write back - only work on q1, q0, p0, p1
; tran
s
pose and write back - only work on q1, q0, p0, p1
BV_TRANSPOSE
lea
rdi
,
[
rsi
+
rax
]
; rdi points to row +1 for indirect addressing
...
...
vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
View file @
ecf392a1
...
...
@@ -72,7 +72,7 @@ cospi_31_64 EQU 804
; reg1 = output[first_offset]
; reg2 = output[second_offset]
; for proper address calculation, the last offset used when manipulating
; output, wether
e
reading or storing) must be passed in. use 0 for first
; output, w
h
ether reading or storing) must be passed in. use 0 for first
; use.
MACRO
LOAD_FROM_OUTPUT
$
prev_offset
,
$
first_offset
,
$
second_offset
,
$
reg1
,
$
reg2
...
...
@@ -88,7 +88,7 @@ cospi_31_64 EQU 804
; output[first_offset] = reg1
; output[second_offset] = reg2
; for proper address calculation, the last offset used when manipulating
; output, wether
e
reading or storing) must be passed in. use 0 for first
; output, w
h
ether reading or storing) must be passed in. use 0 for first
; use.
MACRO
STORE_IN_OUTPUT
$
prev_offset
,
$
first_offset
,
$
second_offset
,
$
reg1
,
$
reg2
...
...
@@ -242,7 +242,7 @@ cospi_31_64 EQU 804
; TODO(cd): have special case to re-use constants when they are similar for
; consecutive butterflies
; TODO(cd): have special case when both constants are the same, do the
; additions/sub
s
tractions before the multiplies.
; additions/subtractions before the multiplies.
; generate the constants
; generate scalar constants
mov
r8
,
#
$
first_constant
&
0xFF00
...
...
@@ -260,7 +260,7 @@ cospi_31_64 EQU 804
vmull.s16
q11
,
$
regB
,
d31
vmull.s16
q12
,
$
regC
,
d31
; (used) five for intermediate (q8-q12), one for constants (q15)
; do some addition/sub
s
tractions (to get back two register)
; do some addition/subtractions (to get back two register)
vsub.s32
q8
,
q8
,
q10
vsub.s32
q9
,
q9
,
q11
; do more multiplications (ordered for maximum latency hiding)
...
...
@@ -268,7 +268,7 @@ cospi_31_64 EQU 804
vmull.s16
q11
,
$
regA
,
d30
vmull.s16
q15
,
$
regB
,
d30
; (used) six for intermediate (q8-q12, q15)
; do more addition/sub
s
tractions
; do more addition/subtractions
vadd.s32
q11
,
q12
,
q11
vadd.s32
q10
,
q10
,
q15
; (used) four for intermediate (q8-q11)
...
...
vp9/common/x86/vp9_loopfilter_mmx.asm
View file @
ecf392a1
...
...
@@ -527,7 +527,7 @@ sym(vp9_lpf_vertical_4_mmx):
pxor
mm7
,
[
GLOBAL
(
t80
)]
; unoffset
; mm7 = q1
; tranpose and write back
; tran
s
pose and write back
; mm1 = 72 62 52 42 32 22 12 02
; mm6 = 73 63 53 43 33 23 13 03
; mm3 = 74 64 54 44 34 24 14 04
...
...
vp9/encoder/vp9_dct.c
View file @
ecf392a1
...
...
@@ -47,7 +47,7 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -315,7 +315,7 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
vp9/encoder/x86/vp9_dct_avx2.c
View file @
ecf392a1
...
...
@@ -16,7 +16,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -46,7 +46,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
in3
=
_mm_slli_epi16
(
in3
,
4
);
// if (i == 0 && input[0]) input[0] += 1;
{
// The mask will only contain wether the first value is zero, all
// The mask will only contain w
h
ether the first value is zero, all
// other comparison will fail as something shifted by 4 (above << 4)
// can never be equal to one. To increment in the non-zero case, we
// add the mask and one for the first element:
...
...
@@ -59,7 +59,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
}
// Do the two transform/transpose passes
for
(
pass
=
0
;
pass
<
2
;
++
pass
)
{
// Transform 1/2: Add/sub
s
tract
// Transform 1/2: Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
in0
,
in3
);
const
__m128i
r1
=
_mm_add_epi16
(
in1
,
in2
);
const
__m128i
r2
=
_mm_sub_epi16
(
in1
,
in2
);
...
...
@@ -317,7 +317,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
for
(
pass
=
0
;
pass
<
2
;
pass
++
)
{
// To store results of each pass before the transpose.
__m128i
res0
,
res1
,
res2
,
res3
,
res4
,
res5
,
res6
,
res7
;
// Add/sub
s
tract
// Add/subtract
const
__m128i
q0
=
_mm_add_epi16
(
in0
,
in7
);
const
__m128i
q1
=
_mm_add_epi16
(
in1
,
in6
);
const
__m128i
q2
=
_mm_add_epi16
(
in2
,
in5
);
...
...
@@ -328,7 +328,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
const
__m128i
q7
=
_mm_sub_epi16
(
in0
,
in7
);
// Work on first four results
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
q0
,
q3
);
const
__m128i
r1
=
_mm_add_epi16
(
q1
,
q2
);
const
__m128i
r2
=
_mm_sub_epi16
(
q1
,
q2
);
...
...
@@ -390,7 +390,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
// Combine
const
__m128i
r0
=
_mm_packs_epi32
(
s0
,
s1
);
const
__m128i
r1
=
_mm_packs_epi32
(
s2
,
s3
);
// Add/sub
s
tract
// Add/subtract
const
__m128i
x0
=
_mm_add_epi16
(
q4
,
r0
);
const
__m128i
x1
=
_mm_sub_epi16
(
q4
,
r0
);
const
__m128i
x2
=
_mm_sub_epi16
(
q7
,
r1
);
...
...
@@ -1071,7 +1071,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -1228,7 +1228,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
}
// Work on the first eight values; fdct8(input, even_results);
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
q0
=
_mm_add_epi16
(
input0
,
input7
);
const
__m128i
q1
=
_mm_add_epi16
(
input1
,
input6
);
const
__m128i
q2
=
_mm_add_epi16
(
input2
,
input5
);
...
...
@@ -1239,7 +1239,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
const
__m128i
q7
=
_mm_sub_epi16
(
input0
,
input7
);
// Work on first four results
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
q0
,
q3
);
const
__m128i
r1
=
_mm_add_epi16
(
q1
,
q2
);
const
__m128i
r2
=
_mm_sub_epi16
(
q1
,
q2
);
...
...
@@ -1303,7 +1303,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
// Combine
const
__m128i
r0
=
_mm_packs_epi32
(
s0
,
s1
);
const
__m128i
r1
=
_mm_packs_epi32
(
s2
,
s3
);
// Add/sub
s
tract
// Add/subtract
const
__m128i
x0
=
_mm_add_epi16
(
q4
,
r0
);
const
__m128i
x1
=
_mm_sub_epi16
(
q4
,
r0
);
const
__m128i
x2
=
_mm_sub_epi16
(
q7
,
r1
);
...
...
vp9/encoder/x86/vp9_dct_sse2.c
View file @
ecf392a1
...
...
@@ -16,7 +16,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -47,7 +47,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
in1
=
_mm_slli_epi16
(
in1
,
4
);
// if (i == 0 && input[0]) input[0] += 1;
{
// The mask will only contain wether the first value is zero, all
// The mask will only contain w
h
ether the first value is zero, all
// other comparison will fail as something shifted by 4 (above << 4)
// can never be equal to one. To increment in the non-zero case, we
// add the mask and one for the first element:
...
...
@@ -60,7 +60,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
}
// Do the two transform/transpose passes
for
(
pass
=
0
;
pass
<
2
;
++
pass
)
{
// Transform 1/2: Add/sub
s
tract
// Transform 1/2: Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
in0
,
in1
);
const
__m128i
r1
=
_mm_sub_epi16
(
in0
,
in1
);
const
__m128i
r2
=
_mm_unpacklo_epi64
(
r0
,
r1
);
...
...
@@ -315,7 +315,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
for
(
pass
=
0
;
pass
<
2
;
pass
++
)
{
// To store results of each pass before the transpose.
__m128i
res0
,
res1
,
res2
,
res3
,
res4
,
res5
,
res6
,
res7
;
// Add/sub
s
tract
// Add/subtract
const
__m128i
q0
=
_mm_add_epi16
(
in0
,
in7
);
const
__m128i
q1
=
_mm_add_epi16
(
in1
,
in6
);
const
__m128i
q2
=
_mm_add_epi16
(
in2
,
in5
);
...
...
@@ -326,7 +326,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
const
__m128i
q7
=
_mm_sub_epi16
(
in0
,
in7
);
// Work on first four results
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
q0
,
q3
);
const
__m128i
r1
=
_mm_add_epi16
(
q1
,
q2
);
const
__m128i
r2
=
_mm_sub_epi16
(
q1
,
q2
);
...
...
@@ -388,7 +388,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
// Combine
const
__m128i
r0
=
_mm_packs_epi32
(
s0
,
s1
);
const
__m128i
r1
=
_mm_packs_epi32
(
s2
,
s3
);
// Add/sub
s
tract
// Add/subtract
const
__m128i
x0
=
_mm_add_epi16
(
q4
,
r0
);
const
__m128i
x1
=
_mm_sub_epi16
(
q4
,
r0
);
const
__m128i
x2
=
_mm_sub_epi16
(
q7
,
r1
);
...
...
@@ -1069,7 +1069,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -1226,7 +1226,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
}
// Work on the first eight values; fdct8(input, even_results);
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
q0
=
_mm_add_epi16
(
input0
,
input7
);
const
__m128i
q1
=
_mm_add_epi16
(
input1
,
input6
);
const
__m128i
q2
=
_mm_add_epi16
(
input2
,
input5
);
...
...
@@ -1237,7 +1237,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
const
__m128i
q7
=
_mm_sub_epi16
(
input0
,
input7
);
// Work on first four results
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
q0
,
q3
);
const
__m128i
r1
=
_mm_add_epi16
(
q1
,
q2
);
const
__m128i
r2
=
_mm_sub_epi16
(
q1
,
q2
);
...
...
@@ -1301,7 +1301,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
// Combine
const
__m128i
r0
=
_mm_packs_epi32
(
s0
,
s1
);
const
__m128i
r1
=
_mm_packs_epi32
(
s2
,
s3
);
// Add/sub
s
tract
// Add/subtract
const
__m128i
x0
=
_mm_add_epi16
(
q4
,
r0
);
const
__m128i
x1
=
_mm_sub_epi16
(
q4
,
r0
);
const
__m128i
x2
=
_mm_sub_epi16
(
q7
,
r1
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment