Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Guillaume Martres
aom-rav1e
Commits
549c31f8
Commit
549c31f8
authored
Feb 12, 2014
by
Andrew Russell
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
minor spelling cleanup in comments
Change-Id: Ia91c6c406273345b08505097ffe1af3896980f06
parent
f8604089
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
50 additions
and
50 deletions
+50
-50
vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
+4
-4
vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
+2
-2
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
...common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+4
-4
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
...ommon/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+4
-4
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
...common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+4
-4
vp8/common/x86/loopfilter_mmx.asm
vp8/common/x86/loopfilter_mmx.asm
+2
-2
vp8/common/x86/loopfilter_sse2.asm
vp8/common/x86/loopfilter_sse2.asm
+2
-2
vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
+5
-5
vp9/common/x86/vp9_loopfilter_mmx.asm
vp9/common/x86/vp9_loopfilter_mmx.asm
+1
-1
vp9/encoder/vp9_dct.c
vp9/encoder/vp9_dct.c
+2
-2
vp9/encoder/x86/vp9_dct_avx2.c
vp9/encoder/x86/vp9_dct_avx2.c
+10
-10
vp9/encoder/x86/vp9_dct_sse2.c
vp9/encoder/x86/vp9_dct_sse2.c
+10
-10
No files found.
vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
View file @
549c31f8
...
...
@@ -53,7 +53,7 @@ loop
orr
r6
,
r6
,
r7
; differences of all 4 pixels
; calculate total sum
adds
r8
,
r8
,
r4
; add positive differences to sum
subs
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
subs
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -77,7 +77,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -101,7 +101,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -127,7 +127,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
View file @
549c31f8
...
...
@@ -51,7 +51,7 @@ loop
orr
r8
,
r8
,
r10
; differences of all 4 pixels
; calculate total sum
add
r4
,
r4
,
r6
; add positive differences to sum
sub
r4
,
r4
,
r7
; sub
s
tract negative differences from sum
sub
r4
,
r4
,
r7
; subtract negative differences from sum
; calculate sse
uxtb16
r7
,
r8
; byte (two pixels) to halfwords
...
...
@@ -77,7 +77,7 @@ loop
; calculate total sum
add
r4
,
r4
,
r6
; add positive differences to sum
sub
r4
,
r4
,
r7
; sub
s
tract negative differences from sum
sub
r4
,
r4
,
r7
; subtract negative differences from sum
; calculate sse
uxtb16
r7
,
r8
; byte (two pixels) to halfwords
...
...
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
View file @
549c31f8
...
...
@@ -58,7 +58,7 @@ loop
orr
r6
,
r6
,
r7
; differences of all 4 pixels
; calculate total sum
adds
r8
,
r8
,
r4
; add positive differences to sum
subs
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
subs
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -89,7 +89,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -120,7 +120,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -153,7 +153,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
View file @
549c31f8
...
...
@@ -69,7 +69,7 @@ loop
orr
r6
,
r6
,
r7
; differences of all 4 pixels
; calculate total sum
adds
r8
,
r8
,
r4
; add positive differences to sum
subs
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
subs
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -111,7 +111,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -153,7 +153,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -195,7 +195,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
View file @
549c31f8
...
...
@@ -59,7 +59,7 @@ loop
orr
r6
,
r6
,
r7
; differences of all 4 pixels
; calculate total sum
adds
r8
,
r8
,
r4
; add positive differences to sum
subs
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
subs
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -90,7 +90,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -121,7 +121,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
@@ -154,7 +154,7 @@ loop
; calculate total sum
add
r8
,
r8
,
r4
; add positive differences to sum
sub
r8
,
r8
,
r5
; sub
s
tract negative differences from sum
sub
r8
,
r8
,
r5
; subtract negative differences from sum
; calculate sse
uxtb16
r5
,
r6
; byte (two pixels) to halfwords
...
...
vp8/common/x86/loopfilter_mmx.asm
View file @
549c31f8
...
...
@@ -527,7 +527,7 @@ sym(vp8_loop_filter_vertical_edge_mmx):
pxor
mm7
,
[
GLOBAL
(
t80
)]
; unoffset
; mm7 = q1
; tranpose and write back
; tran
s
pose and write back
; mm1 = 72 62 52 42 32 22 12 02
; mm6 = 73 63 53 43 33 23 13 03
; mm3 = 74 64 54 44 34 24 14 04
...
...
@@ -1289,7 +1289,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx):
pxor
mm6
,
[
GLOBAL
(
t80
)]
; mm6 = 71 61 51 41 31 21 11 01
pxor
mm3
,
[
GLOBAL
(
t80
)]
; mm3 = 76 66 56 46 36 26 15 06
; tranpose and write back
; tran
s
pose and write back
movq
mm0
,
[
rdx
]
; mm0 = 70 60 50 40 30 20 10 00
movq
mm1
,
mm0
; mm0 = 70 60 50 40 30 20 10 00
...
...
vp8/common/x86/loopfilter_sse2.asm
View file @
549c31f8
...
...
@@ -958,7 +958,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
; start work on filters
B_FILTER
2
; tranpose and write back - only work on q1, q0, p0, p1
; tran
s
pose and write back - only work on q1, q0, p0, p1
BV_TRANSPOSE
; store 16-line result
...
...
@@ -1023,7 +1023,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
; start work on filters
B_FILTER
2
; tranpose and write back - only work on q1, q0, p0, p1
; tran
s
pose and write back - only work on q1, q0, p0, p1
BV_TRANSPOSE
lea
rdi
,
[
rsi
+
rax
]
; rdi points to row +1 for indirect addressing
...
...
vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
View file @
549c31f8
...
...
@@ -72,7 +72,7 @@ cospi_31_64 EQU 804
; reg1 = output[first_offset]
; reg2 = output[second_offset]
; for proper address calculation, the last offset used when manipulating
; output, w
ethere
reading or storing) must be passed in. use 0 for first
; output, w
hether
reading or storing) must be passed in. use 0 for first
; use.
MACRO
LOAD_FROM_OUTPUT
$
prev_offset
,
$
first_offset
,
$
second_offset
,
$
reg1
,
$
reg2
...
...
@@ -88,7 +88,7 @@ cospi_31_64 EQU 804
; output[first_offset] = reg1
; output[second_offset] = reg2
; for proper address calculation, the last offset used when manipulating
; output, w
ethere
reading or storing) must be passed in. use 0 for first
; output, w
hether
reading or storing) must be passed in. use 0 for first
; use.
MACRO
STORE_IN_OUTPUT
$
prev_offset
,
$
first_offset
,
$
second_offset
,
$
reg1
,
$
reg2
...
...
@@ -242,7 +242,7 @@ cospi_31_64 EQU 804
; TODO(cd): have special case to re-use constants when they are similar for
; consecutive butterflies
; TODO(cd): have special case when both constants are the same, do the
; additions/sub
s
tractions before the multiplies.
; additions/subtractions before the multiplies.
; generate the constants
; generate scalar constants
mov
r8
,
#
$
first_constant
&
0xFF00
...
...
@@ -260,7 +260,7 @@ cospi_31_64 EQU 804
vmull.s16
q11
,
$
regB
,
d31
vmull.s16
q12
,
$
regC
,
d31
; (used) five for intermediate (q8-q12), one for constants (q15)
; do some addition/sub
s
tractions (to get back two register)
; do some addition/subtractions (to get back two register)
vsub.s32
q8
,
q8
,
q10
vsub.s32
q9
,
q9
,
q11
; do more multiplications (ordered for maximum latency hiding)
...
...
@@ -268,7 +268,7 @@ cospi_31_64 EQU 804
vmull.s16
q11
,
$
regA
,
d30
vmull.s16
q15
,
$
regB
,
d30
; (used) six for intermediate (q8-q12, q15)
; do more addition/sub
s
tractions
; do more addition/subtractions
vadd.s32
q11
,
q12
,
q11
vadd.s32
q10
,
q10
,
q15
; (used) four for intermediate (q8-q11)
...
...
vp9/common/x86/vp9_loopfilter_mmx.asm
View file @
549c31f8
...
...
@@ -527,7 +527,7 @@ sym(vp9_lpf_vertical_4_mmx):
pxor
mm7
,
[
GLOBAL
(
t80
)]
; unoffset
; mm7 = q1
; tranpose and write back
; tran
s
pose and write back
; mm1 = 72 62 52 42 32 22 12 02
; mm6 = 73 63 53 43 33 23 13 03
; mm3 = 74 64 54 44 34 24 14 04
...
...
vp9/encoder/vp9_dct.c
View file @
549c31f8
...
...
@@ -47,7 +47,7 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -315,7 +315,7 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
vp9/encoder/x86/vp9_dct_avx2.c
View file @
549c31f8
...
...
@@ -16,7 +16,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -46,7 +46,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
in3
=
_mm_slli_epi16
(
in3
,
4
);
// if (i == 0 && input[0]) input[0] += 1;
{
// The mask will only contain wether the first value is zero, all
// The mask will only contain w
h
ether the first value is zero, all
// other comparison will fail as something shifted by 4 (above << 4)
// can never be equal to one. To increment in the non-zero case, we
// add the mask and one for the first element:
...
...
@@ -59,7 +59,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
}
// Do the two transform/transpose passes
for
(
pass
=
0
;
pass
<
2
;
++
pass
)
{
// Transform 1/2: Add/sub
s
tract
// Transform 1/2: Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
in0
,
in3
);
const
__m128i
r1
=
_mm_add_epi16
(
in1
,
in2
);
const
__m128i
r2
=
_mm_sub_epi16
(
in1
,
in2
);
...
...
@@ -317,7 +317,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
for
(
pass
=
0
;
pass
<
2
;
pass
++
)
{
// To store results of each pass before the transpose.
__m128i
res0
,
res1
,
res2
,
res3
,
res4
,
res5
,
res6
,
res7
;
// Add/sub
s
tract
// Add/subtract
const
__m128i
q0
=
_mm_add_epi16
(
in0
,
in7
);
const
__m128i
q1
=
_mm_add_epi16
(
in1
,
in6
);
const
__m128i
q2
=
_mm_add_epi16
(
in2
,
in5
);
...
...
@@ -328,7 +328,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
const
__m128i
q7
=
_mm_sub_epi16
(
in0
,
in7
);
// Work on first four results
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
q0
,
q3
);
const
__m128i
r1
=
_mm_add_epi16
(
q1
,
q2
);
const
__m128i
r2
=
_mm_sub_epi16
(
q1
,
q2
);
...
...
@@ -390,7 +390,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
// Combine
const
__m128i
r0
=
_mm_packs_epi32
(
s0
,
s1
);
const
__m128i
r1
=
_mm_packs_epi32
(
s2
,
s3
);
// Add/sub
s
tract
// Add/subtract
const
__m128i
x0
=
_mm_add_epi16
(
q4
,
r0
);
const
__m128i
x1
=
_mm_sub_epi16
(
q4
,
r0
);
const
__m128i
x2
=
_mm_sub_epi16
(
q7
,
r1
);
...
...
@@ -1071,7 +1071,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -1228,7 +1228,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
}
// Work on the first eight values; fdct8(input, even_results);
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
q0
=
_mm_add_epi16
(
input0
,
input7
);
const
__m128i
q1
=
_mm_add_epi16
(
input1
,
input6
);
const
__m128i
q2
=
_mm_add_epi16
(
input2
,
input5
);
...
...
@@ -1239,7 +1239,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
const
__m128i
q7
=
_mm_sub_epi16
(
input0
,
input7
);
// Work on first four results
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
q0
,
q3
);
const
__m128i
r1
=
_mm_add_epi16
(
q1
,
q2
);
const
__m128i
r2
=
_mm_sub_epi16
(
q1
,
q2
);
...
...
@@ -1303,7 +1303,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
// Combine
const
__m128i
r0
=
_mm_packs_epi32
(
s0
,
s1
);
const
__m128i
r1
=
_mm_packs_epi32
(
s2
,
s3
);
// Add/sub
s
tract
// Add/subtract
const
__m128i
x0
=
_mm_add_epi16
(
q4
,
r0
);
const
__m128i
x1
=
_mm_sub_epi16
(
q4
,
r0
);
const
__m128i
x2
=
_mm_sub_epi16
(
q7
,
r1
);
...
...
vp9/encoder/x86/vp9_dct_sse2.c
View file @
549c31f8
...
...
@@ -16,7 +16,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -47,7 +47,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
in1
=
_mm_slli_epi16
(
in1
,
4
);
// if (i == 0 && input[0]) input[0] += 1;
{
// The mask will only contain wether the first value is zero, all
// The mask will only contain w
h
ether the first value is zero, all
// other comparison will fail as something shifted by 4 (above << 4)
// can never be equal to one. To increment in the non-zero case, we
// add the mask and one for the first element:
...
...
@@ -60,7 +60,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
}
// Do the two transform/transpose passes
for
(
pass
=
0
;
pass
<
2
;
++
pass
)
{
// Transform 1/2: Add/sub
s
tract
// Transform 1/2: Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
in0
,
in1
);
const
__m128i
r1
=
_mm_sub_epi16
(
in0
,
in1
);
const
__m128i
r2
=
_mm_unpacklo_epi64
(
r0
,
r1
);
...
...
@@ -315,7 +315,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
for
(
pass
=
0
;
pass
<
2
;
pass
++
)
{
// To store results of each pass before the transpose.
__m128i
res0
,
res1
,
res2
,
res3
,
res4
,
res5
,
res6
,
res7
;
// Add/sub
s
tract
// Add/subtract
const
__m128i
q0
=
_mm_add_epi16
(
in0
,
in7
);
const
__m128i
q1
=
_mm_add_epi16
(
in1
,
in6
);
const
__m128i
q2
=
_mm_add_epi16
(
in2
,
in5
);
...
...
@@ -326,7 +326,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
const
__m128i
q7
=
_mm_sub_epi16
(
in0
,
in7
);
// Work on first four results
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
q0
,
q3
);
const
__m128i
r1
=
_mm_add_epi16
(
q1
,
q2
);
const
__m128i
r2
=
_mm_sub_epi16
(
q1
,
q2
);
...
...
@@ -388,7 +388,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
// Combine
const
__m128i
r0
=
_mm_packs_epi32
(
s0
,
s1
);
const
__m128i
r1
=
_mm_packs_epi32
(
s2
,
s3
);
// Add/sub
s
tract
// Add/subtract
const
__m128i
x0
=
_mm_add_epi16
(
q4
,
r0
);
const
__m128i
x1
=
_mm_sub_epi16
(
q4
,
r0
);
const
__m128i
x2
=
_mm_sub_epi16
(
q7
,
r1
);
...
...
@@ -1069,7 +1069,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// as the first pass results are transposed, we tran
s
pose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int
pass
;
...
...
@@ -1226,7 +1226,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
}
// Work on the first eight values; fdct8(input, even_results);
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
q0
=
_mm_add_epi16
(
input0
,
input7
);
const
__m128i
q1
=
_mm_add_epi16
(
input1
,
input6
);
const
__m128i
q2
=
_mm_add_epi16
(
input2
,
input5
);
...
...
@@ -1237,7 +1237,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
const
__m128i
q7
=
_mm_sub_epi16
(
input0
,
input7
);
// Work on first four results
{
// Add/sub
s
tract
// Add/subtract
const
__m128i
r0
=
_mm_add_epi16
(
q0
,
q3
);
const
__m128i
r1
=
_mm_add_epi16
(
q1
,
q2
);
const
__m128i
r2
=
_mm_sub_epi16
(
q1
,
q2
);
...
...
@@ -1301,7 +1301,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
// Combine
const
__m128i
r0
=
_mm_packs_epi32
(
s0
,
s1
);
const
__m128i
r1
=
_mm_packs_epi32
(
s2
,
s3
);
// Add/sub
s
tract
// Add/subtract
const
__m128i
x0
=
_mm_add_epi16
(
q4
,
r0
);
const
__m128i
x1
=
_mm_sub_epi16
(
q4
,
r0
);
const
__m128i
x2
=
_mm_sub_epi16
(
q7
,
r1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment