Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
ebaf8094
Commit
ebaf8094
authored
May 03, 2017
by
Yaowu Xu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Change to use unaligned load

BUG=aomedia:496
Change-Id: Ib49a34233b538c7543425acab305e9bc4ffcfea0
parent
2d0e9b75
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
16 deletions
+32
-16
av1/common/x86/highbd_warp_plane_ssse3.c
av1/common/x86/highbd_warp_plane_ssse3.c
+32
-16
No files found.
av1/common/x86/highbd_warp_plane_ssse3.c
View file @
ebaf8094
...
...
@@ -104,10 +104,14 @@ void av1_highbd_warp_affine_ssse3(int32_t *mat, uint16_t *ref, int width,
_mm_loadu_si128
((
__m128i
*
)(
ref
+
iy
*
stride
+
ix4
+
1
));
// Filter even-index pixels
__m128i
tmp_0
=
filter
[(
sx
+
0
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_2
=
filter
[(
sx
+
2
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_4
=
filter
[(
sx
+
4
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_6
=
filter
[(
sx
+
6
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_0
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sx
+
0
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_2
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sx
+
2
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_4
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sx
+
4
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_6
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sx
+
6
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
)));
// coeffs 0 1 0 1 2 3 2 3 for pixels 0, 2
__m128i
tmp_8
=
_mm_unpacklo_epi32
(
tmp_0
,
tmp_2
);
...
...
@@ -145,10 +149,14 @@ void av1_highbd_warp_affine_ssse3(int32_t *mat, uint16_t *ref, int width,
HORSHEAR_REDUCE_PREC_BITS
);
// Filter odd-index pixels
__m128i
tmp_1
=
filter
[(
sx
+
1
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_3
=
filter
[(
sx
+
3
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_5
=
filter
[(
sx
+
5
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_7
=
filter
[(
sx
+
7
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_1
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sx
+
1
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_3
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sx
+
3
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_5
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sx
+
5
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_7
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sx
+
7
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_9
=
_mm_unpacklo_epi32
(
tmp_1
,
tmp_3
);
__m128i
tmp_11
=
_mm_unpacklo_epi32
(
tmp_5
,
tmp_7
);
...
...
@@ -196,10 +204,14 @@ void av1_highbd_warp_affine_ssse3(int32_t *mat, uint16_t *ref, int width,
__m128i
src_6
=
_mm_unpacklo_epi16
(
src
[
6
],
src
[
7
]);
// Filter even-index pixels
__m128i
tmp_0
=
filter
[(
sy
+
0
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_2
=
filter
[(
sy
+
2
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_4
=
filter
[(
sy
+
4
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_6
=
filter
[(
sy
+
6
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_0
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sy
+
0
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_2
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sy
+
2
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_4
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sy
+
4
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_6
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sy
+
6
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_8
=
_mm_unpacklo_epi32
(
tmp_0
,
tmp_2
);
__m128i
tmp_10
=
_mm_unpacklo_epi32
(
tmp_4
,
tmp_6
);
...
...
@@ -225,10 +237,14 @@ void av1_highbd_warp_affine_ssse3(int32_t *mat, uint16_t *ref, int width,
__m128i
src_5
=
_mm_unpackhi_epi16
(
src
[
4
],
src
[
5
]);
__m128i
src_7
=
_mm_unpackhi_epi16
(
src
[
6
],
src
[
7
]);
__m128i
tmp_1
=
filter
[(
sy
+
1
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_3
=
filter
[(
sy
+
3
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_5
=
filter
[(
sy
+
5
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_7
=
filter
[(
sy
+
7
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
];
__m128i
tmp_1
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sy
+
1
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_3
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sy
+
3
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_5
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sy
+
5
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_7
=
_mm_loadu_si128
(
(
__m128i
*
)(
filter
+
((
sy
+
7
*
gamma
)
>>
WARPEDDIFF_PREC_BITS
)));
__m128i
tmp_9
=
_mm_unpacklo_epi32
(
tmp_1
,
tmp_3
);
__m128i
tmp_11
=
_mm_unpacklo_epi32
(
tmp_5
,
tmp_7
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment