Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
f826ebdf
Commit
f826ebdf
authored
Sep 01, 2016
by
Yaowu Xu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Correct formatting
Change-Id: Ib30ba93e86f03eb5c4705d0327d0dfc6fd9a7f72
parent
7332ae06
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
49 additions
and
50 deletions
+49
-50
aom_util/aom_thread.c
aom_util/aom_thread.c
+1
-2
aom_util/aom_thread.h
aom_util/aom_thread.h
+1
-2
build/make/gen_asm_deps.sh
build/make/gen_asm_deps.sh
+1
-0
third_party/libyuv/source/compare_win.cc
third_party/libyuv/source/compare_win.cc
+3
-3
third_party/libyuv/source/row_win.cc
third_party/libyuv/source/row_win.cc
+20
-20
third_party/libyuv/source/scale_win.cc
third_party/libyuv/source/scale_win.cc
+23
-23
No files found.
aom_util/aom_thread.c
View file @
f826ebdf
...
...
@@ -12,8 +12,7 @@
// Multi-threaded worker
//
// Original source:
// http://git.chromium.org/webm/libwebp.git
// 100644 blob 264210ba2807e4da47eb5d18c04cf869d89b9784 src/utils/thread.c
// https://chromium.googlesource.com/webm/libwebp
#include <assert.h>
#include <string.h> // for memset()
...
...
aom_util/aom_thread.h
View file @
f826ebdf
...
...
@@ -12,8 +12,7 @@
// Multi-threaded worker
//
// Original source:
// http://git.chromium.org/webm/libwebp.git
// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h
// https://chromium.googlesource.com/webm/libwebp
#ifndef AOM_THREAD_H_
#define AOM_THREAD_H_
...
...
build/make/gen_asm_deps.sh
View file @
f826ebdf
...
...
@@ -10,6 +10,7 @@
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
self
=
$0
show_help
()
{
echo
"usage:
$self
[options] <srcfile>"
...
...
third_party/libyuv/source/compare_win.cc
View file @
f826ebdf
...
...
@@ -64,9 +64,9 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__declspec
(
naked
)
uint32
SumSquareError_AVX2
(
const
uint8
*
src_a
,
const
uint8
*
src_b
,
int
count
)
{
__asm
{
mov
eax
,
[
esp
+
4
]
// src_a
mov
edx
,
[
esp
+
8
]
// src_b
mov
ecx
,
[
esp
+
12
]
// count
mov
eax
,
[
esp
+
4
]
// src_a
mov
edx
,
[
esp
+
8
]
// src_b
mov
ecx
,
[
esp
+
12
]
// count
vpxor
ymm0
,
ymm0
,
ymm0
// sum
vpxor
ymm5
,
ymm5
,
ymm5
// constant 0 for unpck
sub
edx
,
eax
...
...
third_party/libyuv/source/row_win.cc
View file @
f826ebdf
...
...
@@ -4851,23 +4851,23 @@ void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1,
uint8
*
dst_argb
,
int
width
)
{
__asm
{
push
esi
mov
eax
,
[
esp
+
4
+
4
]
// src_argb0
mov
esi
,
[
esp
+
4
+
8
]
// src_argb1
mov
eax
,
[
esp
+
4
+
4
]
// src_argb0
mov
esi
,
[
esp
+
4
+
8
]
// src_argb1
mov
edx
,
[
esp
+
4
+
12
]
// dst_argb
mov
ecx
,
[
esp
+
4
+
16
]
// width
vpxor
ymm5
,
ymm5
,
ymm5
// constant 0
convertloop:
vmovdqu
ymm1
,
[
eax
]
// read 8 pixels from src_argb0
vmovdqu
ymm1
,
[
eax
]
// read 8 pixels from src_argb0
lea
eax
,
[
eax
+
32
]
vmovdqu
ymm3
,
[
esi
]
// read 8 pixels from src_argb1
vmovdqu
ymm3
,
[
esi
]
// read 8 pixels from src_argb1
lea
esi
,
[
esi
+
32
]
vpunpcklbw
ymm0
,
ymm1
,
ymm1
// low 4
vpunpckhbw
ymm1
,
ymm1
,
ymm1
// high 4
vpunpcklbw
ymm2
,
ymm3
,
ymm5
// low 4
vpunpckhbw
ymm3
,
ymm3
,
ymm5
// high 4
vpmulhuw
ymm0
,
ymm0
,
ymm2
// src_argb0 * src_argb1 low 4
vpmulhuw
ymm1
,
ymm1
,
ymm3
// src_argb0 * src_argb1 high 4
vpunpcklbw
ymm0
,
ymm1
,
ymm1
// low 4
vpunpckhbw
ymm1
,
ymm1
,
ymm1
// high 4
vpunpcklbw
ymm2
,
ymm3
,
ymm5
// low 4
vpunpckhbw
ymm3
,
ymm3
,
ymm5
// high 4
vpmulhuw
ymm0
,
ymm0
,
ymm2
// src_argb0 * src_argb1 low 4
vpmulhuw
ymm1
,
ymm1
,
ymm3
// src_argb0 * src_argb1 high 4
vpackuswb
ymm0
,
ymm0
,
ymm1
vmovdqu
[
edx
],
ymm0
lea
edx
,
[
edx
+
32
]
...
...
@@ -5512,8 +5512,8 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
__asm
{
push
esi
push
edi
mov
edi
,
[
esp
+
8
+
4
]
// dst_ptr
mov
esi
,
[
esp
+
8
+
8
]
// src_ptr
mov
edi
,
[
esp
+
8
+
4
]
// dst_ptr
mov
esi
,
[
esp
+
8
+
8
]
// src_ptr
mov
edx
,
[
esp
+
8
+
12
]
// src_stride
mov
ecx
,
[
esp
+
8
+
16
]
// dst_width
mov
eax
,
[
esp
+
8
+
20
]
// source_y_fraction (0..255)
...
...
@@ -5523,11 +5523,11 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
je
xloop100
// 0 / 128. Blend 100 / 0.
sub
edi
,
esi
cmp
eax
,
32
je
xloop75
// 32 / 128 is 0.25. Blend 75 / 25.
je
xloop75
// 32 / 128 is 0.25. Blend 75 / 25.
cmp
eax
,
64
je
xloop50
// 64 / 128 is 0.50. Blend 50 / 50.
je
xloop50
// 64 / 128 is 0.50. Blend 50 / 50.
cmp
eax
,
96
je
xloop25
// 96 / 128 is 0.75. Blend 25 / 75.
je
xloop25
// 96 / 128 is 0.75. Blend 25 / 75.
vmovd
xmm0
,
eax
// high fraction 0..127
neg
eax
...
...
@@ -5547,14 +5547,14 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
vpmaddubsw
ymm1
,
ymm1
,
ymm5
vpsrlw
ymm0
,
ymm0
,
7
vpsrlw
ymm1
,
ymm1
,
7
vpackuswb
ymm0
,
ymm0
,
ymm1
// unmutates
vpackuswb
ymm0
,
ymm0
,
ymm1
// unmutates
vmovdqu
[
esi
+
edi
],
ymm0
lea
esi
,
[
esi
+
32
]
sub
ecx
,
32
jg
xloop
jmp
xloop99
// Blend 25 / 75.
// Blend 25 / 75.
xloop25:
vmovdqu
ymm0
,
[
esi
]
vmovdqu
ymm1
,
[
esi
+
edx
]
...
...
@@ -5566,7 +5566,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
jg
xloop25
jmp
xloop99
// Blend 50 / 50.
// Blend 50 / 50.
xloop50:
vmovdqu
ymm0
,
[
esi
]
vpavgb
ymm0
,
ymm0
,
[
esi
+
edx
]
...
...
@@ -5576,7 +5576,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
jg
xloop50
jmp
xloop99
// Blend 75 / 25.
// Blend 75 / 25.
xloop75:
vmovdqu
ymm1
,
[
esi
]
vmovdqu
ymm0
,
[
esi
+
edx
]
...
...
@@ -5588,7 +5588,7 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
jg
xloop75
jmp
xloop99
// Blend 100 / 0 - Copy row unchanged.
// Blend 100 / 0 - Copy row unchanged.
xloop100:
rep
movsb
...
...
third_party/libyuv/source/scale_win.cc
View file @
f826ebdf
...
...
@@ -232,12 +232,12 @@ __declspec(naked)
void
ScaleRowDown2Linear_AVX2
(
const
uint8
*
src_ptr
,
ptrdiff_t
src_stride
,
uint8
*
dst_ptr
,
int
dst_width
)
{
__asm
{
mov
eax
,
[
esp
+
4
]
// src_ptr
// src_stride
mov
edx
,
[
esp
+
12
]
// dst_ptr
mov
ecx
,
[
esp
+
16
]
// dst_width
mov
eax
,
[
esp
+
4
]
// src_ptr
// src_stride
mov
edx
,
[
esp
+
12
]
// dst_ptr
mov
ecx
,
[
esp
+
16
]
// dst_width
vpcmpeqb
ymm4
,
ymm4
,
ymm4
// '1' constant, 8b
vpcmpeqb
ymm4
,
ymm4
,
ymm4
// '1' constant, 8b
vpsrlw
ymm4
,
ymm4
,
15
vpackuswb
ymm4
,
ymm4
,
ymm4
vpxor
ymm5
,
ymm5
,
ymm5
// constant 0
...
...
@@ -247,12 +247,12 @@ void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
vmovdqu
ymm1
,
[
eax
+
32
]
lea
eax
,
[
eax
+
64
]
vpmaddubsw
ymm0
,
ymm0
,
ymm4
// average horizontally
vpmaddubsw
ymm0
,
ymm0
,
ymm4
// average horizontally
vpmaddubsw
ymm1
,
ymm1
,
ymm4
vpavgw
ymm0
,
ymm0
,
ymm5
// (x + 1) / 2
vpavgw
ymm0
,
ymm0
,
ymm5
// (x + 1) / 2
vpavgw
ymm1
,
ymm1
,
ymm5
vpackuswb
ymm0
,
ymm0
,
ymm1
vpermq
ymm0
,
ymm0
,
0xd8
// unmutate vpackuswb
vpermq
ymm0
,
ymm0
,
0xd8
// unmutate vpackuswb
vmovdqu
[
edx
],
ymm0
lea
edx
,
[
edx
+
32
]
...
...
@@ -270,29 +270,29 @@ void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8
*
dst_ptr
,
int
dst_width
)
{
__asm
{
push
esi
mov
eax
,
[
esp
+
4
+
4
]
// src_ptr
mov
esi
,
[
esp
+
4
+
8
]
// src_stride
mov
edx
,
[
esp
+
4
+
12
]
// dst_ptr
mov
ecx
,
[
esp
+
4
+
16
]
// dst_width
mov
eax
,
[
esp
+
4
+
4
]
// src_ptr
mov
esi
,
[
esp
+
4
+
8
]
// src_stride
mov
edx
,
[
esp
+
4
+
12
]
// dst_ptr
mov
ecx
,
[
esp
+
4
+
16
]
// dst_width
vpcmpeqb
ymm4
,
ymm4
,
ymm4
// '1' constant, 8b
vpcmpeqb
ymm4
,
ymm4
,
ymm4
// '1' constant, 8b
vpsrlw
ymm4
,
ymm4
,
15
vpackuswb
ymm4
,
ymm4
,
ymm4
vpxor
ymm5
,
ymm5
,
ymm5
// constant 0
wloop:
vmovdqu
ymm0
,
[
eax
]
// average rows
vmovdqu
ymm0
,
[
eax
]
// average rows
vmovdqu
ymm1
,
[
eax
+
32
]
vpavgb
ymm0
,
ymm0
,
[
eax
+
esi
]
vpavgb
ymm1
,
ymm1
,
[
eax
+
esi
+
32
]
lea
eax
,
[
eax
+
64
]
vpmaddubsw
ymm0
,
ymm0
,
ymm4
// average horizontally
vpmaddubsw
ymm0
,
ymm0
,
ymm4
// average horizontally
vpmaddubsw
ymm1
,
ymm1
,
ymm4
vpavgw
ymm0
,
ymm0
,
ymm5
// (x + 1) / 2
vpavgw
ymm0
,
ymm0
,
ymm5
// (x + 1) / 2
vpavgw
ymm1
,
ymm1
,
ymm5
vpackuswb
ymm0
,
ymm0
,
ymm1
vpermq
ymm0
,
ymm0
,
0xd8
// unmutate vpackuswb
vpermq
ymm0
,
ymm0
,
0xd8
// unmutate vpackuswb
vmovdqu
[
edx
],
ymm0
lea
edx
,
[
edx
+
32
]
...
...
@@ -831,21 +831,21 @@ void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
__declspec
(
naked
)
void
ScaleAddRow_AVX2
(
const
uint8
*
src_ptr
,
uint16
*
dst_ptr
,
int
src_width
)
{
__asm
{
mov
eax
,
[
esp
+
4
]
// src_ptr
mov
edx
,
[
esp
+
8
]
// dst_ptr
mov
eax
,
[
esp
+
4
]
// src_ptr
mov
edx
,
[
esp
+
8
]
// dst_ptr
mov
ecx
,
[
esp
+
12
]
// src_width
vpxor
ymm5
,
ymm5
,
ymm5
// sum rows
// sum rows
xloop:
vmovdqu
ymm3
,
[
eax
]
// read 32 bytes
vmovdqu
ymm3
,
[
eax
]
// read 32 bytes
lea
eax
,
[
eax
+
32
]
vpermq
ymm3
,
ymm3
,
0xd8
// unmutate for vpunpck
vpunpcklbw
ymm2
,
ymm3
,
ymm5
vpunpckhbw
ymm3
,
ymm3
,
ymm5
vpaddusw
ymm0
,
ymm2
,
[
edx
]
// sum 16 words
vpaddusw
ymm0
,
ymm2
,
[
edx
]
// sum 16 words
vpaddusw
ymm1
,
ymm3
,
[
edx
+
32
]
vmovdqu
[
edx
],
ymm0
// write 32 words to destination
vmovdqu
[
edx
],
ymm0
// write 32 words to destination
vmovdqu
[
edx
+
32
],
ymm1
lea
edx
,
[
edx
+
64
]
sub
ecx
,
32
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment