Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
A
aom-rav1e
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Xiph.Org
aom-rav1e
Commits
c0f708c0
Commit
c0f708c0
authored
9 years ago
by
Angie Chiang
Committed by
Gerrit Code Review
9 years ago
Browse files
Options
Downloads
Plain Diff
Merge "convolve8 sse2 test" into nextgenv2
parents
f0e0a7e7
8878fa4f
No related branches found
Branches containing commit
No related tags found
2 merge requests
!6
Rav1e 11 yushin 1
,
!3
Rav1e 10 yushin
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
test/vp10_convolve_test.cc
+129
-0
129 additions, 0 deletions
test/vp10_convolve_test.cc
with
129 additions
and
0 deletions
test/vp10_convolve_test.cc
+
129
−
0
View file @
c0f708c0
...
...
@@ -5,6 +5,7 @@
#include
"vp10/common/filter.h"
#include
"vp10/common/vp10_convolve.h"
#include
"vpx_dsp/vpx_dsp_common.h"
#include
"vpx_ports/mem.h"
using
libvpx_test
::
ACMRandom
;
...
...
@@ -270,4 +271,132 @@ TEST(VP10ConvolveTest, vp10_highbd_convolve_avg) {
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#define CONVOLVE_SPEED_TEST 0
#if CONVOLVE_SPEED_TEST
// Defines a gtest case named func##_speed_##block_size##_##frame_size that
// calls the high-bitdepth convolve function `func` 100000 times over a
// frame_size x frame_size frame, one block_size x block_size block per call.
// The test contains no assertions; it only exercises `func` (presumably so
// that the per-test time reported by the test runner can be compared).
//
//   func        convolve function, called as
//               func(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
//                    filter_y, y_step_q4, w, h, bd)
//               with src/dst wrapped by CONVERT_TO_BYTEPTR.
//   block_size  width and height of the block convolved per call.
//   frame_size  width and height of the destination frame; the source buffer
//               is (frame_size + 7) x (frame_size + 7).
//
// NOTE(review): src and dst are local arrays. At frame_size == 1024 they are
// about 2 MiB each (uint16_t), which may exceed small default stack limits.
//
// Comments inside the macro body must use /* */: a // comment would absorb
// the backslash-continued lines that follow it.
#define highbd_convolve_speed(func, block_size, frame_size)                   \
  TEST(VP10ConvolveTest, func##_speed_##block_size##_##frame_size) {          \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                            \
    INTERP_FILTER interp_filter = EIGHTTAP;                                   \
    InterpFilterParams filter_params =                                        \
        vp10_get_interp_filter_params(interp_filter);                         \
    ptrdiff_t filter_size = filter_params.tap;                                \
    int filter_center = filter_size / 2 - 1;                                  \
    /* Source carries 7 extra rows and columns beyond the frame */            \
    /* (presumably taps - 1 for the 8-tap filter; confirm).     */            \
    DECLARE_ALIGNED(16, uint16_t,                                             \
                    src[(frame_size + 7) * (frame_size + 7)]) = {0};          \
    int src_stride = frame_size + 7;                                          \
    DECLARE_ALIGNED(16, uint16_t, dst[frame_size * frame_size]) = {0};        \
    int dst_stride = frame_size;                                              \
    int x_step_q4 = 16;                                                       \
    int y_step_q4 = 16;                                                       \
    int subpel_x_q4 = 8;                                                      \
    int subpel_y_q4 = 6;                                                      \
    int bd = 10;                                                              \
                                                                              \
    int w = block_size;                                                       \
    int h = block_size;                                                       \
                                                                              \
    const int16_t* filter_x =                                                 \
        vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);            \
    const int16_t* filter_y =                                                 \
        vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);            \
                                                                              \
    /* Fill the whole padded source with random samples below 1 << bd. */     \
    for (int i = 0; i < src_stride * src_stride; i++) {                       \
      src[i] = rnd.Rand16() % (1 << bd);                                      \
    }                                                                         \
                                                                              \
    /* Skip filter_center rows and columns of leading padding. */             \
    int offset = filter_center * src_stride + filter_center;                  \
    int block_x = 0; /* horizontal position of the current block */           \
    int block_y = 0; /* vertical position of the current block */             \
    for (int i = 0; i < 100000; i++) {                                        \
      int src_total_offset = offset + block_y * src_stride + block_x;         \
      int dst_total_offset = block_y * dst_stride + block_x;                  \
      func(CONVERT_TO_BYTEPTR(src + src_total_offset), src_stride,            \
           CONVERT_TO_BYTEPTR(dst + dst_total_offset), dst_stride, filter_x,  \
           x_step_q4, filter_y, y_step_q4, w, h, bd);                         \
      /* Step right while another block fits, otherwise start the next   */   \
      /* block row. The bound is checked against block_x; comparing the  */   \
      /* constant padding offset here never allowed a horizontal step.   */   \
      if (block_x + w + w < frame_size) {                                     \
        block_x += w;                                                         \
      } else {                                                                \
        block_x = 0;                                                          \
        block_y += h;                                                         \
      }                                                                       \
      /* Wrap to the top once the next block row would reach the end. */      \
      if (block_y + h >= frame_size) {                                        \
        block_y = 0;                                                          \
      }                                                                       \
    }                                                                         \
  }
// Defines a gtest case named func##_speed_l_##block_size##_##frame_size that
// calls the 8-bit convolve function `func` 100000 times over a
// frame_size x frame_size frame, one block_size x block_size block per call.
// The test contains no assertions; it only exercises `func` (presumably so
// that the per-test time reported by the test runner can be compared).
//
//   func        convolve function, called as
//               func(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
//                    filter_y, y_step_q4, w, h)
//   block_size  width and height of the block convolved per call.
//   frame_size  width and height of the destination frame; the source buffer
//               is (frame_size + 7) x (frame_size + 7).
//
// NOTE(review): src and dst are local arrays of about 1 MiB each at
// frame_size == 1024, which may exceed small default stack limits.
//
// Comments inside the macro body must use /* */: a // comment would absorb
// the backslash-continued lines that follow it.
#define lowbd_convolve_speed(func, block_size, frame_size)                    \
  TEST(VP10ConvolveTest, func##_speed_l_##block_size##_##frame_size) {        \
    ACMRandom rnd(ACMRandom::DeterministicSeed());                            \
    INTERP_FILTER interp_filter = EIGHTTAP;                                   \
    InterpFilterParams filter_params =                                        \
        vp10_get_interp_filter_params(interp_filter);                         \
    ptrdiff_t filter_size = filter_params.tap;                                \
    int filter_center = filter_size / 2 - 1;                                  \
    /* Source carries 7 extra rows and columns beyond the frame */            \
    /* (presumably taps - 1 for the 8-tap filter; confirm).     */            \
    DECLARE_ALIGNED(16, uint8_t, src[(frame_size + 7) * (frame_size + 7)]);   \
    int src_stride = frame_size + 7;                                          \
    DECLARE_ALIGNED(16, uint8_t, dst[frame_size * frame_size]);               \
    int dst_stride = frame_size;                                              \
    int x_step_q4 = 16;                                                       \
    int y_step_q4 = 16;                                                       \
    int subpel_x_q4 = 8;                                                      \
    int subpel_y_q4 = 6;                                                      \
    int bd = 8;                                                               \
                                                                              \
    int w = block_size;                                                       \
    int h = block_size;                                                       \
                                                                              \
    const int16_t* filter_x =                                                 \
        vp10_get_interp_filter_kernel(filter_params, subpel_x_q4);            \
    const int16_t* filter_y =                                                 \
        vp10_get_interp_filter_kernel(filter_params, subpel_y_q4);            \
                                                                              \
    /* Fill the whole padded source with random samples below 1 << bd. */     \
    for (int i = 0; i < src_stride * src_stride; i++) {                       \
      src[i] = rnd.Rand16() % (1 << bd);                                      \
    }                                                                         \
                                                                              \
    /* Skip filter_center rows and columns of leading padding. */             \
    int offset = filter_center * src_stride + filter_center;                  \
    int block_x = 0; /* horizontal position of the current block */           \
    int block_y = 0; /* vertical position of the current block */             \
    for (int i = 0; i < 100000; i++) {                                        \
      /* Address the current block. Passing the fixed src + offset and   */   \
      /* dst on every iteration would convolve one block repeatedly and  */   \
      /* never touch the rest of the frame, unlike the highbd variant.   */   \
      int src_total_offset = offset + block_y * src_stride + block_x;         \
      int dst_total_offset = block_y * dst_stride + block_x;                  \
      func(src + src_total_offset, src_stride, dst + dst_total_offset,        \
           dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h);       \
      /* Step right while another block fits, otherwise start the next   */   \
      /* block row. The bound is checked against block_x; comparing the  */   \
      /* constant padding offset here never allowed a horizontal step.   */   \
      if (block_x + w + w < frame_size) {                                     \
        block_x += w;                                                         \
      } else {                                                                \
        block_x = 0;                                                          \
        block_y += h;                                                         \
      }                                                                       \
      /* Wrap to the top once the next block row would reach the end. */      \
      if (block_y + h >= frame_size) {                                        \
        block_y = 0;                                                          \
      }                                                                       \
    }                                                                         \
  }
// This experiment shows that when the frame size is 64x64,
// vpx_highbd_convolve8_sse2 and vpx_convolve8_sse2 run at similar speeds.
// However, when the frame size grows to 1024x1024,
// vpx_highbd_convolve8_sse2 is around 50% slower than vpx_convolve8_sse2.
// We think the bottleneck is memory I/O.
// 64x64 frame, high-bitdepth function, block sizes 8 through 64.
highbd_convolve_speed(vpx_highbd_convolve8_sse2, 8, 64);
highbd_convolve_speed(vpx_highbd_convolve8_sse2, 16, 64);
highbd_convolve_speed(vpx_highbd_convolve8_sse2, 32, 64);
highbd_convolve_speed(vpx_highbd_convolve8_sse2, 64, 64);

// 64x64 frame, 8-bit function, block sizes 8 through 64.
lowbd_convolve_speed(vpx_convolve8_sse2, 8, 64);
lowbd_convolve_speed(vpx_convolve8_sse2, 16, 64);
lowbd_convolve_speed(vpx_convolve8_sse2, 32, 64);
lowbd_convolve_speed(vpx_convolve8_sse2, 64, 64);

// 1024x1024 frame, high-bitdepth function, block sizes 8 through 64.
highbd_convolve_speed(vpx_highbd_convolve8_sse2, 8, 1024);
highbd_convolve_speed(vpx_highbd_convolve8_sse2, 16, 1024);
highbd_convolve_speed(vpx_highbd_convolve8_sse2, 32, 1024);
highbd_convolve_speed(vpx_highbd_convolve8_sse2, 64, 1024);

// 1024x1024 frame, 8-bit function, block sizes 8 through 64.
lowbd_convolve_speed(vpx_convolve8_sse2, 8, 1024);
lowbd_convolve_speed(vpx_convolve8_sse2, 16, 1024);
lowbd_convolve_speed(vpx_convolve8_sse2, 32, 1024);
lowbd_convolve_speed(vpx_convolve8_sse2, 64, 1024);
#endif // CONVOLVE_SPEED_TEST
}
// namespace
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment