Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
fd660f01
Commit
fd660f01
authored
Jul 24, 2015
by
Parag Salasakar
Committed by
Gerrit Code Review
Jul 24, 2015
Browse files
Options
Browse Files
Download
Plain Diff
Merge "mips msa vp8 copy mem optimization"
parents
d4657a7e
509fb0bc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
100 additions
and
3 deletions
+100
-3
vp8/common/mips/msa/copymem_msa.c
vp8/common/mips/msa/copymem_msa.c
+70
-0
vp8/common/mips/msa/vp8_macros_msa.h
vp8/common/mips/msa/vp8_macros_msa.h
+26
-0
vp8/common/rtcd_defs.pl
vp8/common/rtcd_defs.pl
+3
-3
vp8/vp8_common.mk
vp8/vp8_common.mk
+1
-0
No files found.
vp8/common/mips/msa/copymem_msa.c
0 → 100644
View file @
fd660f01
/*
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
/* Copy a block that is 8 bytes wide and 4 rows tall.
 *
 * src / src_stride : first source row and the byte distance between rows.
 * dst / dst_stride : first destination row and the byte distance between rows.
 *
 * Every row is carried in a single 64-bit scalar: LD4 reads the four rows,
 * then SD4 is given the same four values together with dst and dst_stride.
 */
static void copy_8x4_msa(uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride) {
  uint64_t row0, row1, row2, row3;

  LD4(src, src_stride, row0, row1, row2, row3);
  SD4(row0, row1, row2, row3, dst, dst_stride);
}
/* Copy a block that is 8 bytes wide and 8 rows tall.
 *
 * src / src_stride : first source row and the byte distance between rows.
 * dst / dst_stride : first destination row and the byte distance between rows.
 *
 * The block is handled as two groups of four rows. Each group is loaded
 * with LD4 into 64-bit scalars and written out with SD4 before the next
 * group is read, so the order of memory accesses is:
 * load rows 0-3, store rows 0-3, load rows 4-7, store rows 4-7.
 */
static void copy_8x8_msa(uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride) {
  uint8_t *src_lower = src + (4 * src_stride);
  uint8_t *dst_lower = dst + (4 * dst_stride);
  uint64_t row0, row1, row2, row3;

  /* Rows 0-3. */
  LD4(src, src_stride, row0, row1, row2, row3);
  SD4(row0, row1, row2, row3, dst, dst_stride);

  /* Rows 4-7 reuse the same four temporaries. */
  LD4(src_lower, src_stride, row0, row1, row2, row3);
  SD4(row0, row1, row2, row3, dst_lower, dst_stride);
}
/* Copy a 16-row block using one v16u8 vector per row.
 *
 * src / src_stride : first source row and the byte distance between rows.
 * dst / dst_stride : first destination row and the byte distance between rows.
 *
 * All sixteen rows are loaded (two LD_UB8 calls) before any row is stored
 * (two ST_UB8 calls); this load-everything-first order is kept exactly as
 * in the original implementation.
 */
static void copy_16x16_msa(uint8_t *src, int32_t src_stride,
                           uint8_t *dst, int32_t dst_stride) {
  uint8_t *src_lower = src + (8 * src_stride);
  uint8_t *dst_lower = dst + (8 * dst_stride);
  v16u8 top0, top1, top2, top3, top4, top5, top6, top7;
  v16u8 bot0, bot1, bot2, bot3, bot4, bot5, bot6, bot7;

  /* Read rows 0-7, then rows 8-15. */
  LD_UB8(src, src_stride, top0, top1, top2, top3, top4, top5, top6, top7);
  LD_UB8(src_lower, src_stride,
         bot0, bot1, bot2, bot3, bot4, bot5, bot6, bot7);

  /* Write rows 0-7, then rows 8-15. */
  ST_UB8(top0, top1, top2, top3, top4, top5, top6, top7, dst, dst_stride);
  ST_UB8(bot0, bot1, bot2, bot3, bot4, bot5, bot6, bot7,
         dst_lower, dst_stride);
}
/* Public MSA entry point for the 16x16 block copy.
 *
 * Passes its four arguments, unchanged and in the same order, to the
 * file-local helper copy_16x16_msa().
 */
void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride,
                           uint8_t *dst, int32_t dst_stride) {
  copy_16x16_msa(src, src_stride, dst, dst_stride);
}
/* Public MSA entry point for the 8x8 block copy.
 *
 * Passes its four arguments, unchanged and in the same order, to the
 * file-local helper copy_8x8_msa().
 */
void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride) {
  copy_8x8_msa(src, src_stride, dst, dst_stride);
}
/* Public MSA entry point for the 8x4 block copy.
 *
 * Passes its four arguments, unchanged and in the same order, to the
 * file-local helper copy_8x4_msa().
 */
void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride,
                         uint8_t *dst, int32_t dst_stride) {
  copy_8x4_msa(src, src_stride, dst, dst_stride);
}
vp8/common/mips/msa/vp8_macros_msa.h
View file @
fd660f01
...
...
@@ -222,6 +222,23 @@
out3 = LW((psrc) + 3 * stride); \
}
/* Description : Load double words with stride
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1
   Details     : Load double word in 'out0' from (psrc)
                 Load double word in 'out1' from (psrc + stride)
                 'stride' is parenthesized in the expansion so that an
                 expression argument (e.g. a + b) is applied as a whole.
*/
#define LD2(psrc, stride, out0, out1) \
  {                                   \
    out0 = LD((psrc));                \
    out1 = LD((psrc) + (stride));     \
  }
/* Description : Load 4 double words with stride
   Arguments   : Inputs  - psrc, stride
                 Outputs - out0, out1, out2, out3
   Details     : Built from two LD2 invocations:
                 'out0', 'out1' from (psrc) and (psrc + stride)
                 'out2', 'out3' from (psrc + 2 * stride) and
                 (psrc + 3 * stride)
*/
#define LD4(psrc, stride, out0, out1, out2, out3)     \
  {                                                   \
    LD2((psrc), (stride), out0, out1);                \
    LD2((psrc) + 2 * (stride), (stride), out2, out3); \
  }
/* Description : Store 4 words with stride
Arguments : Inputs - in0, in1, in2, in3, pdst, stride
Details : Store word from 'in0' to (pdst)
...
...
@@ -298,6 +315,7 @@
LD_B4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
}
/* Type-fixed shorthands for LD_B8: every argument is forwarded unchanged,
   with the vector type set to v16u8 (LD_UB8) or v16i8 (LD_SB8). */
#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__)
#define LD_SB8(...) LD_B8(v16i8, __VA_ARGS__)
/* Description : Load vectors with 8 halfword elements with stride
Arguments : Inputs - psrc, stride
...
...
@@ -339,6 +357,14 @@
/* Type-fixed shorthands for ST_B4: every argument is forwarded unchanged,
   with the vector type set to v16u8 (ST_UB4) or v16i8 (ST_SB4). */
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
#define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__)
/* Description : Store 8 vectors with stride
   Arguments   : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
                          pdst, stride
   Details     : Built from two ST_B4 invocations:
                 'in0'..'in3' are passed with (pdst)
                 'in4'..'in7' are passed with (pdst + 4 * stride)
                 'stride' is parenthesized in the expansion so that an
                 expression argument (e.g. a + b) is applied as a whole.
*/
#define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7,            \
              pdst, stride)                                             \
  {                                                                     \
    ST_B4(RTYPE, in0, in1, in2, in3, (pdst), (stride));                 \
    ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * (stride), (stride));  \
  }
/* ST_B8 with the vector type fixed to v16u8. */
#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__)
/* Description : Store vectors of 8 halfword elements with stride
Arguments : Inputs - in0, in1, pdst, stride
Details : Store 8 halfword elements from 'in0' to (pdst)
...
...
vp8/common/rtcd_defs.pl
View file @
fd660f01
...
...
@@ -138,17 +138,17 @@ $vp8_dc_only_idct_add_dspr2=vp8_dc_only_idct_add_dspr2;
# RECON
#
add_proto
qw/void vp8_copy_mem16x16/
,
"
unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2/
;
specialize
qw/vp8_copy_mem16x16 mmx sse2 media neon dspr2
msa
/
;
$vp8_copy_mem16x16_media
=
vp8_copy_mem16x16_v6
;
$vp8_copy_mem16x16_dspr2
=
vp8_copy_mem16x16_dspr2
;
add_proto
qw/void vp8_copy_mem8x8/
,
"
unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_copy_mem8x8 mmx media neon dspr2/
;
specialize
qw/vp8_copy_mem8x8 mmx media neon dspr2
msa
/
;
$vp8_copy_mem8x8_media
=
vp8_copy_mem8x8_v6
;
$vp8_copy_mem8x8_dspr2
=
vp8_copy_mem8x8_dspr2
;
add_proto
qw/void vp8_copy_mem8x4/
,
"
unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_copy_mem8x4 mmx media neon dspr2/
;
specialize
qw/vp8_copy_mem8x4 mmx media neon dspr2
msa
/
;
$vp8_copy_mem8x4_media
=
vp8_copy_mem8x4_v6
;
$vp8_copy_mem8x4_dspr2
=
vp8_copy_mem8x4_dspr2
;
...
...
vp8/vp8_common.mk
View file @
fd660f01
...
...
@@ -114,6 +114,7 @@ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c
VP8_COMMON_SRCS-$(HAVE_DSPR2)
+=
common/mips/dspr2/dequantize_dspr2.c
# common (c)
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/copymem_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/idct_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/loopfilter_filters_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/sixtap_filter_msa.c
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment