Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
16dcf013
Commit
16dcf013
authored
Jul 24, 2015
by
Parag Salasakar
Committed by
Gerrit Code Review
Jul 24, 2015
Browse files
Options
Browse Files
Download
Plain Diff
Merge "mips msa vp8 bilinear filter optimization"
parents
a15edeb7
fb73ceae
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
992 additions
and
4 deletions
+992
-4
vp8/common/mips/msa/bilinear_filter_msa.c
vp8/common/mips/msa/bilinear_filter_msa.c
+911
-0
vp8/common/mips/msa/vp8_macros_msa.h
vp8/common/mips/msa/vp8_macros_msa.h
+76
-0
vp8/common/rtcd_defs.pl
vp8/common/rtcd_defs.pl
+4
-4
vp8/vp8_common.mk
vp8/vp8_common.mk
+1
-0
No files found.
vp8/common/mips/msa/bilinear_filter_msa.c
0 → 100644
View file @
16dcf013
This diff is collapsed.
Click to expand it.
vp8/common/mips/msa/vp8_macros_msa.h
View file @
16dcf013
...
...
@@ -460,6 +460,27 @@
SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \
}
/* Description : Immediate number of elements to slide
Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
Outputs - out0, out1
Return Type - as per RTYPE
Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by
value specified in the 'slide_val'
*/
#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
{ \
out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \
out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \
}
#define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2, \
out0, out1, out2, slide_val) \
{ \
SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val); \
out2 = (RTYPE)__msa_sldi_b((v16i8)in0_2, (v16i8)in1_2, slide_val); \
}
#define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__)
/* Description : Shuffle byte vector elements as per mask vector
Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
Outputs - out0, out1
...
...
@@ -472,7 +493,9 @@
out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
}
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
#define VSHF_B2_SB(...) VSHF_B2(v16i8, __VA_ARGS__)
#define VSHF_B2_UH(...) VSHF_B2(v8u16, __VA_ARGS__)
#define VSHF_B3(RTYPE, in0, in1, in2, in3, in4, in5, mask0, mask1, mask2, \
out0, out1, out2) \
...
...
@@ -482,6 +505,32 @@
}
#define VSHF_B3_SB(...) VSHF_B3(v16i8, __VA_ARGS__)
/* Description : Dot product of byte vector elements
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
Return Type - as per RTYPE
Details : Unsigned byte elements from 'mult0' are multiplied with
unsigned byte elements from 'cnst0' producing a result
twice the size of input i.e. unsigned halfword.
The multiplication result of adjacent odd-even elements
are added together and written to the 'out0' vector
*/
#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
{ \
out0 = (RTYPE)__msa_dotp_u_h((v16u8)mult0, (v16u8)cnst0); \
out1 = (RTYPE)__msa_dotp_u_h((v16u8)mult1, (v16u8)cnst1); \
}
#define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__)
#define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, \
cnst0, cnst1, cnst2, cnst3, \
out0, out1, out2, out3) \
{ \
DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
}
#define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__)
/* Description : Dot product of byte vector elements
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
...
...
@@ -768,6 +817,7 @@
ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
}
#define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__)
#define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__)
/* Description : Interleave both left and right half of input vectors
...
...
@@ -1252,4 +1302,30 @@
out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \
out_m; \
})
/* Description : Pack even byte elements and store byte vector in destination
memory
Arguments : Inputs - in0, in1, pdst
*/
#define PCKEV_ST_SB(in0, in1, pdst) \
{ \
v16i8 tmp_m; \
tmp_m = __msa_pckev_b((v16i8)in1, (v16i8)in0); \
ST_SB(tmp_m, (pdst)); \
}
/* Description : Horizontal 2 tap filter kernel code
Arguments : Inputs - in0, in1, mask, coeff, shift
*/
#define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) \
({ \
v16i8 tmp0_m; \
v8u16 tmp1_m; \
\
tmp0_m = __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); \
tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)coeff); \
tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift); \
\
tmp1_m; \
})
#endif
/* VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ */
vp8/common/rtcd_defs.pl
View file @
16dcf013
...
...
@@ -225,20 +225,20 @@ $vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
$vp8_sixtap_predict4x4_dspr2
=
vp8_sixtap_predict4x4_dspr2
;
add_proto
qw/void vp8_bilinear_predict16x16/
,
"
unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/
;
specialize
qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon
msa
/
;
$vp8_bilinear_predict16x16_media
=
vp8_bilinear_predict16x16_armv6
;
add_proto
qw/void vp8_bilinear_predict8x8/
,
"
unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/
;
specialize
qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon
msa
/
;
$vp8_bilinear_predict8x8_media
=
vp8_bilinear_predict8x8_armv6
;
add_proto
qw/void vp8_bilinear_predict8x4/
,
"
unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_bilinear_predict8x4 mmx media neon/
;
specialize
qw/vp8_bilinear_predict8x4 mmx media neon
msa
/
;
$vp8_bilinear_predict8x4_media
=
vp8_bilinear_predict8x4_armv6
;
add_proto
qw/void vp8_bilinear_predict4x4/
,
"
unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch
";
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
specialize
qw/vp8_bilinear_predict4x4 mmx media/
;
specialize
qw/vp8_bilinear_predict4x4 mmx media
msa
/
;
$vp8_bilinear_predict4x4_media
=
vp8_bilinear_predict4x4_armv6
;
#
...
...
vp8/vp8_common.mk
View file @
16dcf013
...
...
@@ -114,6 +114,7 @@ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c
VP8_COMMON_SRCS-$(HAVE_DSPR2)
+=
common/mips/dspr2/dequantize_dspr2.c
# common (c)
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/bilinear_filter_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/copymem_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/idct_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/loopfilter_filters_msa.c
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment