Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Raphael Zumer
aom-rav1e
Commits
16dcf013
Commit
16dcf013
authored
Jul 24, 2015
by
Parag Salasakar
Committed by
Gerrit Code Review
Jul 24, 2015
Browse files
Merge "mips msa vp8 bilinear filter optimization"
parents
a15edeb7
fb73ceae
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
vp8/common/mips/msa/bilinear_filter_msa.c
0 → 100644
View file @
16dcf013
This diff is collapsed.
Click to expand it.
vp8/common/mips/msa/vp8_macros_msa.h
View file @
16dcf013
...
...
@@ -460,6 +460,27 @@
SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \
}
/* Description : Immediate number of elements to slide
   Arguments   : Inputs  - in0_0, in0_1, in1_0, in1_1, slide_val
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Byte elements from 'in0_0' vector are slid into 'in1_0' by
                 value specified in the 'slide_val'. The pair
                 'in0_1'/'in1_1' is processed the same way into 'out1'.
   Note        : Each vector argument is parenthesized before being cast so
                 that an expression argument (e.g. 'a + b') is cast as a
                 whole rather than only its first operand.
*/
#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val)  \
  {                                                                         \
    out0 = (RTYPE)__msa_sldi_b((v16i8)(in0_0), (v16i8)(in1_0), slide_val);  \
    out1 = (RTYPE)__msa_sldi_b((v16i8)(in0_1), (v16i8)(in1_1), slide_val);  \
  }

/* Three-pair variant: the first two pairs are delegated to SLDI_B2 and the
   third pair ('in0_2'/'in1_2') is written to 'out2' with the same
   'slide_val'.
*/
#define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2,           \
                out0, out1, out2, slide_val)                                \
  {                                                                         \
    SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val);      \
    out2 = (RTYPE)__msa_sldi_b((v16i8)(in0_2), (v16i8)(in1_2), slide_val);  \
  }
/* SLDI_B3 with RTYPE fixed to v8u16. */
#define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__)
/* Description : Shuffle byte vector elements as per mask vector
Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
Outputs - out0, out1
...
...
@@ -472,7 +493,9 @@
out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
}
/* Typed variants of VSHF_B2: each forwards its arguments unchanged and fixes
   RTYPE to v16u8 (_UB), v16i8 (_SB) or v8u16 (_UH) respectively. */
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
#define VSHF_B2_SB(...) VSHF_B2(v16i8, __VA_ARGS__)
#define VSHF_B2_UH(...) VSHF_B2(v8u16, __VA_ARGS__)
#define VSHF_B3(RTYPE, in0, in1, in2, in3, in4, in5, mask0, mask1, mask2, \
out0, out1, out2) \
...
...
@@ -482,6 +505,32 @@
}
/* VSHF_B3 with RTYPE fixed to v16i8; all other arguments are forwarded. */
#define VSHF_B3_SB(...) VSHF_B3(v16i8, __VA_ARGS__)
/* Description : Dot product of byte vector elements
   Arguments   : Inputs  - mult0, mult1, cnst0, cnst1
                 Outputs - out0, out1
                 Return Type - as per RTYPE
   Details     : Unsigned byte elements from 'mult0' are multiplied with
                 unsigned byte elements from 'cnst0' producing a result
                 twice the size of input i.e. unsigned halfword.
                 The multiplication results of adjacent odd-even elements
                 are added together and written to the 'out0' vector.
                 'mult1' and 'cnst1' are processed the same way into 'out1'.
   Note        : Each vector argument is parenthesized before being cast so
                 that an expression argument is cast as a whole rather than
                 only its first operand.
*/
#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1)    \
  {                                                                 \
    out0 = (RTYPE)__msa_dotp_u_h((v16u8)(mult0), (v16u8)(cnst0));   \
    out1 = (RTYPE)__msa_dotp_u_h((v16u8)(mult1), (v16u8)(cnst1));   \
  }
/* DOTP_UB2 with RTYPE fixed to v8u16. */
#define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__)

/* Four-pair variant: applies DOTP_UB2 to (mult0, mult1, cnst0, cnst1) and
   then to (mult2, mult3, cnst2, cnst3), writing out0..out3.
*/
#define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3,                 \
                 cnst0, cnst1, cnst2, cnst3,                        \
                 out0, out1, out2, out3)                            \
  {                                                                 \
    DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1);        \
    DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3);        \
  }
/* DOTP_UB4 with RTYPE fixed to v8u16. */
#define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__)
/* Description : Dot product of byte vector elements
Arguments : Inputs - mult0, mult1, cnst0, cnst1
Outputs - out0, out1
...
...
@@ -768,6 +817,7 @@
ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
}
/* Typed variants of ILVR_D4: arguments are forwarded unchanged with RTYPE
   fixed to v16i8 (_SB) or v16u8 (_UB). */
#define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__)
#define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__)
/* Description : Interleave both left and right half of input vectors
...
...
@@ -1252,4 +1302,30 @@
out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \
out_m; \
})
/* Description : Pack even byte elements and store byte vector in destination
                 memory
   Arguments   : Inputs - in0, in1, pdst
   Details     : 'in1' and 'in0' are passed (in that order) to
                 __msa_pckev_b and the packed v16i8 result is stored
                 through ST_SB at 'pdst'.
   Note        : 'in0' and 'in1' are parenthesized before being cast so that
                 an expression argument is cast as a whole rather than only
                 its first operand.
*/
#define PCKEV_ST_SB(in0, in1, pdst)                      \
  {                                                       \
    v16i8 tmp_m;                                          \
                                                          \
    tmp_m = __msa_pckev_b((v16i8)(in1), (v16i8)(in0));    \
    ST_SB(tmp_m, (pdst));                                 \
  }
/* Description : Horizontal 2 tap filter kernel code
   Arguments   : Inputs - in0, in1, mask, coeff, shift
   Details     : Bytes of 'in0' and 'in1' are shuffled according to 'mask'
                 (__msa_vshf_b), the shuffled bytes are combined with
                 'coeff' by an unsigned byte dot product (__msa_dotp_u_h),
                 and the halfword result is passed through __msa_srari_h
                 with 'shift'.
   Result      : The macro is a statement expression whose value is the
                 final v8u16 vector.
   Note        : Vector arguments are parenthesized before being cast so
                 that an expression argument is cast as a whole rather than
                 only its first operand.
*/
#define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift)               \
  ({                                                                    \
    v16i8 tmp0_m;                                                       \
    v8u16 tmp1_m;                                                       \
                                                                        \
    tmp0_m = __msa_vshf_b((v16i8)(mask), (v16i8)(in1), (v16i8)(in0));   \
    tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)(coeff));             \
    tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift);                \
                                                                        \
    tmp1_m;                                                             \
  })
#endif
/* VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ */
vp8/common/rtcd_defs.pl
View file @
16dcf013
...
...
@@ -225,20 +225,20 @@ $vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
$vp8_sixtap_predict4x4_dspr2
=
vp8_sixtap_predict4x4_dspr2
;
add_proto
qw/void vp8_bilinear_predict16x16/
,
"
unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon/
;
specialize
qw/vp8_bilinear_predict16x16 mmx sse2 ssse3 media neon
msa
/
;
$vp8_bilinear_predict16x16_media
=
vp8_bilinear_predict16x16_armv6
;
add_proto
qw/void vp8_bilinear_predict8x8/
,
"
unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon/
;
specialize
qw/vp8_bilinear_predict8x8 mmx sse2 ssse3 media neon
msa
/
;
$vp8_bilinear_predict8x8_media
=
vp8_bilinear_predict8x8_armv6
;
add_proto
qw/void vp8_bilinear_predict8x4/
,
"
unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch
";
specialize
qw/vp8_bilinear_predict8x4 mmx media neon/
;
specialize
qw/vp8_bilinear_predict8x4 mmx media neon
msa
/
;
$vp8_bilinear_predict8x4_media
=
vp8_bilinear_predict8x4_armv6
;
add_proto
qw/void vp8_bilinear_predict4x4/
,
"
unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch
";
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
specialize
qw/vp8_bilinear_predict4x4 mmx media/
;
specialize
qw/vp8_bilinear_predict4x4 mmx media
msa
/
;
$vp8_bilinear_predict4x4_media
=
vp8_bilinear_predict4x4_armv6
;
#
...
...
vp8/vp8_common.mk
View file @
16dcf013
...
...
@@ -114,6 +114,7 @@ VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c
VP8_COMMON_SRCS-$(HAVE_DSPR2)
+=
common/mips/dspr2/dequantize_dspr2.c
# common (c)
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/bilinear_filter_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/copymem_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/idct_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA)
+=
common/mips/msa/loopfilter_filters_msa.c
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment