Xiph.Org / aom-rav1e / Commits

Commit 6afa29d6
Authored Jan 22, 2016 by Yaowu Xu
Committed by Gerrit Code Review on Jan 22, 2016
Merge "Remove or replace VP9 with VPX in vp10 and vpx_dsp"
parents
ead2ade2
355296c6
Showing 18 changed files with 169 additions and 169 deletions (+169 -169)
Changed files:

  vp10/common/onyxc_int.h                       +3  -3
  vp10/common/vp10_inv_txfm.h                   +1  -1
  vpx_dsp/mips/fwd_txfm_msa.c                   +4  -4
  vpx_dsp/mips/fwd_txfm_msa.h                   +2  -2
  vpx_dsp/mips/idct16x16_msa.c                  +34 -34
  vpx_dsp/mips/idct32x32_msa.c                  +8  -8
  vpx_dsp/mips/idct4x4_msa.c                    +2  -2
  vpx_dsp/mips/idct8x8_msa.c                    +18 -18
  vpx_dsp/mips/inv_txfm_msa.h                   +41 -41
  vpx_dsp/mips/loopfilter_16_msa.c              +18 -18
  vpx_dsp/mips/loopfilter_4_msa.c               +4  -4
  vpx_dsp/mips/loopfilter_8_msa.c               +14 -14
  vpx_dsp/mips/loopfilter_filters_dspr2.h       +3  -3
  vpx_dsp/mips/loopfilter_macros_dspr2.h        +3  -3
  vpx_dsp/mips/loopfilter_masks_dspr2.h         +3  -3
  vpx_dsp/mips/loopfilter_msa.h                 +5  -5
  vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c    +1  -1
  vpx_scale/generic/yv12config.c                +5  -5
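The change is mechanical throughout: MIPS MSA/DSPR2 helper macros drop the codec-specific VP9_ prefix in favour of the library-wide VPX_ prefix (e.g. VP9_FDCT4 becomes VPX_FDCT4), and "VP9" is removed from comments; the hunks shown below contain no functional change.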
vp10/common/onyxc_int.h

@@ -203,7 +203,7 @@ typedef struct VP10Common {
   MODE_INFO *mi;  /* Corresponds to upper left visible macroblock */
   // TODO(agrange): Move prev_mi into encoder structure.
-  // prev_mip and prev_mi will only be allocated in VP9 encoder.
+  // prev_mip and prev_mi will only be allocated in encoder.
   MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
   MODE_INFO *prev_mi;  /* 'mi' from last frame (points into prev_mip) */

@@ -416,7 +416,7 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
   xd->left_available = (mi_col > tile->mi_col_start);
   if (xd->up_available) {
     xd->above_mi = xd->mi[-xd->mi_stride];
-    // above_mi may be NULL in VP9 encoder's first pass.
+    // above_mi may be NULL in encoder's first pass.
     xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL;
   } else {
     xd->above_mi = NULL;

@@ -425,7 +425,7 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
   if (xd->left_available) {
     xd->left_mi = xd->mi[-1];
-    // left_mi may be NULL in VP9 encoder's first pass.
+    // left_mi may be NULL in encoder's first pass.
     xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL;
   } else {
     xd->left_mi = NULL;
vp10/common/vp10_inv_txfm.h

@@ -69,7 +69,7 @@ static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
 #if CONFIG_EMULATE_HARDWARE
 // When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
 // non-normative method to handle overflows. A stream that causes
-// overflows in the inverse transform is considered invalid in VP9,
+// overflows in the inverse transform is considered invalid,
 // and a hardware implementer is free to choose any reasonable
 // method to handle overflows. However to aid in hardware
 // verification they can use a specific implementation of the
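For readers unfamiliar with the emulate-hardware path this comment describes: a common non-normative choice is to wrap intermediates to 16 bits, the way a fixed-width hardware datapath would. A minimal scalar sketch, assuming libvpx-style ROUND_POWER_OF_TWO and DCT_CONST_BITS definitions (the exact wrap used by this file may differ):

#include <stdint.h>

typedef int64_t tran_high_t;
typedef int32_t tran_low_t;

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

/* Non-normative overflow handling: keep only the low 16 bits, as a
   16-bit hardware register would. Valid streams never overflow, so any
   consistent choice is acceptable for conformance. */
static inline tran_low_t wrap_low(tran_high_t x) {
  return (tran_low_t)(int16_t)x;
}

static inline tran_low_t dct_const_round_shift(tran_high_t input) {
  return wrap_low(ROUND_POWER_OF_TWO(input, DCT_CONST_BITS));
}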
vpx_dsp/mips/fwd_txfm_msa.c

@@ -186,9 +186,9 @@ void vpx_fdct4x4_msa(const int16_t *input, int16_t *output,
     in0 += vec;
   }

-  VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+  VPX_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
   TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
-  VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+  VPX_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
   TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
   ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3);
   SRA_4V(in0, in1, in2, in3, 2);
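The call sequence above is the standard separable 2-D transform: one 1-D pass, a transpose, the same pass again, a final transpose, then a +1 >> 2 descale. A scalar sketch of just that plumbing, with the 1-D butterfly left abstract (this models the structure, not the MSA vector layout):

#include <stdint.h>

/* In-place 4x4 transpose, mirroring TRANSPOSE4x4_SH_SH. */
static void transpose4(int16_t m[4][4]) {
  for (int r = 0; r < 4; ++r)
    for (int c = r + 1; c < 4; ++c) {
      int16_t t = m[r][c];
      m[r][c] = m[c][r];
      m[c][r] = t;
    }
}

/* Two passes of (1-D transform, transpose) give the 2-D transform;
   dct4 stands in for VPX_FDCT4. The final loop is the ADD4/SRA_4V
   descale: add 1, then arithmetic shift right by 2. */
static void fdct4x4_model(int16_t m[4][4], void (*dct4)(int16_t v[4])) {
  for (int pass = 0; pass < 2; ++pass) {
    for (int r = 0; r < 4; ++r) dct4(m[r]);
    transpose4(m);
  }
  for (int r = 0; r < 4; ++r)
    for (int c = 0; c < 4; ++c) m[r][c] = (int16_t)((m[r][c] + 1) >> 2);
}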
@@ -203,11 +203,11 @@ void vpx_fdct8x8_msa(const int16_t *input, int16_t *output,
   LD_SH8(input, src_stride, in0, in1, in2, in3, in4, in5, in6, in7);
   SLLI_4V(in0, in1, in2, in3, 2);
   SLLI_4V(in4, in5, in6, in7, 2);
-  VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+  VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
             in0, in1, in2, in3, in4, in5, in6, in7);
   TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                      in0, in1, in2, in3, in4, in5, in6, in7);
-  VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+  VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
             in0, in1, in2, in3, in4, in5, in6, in7);
   TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                      in0, in1, in2, in3, in4, in5, in6, in7);
vpx_dsp/mips/fwd_txfm_msa.h

@@ -29,7 +29,7 @@
     HADD_SW_S32(vec_w_m);                                          \
   })

-#define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) {    \
+#define VPX_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) {    \
   v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m;                        \
   v8i16 vec0_m, vec1_m, vec2_m, vec3_m;                            \
   v4i32 vec4_m, vec5_m, vec6_m, vec7_m;                            \

@@ -67,7 +67,7 @@
               in4, in5, in6, in7);                                 \
 }

-#define VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,          \
+#define VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,          \
               out0, out1, out2, out3, out4, out5, out6, out7) {    \
   v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m;                  \
   v8i16 s7_m, x0_m, x1_m, x2_m, x3_m;                              \
vpx_dsp/mips/idct16x16_msa.c

@@ -189,16 +189,16 @@ void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
   reg3 = tmp7;
   SRARI_H4_SH(reg0, reg2, reg4, reg6, 6);
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6);
   dst += (4 * dst_stride);
   SRARI_H4_SH(reg8, reg10, reg12, reg14, 6);
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14);
   dst += (4 * dst_stride);
   SRARI_H4_SH(reg3, reg13, reg11, reg5, 6);
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5);
   dst += (4 * dst_stride);
   SRARI_H4_SH(reg7, reg9, reg1, reg15, 6);
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15);
 }

 void vpx_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst,

@@ -303,7 +303,7 @@ void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) {
                    l8, l9, l10, l11, l12, l13, l14, l15);

   /* ADST in horizontal */
-  VP9_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7,
+  VPX_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7,
                    l8, l9, l10, l11, l12, l13, l14, l15,
                    r0, r1, r2, r3, r4, r5, r6, r7,
                    r8, r9, r10, r11, r12, r13, r14, r15);

@@ -345,20 +345,20 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
   r15 = LD_SH(input + 15 * 16);

   /* stage 1 */
-  k0 = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);
-  k1 = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);
-  k2 = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);
-  k3 = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);
+  k1 = VPX_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);
+  k2 = VPX_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);
+  k3 = VPX_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);
   MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3);
-  k0 = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);
-  k1 = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);
-  k2 = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);
-  k3 = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);
+  k1 = VPX_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);
+  k2 = VPX_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);
+  k3 = VPX_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);
   MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11);
   BUTTERFLY_4(g0, g2, g10, g8, h8, h9, v2, v0);
-  k0 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
-  k1 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
-  k2 = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
+  k1 = VPX_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
+  k2 = VPX_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);
   MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3);

   r1 = LD_SH(input + 1 * 16);
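The cospi_k_64 constants fed to VPX_SET_COSPI_PAIR are, assuming the usual libvpx convention (see vpx_dsp/txfm_common.h), Q14 fixed-point cosines: cospi_k_64 == round(16384 * cos(k*pi/64)). A small generator for checking the tables under that assumption:

#include <math.h>

/* Q14 cosine table entry: round(16384 * cos(k*pi/64)). For example
   k == 16 gives 11585, i.e. 1/sqrt(2) in Q14. */
static short cospi_q14(int k) {
  const double pi = 3.14159265358979323846;
  return (short)floor(16384.0 * cos(k * pi / 64.0) + 0.5);
}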
@@ -370,15 +370,15 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
   r13 = LD_SH(input + 13 * 16);
   r14 = LD_SH(input + 14 * 16);

-  k0 = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);
-  k1 = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);
-  k2 = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);
-  k3 = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);
+  k1 = VPX_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);
+  k2 = VPX_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);
+  k3 = VPX_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);
   MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7);
-  k0 = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);
-  k1 = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);
-  k2 = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);
-  k3 = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);
+  k1 = VPX_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);
+  k2 = VPX_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);
+  k3 = VPX_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);
   MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15);
   BUTTERFLY_4(g4, g6, g14, g12, h10, h11, v6, v4);
   BUTTERFLY_4(h8, h9, h11, h10, out0, out1, h11, h10);

@@ -393,9 +393,9 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
   ST8x1_UB(res0, dst);
   ST8x1_UB(res1, dst + 15 * dst_stride);

-  k0 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
-  k1 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
-  k2 = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
+  k1 = VPX_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
+  k2 = VPX_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);
   MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7);
   BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10);
   out8 = -out8;

@@ -410,9 +410,9 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
   ST8x1_UB(res8, dst + dst_stride);
   ST8x1_UB(res9, dst + 14 * dst_stride);

-  k0 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
-  k1 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
-  k2 = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
+  k1 = VPX_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
+  k2 = VPX_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);
   MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7);
   out4 = -out4;
   SRARI_H2_SH(out4, out5, 6);

@@ -437,8 +437,8 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
   ST8x1_UB(res12, dst + 2 * dst_stride);
   ST8x1_UB(res13, dst + 13 * dst_stride);

-  k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
-  k3 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
+  k3 = VPX_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
   MADD_SHORT(out6, out7, k0, k3, out6, out7);
   SRARI_H2_SH(out6, out7, 6);
   dst6 = LD_UB(dst + 4 * dst_stride);

@@ -461,8 +461,8 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
   ST8x1_UB(res10, dst + 6 * dst_stride);
   ST8x1_UB(res11, dst + 9 * dst_stride);

-  k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);
-  k2 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
+  k1 = VPX_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);
+  k2 = VPX_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
   MADD_SHORT(h10, h11, k1, k2, out2, out3);
   SRARI_H2_SH(out2, out3, 6);
   dst2 = LD_UB(dst + 7 * dst_stride);
vpx_dsp/mips/idct32x32_msa.c

@@ -559,11 +559,11 @@ static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
   ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6);
   SRARI_H4_SH(m0, m2, m4, m6, 6);
-  VP9_ADDBLK_ST8x4_UB(dst, (4 * dst_stride), m0, m2, m4, m6);
+  VPX_ADDBLK_ST8x4_UB(dst, (4 * dst_stride), m0, m2, m4, m6);
   SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m6, m2, m4, m0);
   SRARI_H4_SH(m0, m2, m4, m6, 6);
-  VP9_ADDBLK_ST8x4_UB((dst + 19 * dst_stride), (4 * dst_stride),
+  VPX_ADDBLK_ST8x4_UB((dst + 19 * dst_stride), (4 * dst_stride),
                       m0, m2, m4, m6);

   /* Load 8 & Store 8 */

@@ -578,12 +578,12 @@ static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
   ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7);
   SRARI_H4_SH(m1, m3, m5, m7, 6);
-  VP9_ADDBLK_ST8x4_UB((dst + 2 * dst_stride), (4 * dst_stride),
+  VPX_ADDBLK_ST8x4_UB((dst + 2 * dst_stride), (4 * dst_stride),
                       m1, m3, m5, m7);
   SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m7, m3, m5, m1);
   SRARI_H4_SH(m1, m3, m5, m7, 6);
-  VP9_ADDBLK_ST8x4_UB((dst + 17 * dst_stride), (4 * dst_stride),
+  VPX_ADDBLK_ST8x4_UB((dst + 17 * dst_stride), (4 * dst_stride),
                       m1, m3, m5, m7);

   /* Load 8 & Store 8 */

@@ -598,12 +598,12 @@ static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
   ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6);
   SRARI_H4_SH(n0, n2, n4, n6, 6);
-  VP9_ADDBLK_ST8x4_UB((dst + 1 * dst_stride), (4 * dst_stride),
+  VPX_ADDBLK_ST8x4_UB((dst + 1 * dst_stride), (4 * dst_stride),
                       n0, n2, n4, n6);
   SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n6, n2, n4, n0);
   SRARI_H4_SH(n0, n2, n4, n6, 6);
-  VP9_ADDBLK_ST8x4_UB((dst + 18 * dst_stride), (4 * dst_stride),
+  VPX_ADDBLK_ST8x4_UB((dst + 18 * dst_stride), (4 * dst_stride),
                       n0, n2, n4, n6);

   /* Load 8 & Store 8 */

@@ -618,12 +618,12 @@ static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
   ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7);
   SRARI_H4_SH(n1, n3, n5, n7, 6);
-  VP9_ADDBLK_ST8x4_UB((dst + 3 * dst_stride), (4 * dst_stride),
+  VPX_ADDBLK_ST8x4_UB((dst + 3 * dst_stride), (4 * dst_stride),
                       n1, n3, n5, n7);
   SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n7, n3, n5, n1);
   SRARI_H4_SH(n1, n3, n5, n7, 6);
-  VP9_ADDBLK_ST8x4_UB((dst + 16 * dst_stride), (4 * dst_stride),
+  VPX_ADDBLK_ST8x4_UB((dst + 16 * dst_stride), (4 * dst_stride),
                       n1, n3, n5, n7);
 }
vpx_dsp/mips/idct4x4_msa.c

@@ -75,10 +75,10 @@ void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,
   LD4x4_SH(input, in0, in1, in2, in3);
   /* rows */
   TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
-  VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+  VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
   /* columns */
   TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
-  VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+  VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
   /* rounding (add 2^3, divide by 2^4) */
   SRARI_H4_SH(in0, in1, in2, in3, 4);
   ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
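SRARI_H4_SH(in0..in3, 4) implements exactly the rounding the comment describes: MSA's "shift right arithmetic rounded immediate" adds 2^3 and shifts right by 4 in every 16-bit lane. A scalar model of one lane:

#include <stdint.h>

/* Add 2^(shift-1), then arithmetic shift right: divide by 2^shift with
   round-to-nearest. The 4x4 iDCT carries 4 fractional bits, so shift
   == 4 returns the residual to pixel scale before ADDBLK_ST4x4_UB adds
   it to the predictor block. */
static int16_t srari_h(int16_t v, int shift) {
  return (int16_t)((v + (1 << (shift - 1))) >> shift);
}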
vpx_dsp/mips/idct8x8_msa.c

@@ -21,21 +21,21 @@ void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
   TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                      in0, in1, in2, in3, in4, in5, in6, in7);
   /* 1D idct8x8 */
-  VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+  VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
                  in0, in1, in2, in3, in4, in5, in6, in7);
   /* columns transform */
   TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                      in0, in1, in2, in3, in4, in5, in6, in7);
   /* 1D idct8x8 */
-  VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+  VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
                  in0, in1, in2, in3, in4, in5, in6, in7);
   /* final rounding (add 2^4, divide by 2^5) and shift */
   SRARI_H4_SH(in0, in1, in2, in3, 5);
   SRARI_H4_SH(in4, in5, in6, in7, 5);
   /* add block and store 8x8 */
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
   dst += (4 * dst_stride);
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
 }

 void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,

@@ -51,10 +51,10 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
   /* stage1 */
   ILVL_H2_SH(in3, in0, in2, in1, s0, s1);
-  k0 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
-  k1 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
-  k2 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
-  k3 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
+  k1 = VPX_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
+  k2 = VPX_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
+  k3 = VPX_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
   DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
   SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
   PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);

@@ -63,10 +63,10 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
   /* stage2 */
   ILVR_H2_SH(in3, in1, in2, in0, s1, s0);
-  k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
-  k1 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
-  k2 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
-  k3 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
+  k0 = VPX_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
+  k1 = VPX_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
+  k2 = VPX_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
+  k3 = VPX_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
   DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
   SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
   PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);

@@ -76,7 +76,7 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
   /* stage3 */
   s0 = __msa_ilvr_h(s6, s5);
-  k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
+  k1 = VPX_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
   DOTP_SH2_SW(s0, s0, k1, k0, tmp0, tmp1);
   SRARI_W2_SW(tmp0, tmp1, DCT_CONST_BITS);
   PCKEV_H2_SH(zero, tmp0, zero, tmp1, s2, s3);

@@ -86,7 +86,7 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
              in0, in1, in2, in3, in4, in5, in6, in7);
   TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
                      in0, in1, in2, in3, in4, in5, in6, in7);
-  VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+  VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
                  in0, in1, in2, in3, in4, in5, in6, in7);
   /* final rounding (add 2^4, divide by 2^5) and shift */

@@ -94,9 +94,9 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
   SRARI_H4_SH(in4, in5, in6, in7, 5);
   /* add block and store 8x8 */
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
   dst += (4 * dst_stride);
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
 }

 void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,

@@ -110,7 +110,7 @@ void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
   val = ROUND_POWER_OF_TWO(out, 5);
   vec = __msa_fill_h(val);

-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
   dst += (4 * dst_stride);
-  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
+  VPX_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
 }
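The @@ -110 hunk is the DC-only shortcut: with a single nonzero coefficient the whole 8x8 iDCT reduces to one value, which __msa_fill_h broadcasts so VPX_ADDBLK_ST8x4_UB can add it to all 64 pixels. A scalar sketch of the arithmetic, assuming the usual cospi_16_64 == 11585 (1/sqrt(2) in Q14):

#include <stdint.h>

/* DC coefficient -> broadcast residual: scale by cospi_16_64 once per
   1-D pass (two passes), rounding at DCT_CONST_BITS == 14 each time,
   then apply the final add-2^4 / shift-by-5 descale. */
static int16_t idct8x8_dc_model(int16_t dc) {
  const int32_t cospi_16_64 = 11585;
  int32_t out = (dc * cospi_16_64 + (1 << 13)) >> 14;  /* pass 1 */
  out = (out * cospi_16_64 + (1 << 13)) >> 14;         /* pass 2 */
  return (int16_t)((out + (1 << 4)) >> 5);             /* final round */
}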
vpx_dsp/mips/inv_txfm_msa.h

@@ -82,7 +82,7 @@
   out5 = -out5;                                \
 }

-#define VP9_SET_COSPI_PAIR(c0_h, c1_h) ({      \
+#define VPX_SET_COSPI_PAIR(c0_h, c1_h) ({      \
   v8i16 out0_m, r0_m, r1_m;                    \
                                                \
   r0_m = __msa_fill_h(c0_h);                   \
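VPX_SET_COSPI_PAIR packs two 16-bit cosine constants into alternating lanes of a v8i16 (the two __msa_fill_h results are interleaved), so that a pairwise dot product such as DOTP_SH2_SW produces one rotation term per 32-bit output lane. A scalar model of that pairing (the lane layout is inferred from the macro body):

#include <stdint.h>

/* One output lane of DOTP over a SET_COSPI_PAIR vector: even lanes
   carry c0, odd lanes c1, and the dot product of adjacent input lanes
   yields the butterfly rotation c0*x0 + c1*x1 in 32-bit precision. */
static int32_t dotp_pair(int16_t x0, int16_t x1, int16_t c0, int16_t c1) {
  return (int32_t)x0 * c0 + (int32_t)x1 * c1;
}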
@@ -92,7 +92,7 @@
   out0_m;                                                            \
 })

-#define VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3) {   \
+#define VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3) {   \
   uint8_t *dst_m = (uint8_t *) (dst);                                \
   v16u8 dst0_m, dst1_m, dst2_m, dst3_m;                              \
   v16i8 tmp0_m, tmp1_m;                                              \

@@ -109,18 +109,18 @@
   ST8x4_UB(tmp0_m, tmp1_m, dst_m, dst_stride);                       \
 }

-#define VP9_IDCT4x4(in0, in1, in2, in3, out0, out1, out2, out3) {    \
+#define VPX_IDCT4x4(in0, in1, in2, in3, out0, out1, out2, out3) {    \
   v8i16 c0_m, c1_m, c2_m, c3_m;                                      \
   v8i16 step0_m, step1_m;                                            \
   v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m;                              \
                                                                      \
-  c0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);               \
-  c1_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);              \
+  c0_m = VPX_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);               \
+  c1_m = VPX_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);              \
   step0_m = __msa_ilvr_h(in2, in0);                                  \
   DOTP_SH2_SW(step0_m, step0_m, c0_m, c1_m, tmp0_m, tmp1_m);         \
                                                                      \
-  c2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);               \
-  c3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                \
+  c2_m = VPX_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);               \
+  c3_m = VPX_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                \
   step1_m = __msa_ilvr_h(in3, in1);                                  \
   DOTP_SH2_SW(step1_m, step1_m, c2_m, c3_m, tmp2_m, tmp3_m);         \
   SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS);       \

@@ -132,7 +132,7 @@
               out0, out1, out2, out3);                               \
 }

-#define VP9_IADST4x4(in0, in1, in2, in3, out0, out1, out2, out3) {   \
+#define VPX_IADST4x4(in0, in1, in2, in3, out0, out1, out2, out3) {   \
   v8i16 res0_m, res1_m, c0_m, c1_m;                                  \
   v8i16 k1_m, k2_m, k3_m, k4_m;                                      \
   v8i16 zero_m = { 0 };                                              \

@@ -210,7 +210,7 @@
 }

 /* idct 8x8 macro */
-#define VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,       \
+#define VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,       \
                out0, out1, out2, out3, out4, out5, out6, out7) {     \
   v8i16 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m;      \
   v8i16 k0_m, k1_m, k2_m, k3_m, res0_m, res1_m, res2_m, res3_m;      \

@@ -234,8 +234,8 @@
   tp4_m = in1 + in3;                                                 \
   PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tp5_m, tp6_m);         \
   tp7_m = in7 + in5;                                                 \
-  k2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);               \
-  k3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                \
+  k2_m = VPX_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);               \
+  k3_m = VPX_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                \
   VP9_MADD(in0, in4, in2, in6, k1_m, k0_m, k2_m, k3_m,               \
            in0, in4, in2, in6);                                      \
   BUTTERFLY_4(in0, in4, in2, in6, tp0_m, tp1_m, tp2_m, tp3_m);       \

@@ -328,7 +328,7 @@
   out7 = -in7;                                                       \
 }

-#define VP9_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, r8,         \
+#define VPX_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, r8,         \
                          r9, r10, r11, r12, r13, r14, r15,           \
                          out0, out1, out2, out3, out4, out5,         \
                          out6, out7, out8, out9, out10, out11,       \

@@ -340,40 +340,40 @@
   v8i16 k0_m, k1_m, k2_m, k3_m;                                      \
                                                                      \
   /* stage 1 */                                                      \
-  k0_m = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);                \
-  k1_m = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);               \
-  k2_m = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);               \
-  k3_m = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);              \
+  k0_m = VPX_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);                \
+  k1_m = VPX_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);               \
+  k2_m = VPX_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);               \
+  k3_m = VPX_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);              \
   MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m,                   \
           g0_m, g1_m, g2_m, g3_m);                                   \
-  k0_m = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);                \
-  k1_m = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);               \
-  k2_m = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);               \
-  k3_m = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);              \
+  k0_m = VPX_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);                \
+  k1_m = VPX_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);               \
+  k2_m = VPX_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);               \
+  k3_m = VPX_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);              \
   MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m,                  \
           g4_m, g5_m, g6_m, g7_m);                                   \
-  k0_m = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);                \
-  k1_m = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);               \
-  k2_m = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);                \
-  k3_m = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);               \
+  k0_m = VPX_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);                \
+  k1_m = VPX_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);               \
+  k2_m = VPX_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);                \
+  k3_m = VPX_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);               \
   MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m,                  \
           g8_m, g9_m, g10_m, g11_m);                                 \
-  k0_m = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);               \
-  k1_m = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);              \
-  k2_m = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);                \
-  k3_m = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);               \
+  k0_m = VPX_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);               \
+  k1_m = VPX_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);              \
+  k2_m = VPX_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);                \
+  k3_m = VPX_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);               \
   MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m,                   \
           g12_m, g13_m, g14_m, g15_m);                               \
                                                                      \
   /* stage 2 */                                                      \
-  k0_m = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);                \
-  k1_m = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);               \
-  k2_m = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);               \
+  k0_m = VPX_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);                \
+  k1_m = VPX_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);               \
+  k2_m = VPX_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);               \
   MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m,           \
           h0_m, h1_m, h2_m, h3_m);                                   \
-  k0_m = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);               \
-  k1_m = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);              \
-  k2_m = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);              \
+  k0_m = VPX_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);               \
+  k1_m = VPX_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);              \
+  k2_m = VPX_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);              \
   MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m,          \
           h4_m, h5_m, h6_m, h7_m);                                   \
   BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10);     \

@@ -382,19 +382,19 @@
                                                                      \
   /* stage 3 */                                                      \
   BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m);   \
-  k0_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                \
-  k1_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);               \
-  k2_m = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);               \
+  k0_m = VPX_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);                \
+  k1_m = VPX_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);               \
+  k2_m = VPX_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);               \
   MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m,            \
           out4, out6, out5, out7);                                   \
   MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m,            \
           out12, out14, out13, out15);                               \
                                                                      \
   /* stage 4 */                                                      \
-  k0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);               \
-  k1_m = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);             \
-  k2_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);              \
-  k3_m = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);              \
+  k0_m = VPX_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);               \
+  k1_m = VPX_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);             \
+  k2_m = VPX_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);              \
+  k3_m = VPX_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);              \
   MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3);                  \
   MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7);                    \
   MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11);                \
vpx_dsp/mips/loopfilter_16_msa.c

@@ -35,8 +35,8 @@ int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
   /* mask and hev */
   LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
                hev, mask, flat);
-  VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
-  VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
-                     q1_out);
+  VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+  VPX_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+                     q1_out);

   if (__msa_test_bz_v(flat)) {
     ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
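The early-exit around __msa_test_bz_v(flat) is the usual loop-filter dispatch: VPX_FLAT4 marks pixels whose edge neighbourhood is smooth enough to deserve the wider 8-tap smoothing, and if no lane is marked the cheap 4-tap result is stored and the function returns. A scalar sketch of a flat test of this shape (the threshold and exact comparisons are assumptions modelled on VP9-style filters, not this macro's verified contents):

#include <stdint.h>
#include <stdlib.h>

/* Per-pixel "flat" decision for one 8-pixel edge neighbourhood: every
   inner sample must sit within `thresh` of the boundary sample on its
   side. VP9-style filters use thresh == 1 for 8-bit content. */
static int flat4_pixel(uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                       uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3) {
  const int thresh = 1;
  return abs(p1 - p0) <= thresh && abs(q1 - q0) <= thresh &&
         abs(p2 - p0) <= thresh && abs(q2 - q0) <= thresh &&
         abs(p3 - p0) <= thresh && abs(q3 - q0) <= thresh;
}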
@@ -46,12 +46,12 @@ int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
   ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
              zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
              q2_r, q3_r);
-  VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+  VPX_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
               p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
   ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
   ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
-  VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+  VPX_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
               p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);

   /* convert 16 bit output data into 8 bit */

@@ -94,7 +94,7 @@ void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
   LD_UB8((src - 8 * pitch), pitch, p7, p6, p5, p4, p3, p2, p1, p0);
   LD_UB8(src, pitch, q0, q1, q2, q3, q4, q5, q6, q7);
-  VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+  VPX_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);

   if (__msa_test_bz_v(flat2)) {
     LD_UB4(filter48, 16, p2, p1, p0, q0);

@@ -451,8 +451,8 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
   LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
                hev, mask, flat);
-  VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
-  VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+  VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);