Commit 355296c6 authored by Yaowu Xu

Remove or replace VP9 with VPX in vp10 and vpx_dsp

Change-Id: I412eaec8c890bae70745082f5311cb3d385a5b80
parent 5657cade
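The diff below is mechanical: each VP9_-prefixed MSA helper macro keeps its
arguments and body, and only the prefix becomes VPX_ (comments mentioning VP9
are likewise generalized). A compilable toy illustration of the rename
pattern, using a stand-in body rather than the real MSA vector code from
fwd_txfm_msa.h:

#include <stdio.h>

/* Stand-in for the renamed macro: the real VPX_FDCT4 (formerly VP9_FDCT4)
 * is a large MSA vector macro; only its name changed in this commit. */
#define VPX_FDCT4(i0, i1, i2, i3, o0, o1, o2, o3)  \
  do { (o0) = (i0) + (i3); (o1) = (i1) + (i2);     \
       (o2) = (i1) - (i2); (o3) = (i0) - (i3); } while (0)

int main(void) {
  int in0 = 1, in1 = 2, in2 = 3, in3 = 4, out0, out1, out2, out3;
  VPX_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3);  /* was VP9_FDCT4 */
  printf("%d %d %d %d\n", out0, out1, out2, out3);
  return 0;
}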
@@ -203,7 +203,7 @@ typedef struct VP10Common {
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
// TODO(agrange): Move prev_mi into encoder structure.
-// prev_mip and prev_mi will only be allocated in VP9 encoder.
+// prev_mip and prev_mi will only be allocated in encoder.
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
@@ -416,7 +416,7 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
xd->left_available = (mi_col > tile->mi_col_start);
if (xd->up_available) {
xd->above_mi = xd->mi[-xd->mi_stride];
-// above_mi may be NULL in VP9 encoder's first pass.
+// above_mi may be NULL in encoder's first pass.
xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL;
} else {
xd->above_mi = NULL;
@@ -425,7 +425,7 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
if (xd->left_available) {
xd->left_mi = xd->mi[-1];
-// left_mi may be NULL in VP9 encoder's first pass.
+// left_mi may be NULL in encoder's first pass.
xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL;
} else {
xd->left_mi = NULL;
......
@@ -69,7 +69,7 @@ static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
#if CONFIG_EMULATE_HARDWARE
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
// non-normative method to handle overflows. A stream that causes
-// overflows in the inverse transform is considered invalid in VP9,
+// overflows in the inverse transform is considered invalid,
// and a hardware implementer is free to choose any reasonable
// method to handle overflows. However to aid in hardware
// verification they can use a specific implementation of the
......
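Background for the comment above: with CONFIG_EMULATE_HARDWARE enabled, the
inverse transform constrains intermediate values the way a fixed-width
hardware datapath would, so a software decoder can be checked bit-exactly
against such hardware. A minimal scalar sketch of the idea, with hypothetical
names and a saturating clamp chosen purely for illustration (the real code
uses dct_const_round_shift/WRAPLOW-style helpers, not this function):

#include <stdint.h>
#include <stdio.h>

typedef int32_t tran_low_t;   /* mirrors libvpx's usual typedefs */
typedef int64_t tran_high_t;

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(value, n) \
  (((value) + ((tran_high_t)1 << ((n) - 1))) >> (n))

/* Hypothetical: round-shift a transform intermediate, then saturate to the
 * 16-bit range a narrow datapath would carry. The clamp is one "reasonable
 * method" in the sense of the comment above, not normative behavior. */
static tran_low_t sketch_const_round_shift(tran_high_t input) {
  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  if (rv > INT16_MAX) rv = INT16_MAX;
  if (rv < INT16_MIN) rv = INT16_MIN;
  return (tran_low_t)rv;
}

int main(void) {
  printf("%d\n", (int)sketch_const_round_shift(1000000000));  /* clamps to 32767 */
  return 0;
}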
@@ -186,9 +186,9 @@ void vpx_fdct4x4_msa(const int16_t *input, int16_t *output,
in0 += vec;
}
-VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+VPX_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
-VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+VPX_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3);
SRA_4V(in0, in1, in2, in3, 2);
@@ -203,11 +203,11 @@ void vpx_fdct8x8_msa(const int16_t *input, int16_t *output,
LD_SH8(input, src_stride, in0, in1, in2, in3, in4, in5, in6, in7);
SLLI_4V(in0, in1, in2, in3, 2);
SLLI_4V(in4, in5, in6, in7, 2);
-VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
-VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
......
@@ -29,7 +29,7 @@
HADD_SW_S32(vec_w_m); \
})
-#define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) { \
+#define VPX_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) { \
v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \
v8i16 vec0_m, vec1_m, vec2_m, vec3_m; \
v4i32 vec4_m, vec5_m, vec6_m, vec7_m; \
@@ -67,7 +67,7 @@
in4, in5, in6, in7); \
}
-#define VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \
+#define VPX_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \
out0, out1, out2, out3, out4, out5, out6, out7) { \
v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m; \
v8i16 s7_m, x0_m, x1_m, x2_m, x3_m; \
......
@@ -189,16 +189,16 @@ void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
reg3 = tmp7;
SRARI_H4_SH(reg0, reg2, reg4, reg6, 6);
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6);
dst += (4 * dst_stride);
SRARI_H4_SH(reg8, reg10, reg12, reg14, 6);
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14);
dst += (4 * dst_stride);
SRARI_H4_SH(reg3, reg13, reg11, reg5, 6);
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5);
dst += (4 * dst_stride);
SRARI_H4_SH(reg7, reg9, reg1, reg15, 6);
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15);
}
void vpx_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst,
@@ -303,7 +303,7 @@ void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) {
l8, l9, l10, l11, l12, l13, l14, l15);
/* ADST in horizontal */
-VP9_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7,
+VPX_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7,
l8, l9, l10, l11, l12, l13, l14, l15,
r0, r1, r2, r3, r4, r5, r6, r7,
r8, r9, r10, r11, r12, r13, r14, r15);
@@ -345,20 +345,20 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
r15 = LD_SH(input + 15 * 16);
/* stage 1 */
-k0 = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);
-k1 = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);
-k2 = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);
-k3 = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);
+k1 = VPX_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);
+k2 = VPX_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);
+k3 = VPX_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);
MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3);
-k0 = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);
-k1 = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);
-k2 = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);
-k3 = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);
+k1 = VPX_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);
+k2 = VPX_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);
+k3 = VPX_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);
MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11);
BUTTERFLY_4(g0, g2, g10, g8, h8, h9, v2, v0);
-k0 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
-k1 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
-k2 = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
+k1 = VPX_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
+k2 = VPX_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);
MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3);
r1 = LD_SH(input + 1 * 16);
@@ -370,15 +370,15 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
r13 = LD_SH(input + 13 * 16);
r14 = LD_SH(input + 14 * 16);
-k0 = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);
-k1 = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);
-k2 = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);
-k3 = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);
+k1 = VPX_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);
+k2 = VPX_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);
+k3 = VPX_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);
MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7);
-k0 = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);
-k1 = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);
-k2 = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);
-k3 = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);
+k1 = VPX_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);
+k2 = VPX_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);
+k3 = VPX_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);
MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15);
BUTTERFLY_4(g4, g6, g14, g12, h10, h11, v6, v4);
BUTTERFLY_4(h8, h9, h11, h10, out0, out1, h11, h10);
@@ -393,9 +393,9 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
ST8x1_UB(res0, dst);
ST8x1_UB(res1, dst + 15 * dst_stride);
-k0 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
-k1 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
-k2 = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
+k1 = VPX_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
+k2 = VPX_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);
MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7);
BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10);
out8 = -out8;
@@ -410,9 +410,9 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
ST8x1_UB(res8, dst + dst_stride);
ST8x1_UB(res9, dst + 14 * dst_stride);
-k0 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
-k1 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
-k2 = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
+k1 = VPX_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
+k2 = VPX_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);
MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7);
out4 = -out4;
SRARI_H2_SH(out4, out5, 6);
@@ -437,8 +437,8 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
ST8x1_UB(res12, dst + 2 * dst_stride);
ST8x1_UB(res13, dst + 13 * dst_stride);
-k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
-k3 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
+k3 = VPX_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
MADD_SHORT(out6, out7, k0, k3, out6, out7);
SRARI_H2_SH(out6, out7, 6);
dst6 = LD_UB(dst + 4 * dst_stride);
@@ -461,8 +461,8 @@ void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
ST8x1_UB(res10, dst + 6 * dst_stride);
ST8x1_UB(res11, dst + 9 * dst_stride);
-k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);
-k2 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
+k1 = VPX_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);
+k2 = VPX_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
MADD_SHORT(h10, h11, k1, k2, out2, out3);
SRARI_H2_SH(out2, out3, 6);
dst2 = LD_UB(dst + 7 * dst_stride);
......
@@ -559,11 +559,11 @@ static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6);
SRARI_H4_SH(m0, m2, m4, m6, 6);
-VP9_ADDBLK_ST8x4_UB(dst, (4 * dst_stride), m0, m2, m4, m6);
+VPX_ADDBLK_ST8x4_UB(dst, (4 * dst_stride), m0, m2, m4, m6);
SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m6, m2, m4, m0);
SRARI_H4_SH(m0, m2, m4, m6, 6);
-VP9_ADDBLK_ST8x4_UB((dst + 19 * dst_stride), (4 * dst_stride),
+VPX_ADDBLK_ST8x4_UB((dst + 19 * dst_stride), (4 * dst_stride),
m0, m2, m4, m6);
/* Load 8 & Store 8 */
@@ -578,12 +578,12 @@ static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7);
SRARI_H4_SH(m1, m3, m5, m7, 6);
-VP9_ADDBLK_ST8x4_UB((dst + 2 * dst_stride), (4 * dst_stride),
+VPX_ADDBLK_ST8x4_UB((dst + 2 * dst_stride), (4 * dst_stride),
m1, m3, m5, m7);
SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m7, m3, m5, m1);
SRARI_H4_SH(m1, m3, m5, m7, 6);
-VP9_ADDBLK_ST8x4_UB((dst + 17 * dst_stride), (4 * dst_stride),
+VPX_ADDBLK_ST8x4_UB((dst + 17 * dst_stride), (4 * dst_stride),
m1, m3, m5, m7);
/* Load 8 & Store 8 */
@@ -598,12 +598,12 @@ static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6);
SRARI_H4_SH(n0, n2, n4, n6, 6);
-VP9_ADDBLK_ST8x4_UB((dst + 1 * dst_stride), (4 * dst_stride),
+VPX_ADDBLK_ST8x4_UB((dst + 1 * dst_stride), (4 * dst_stride),
n0, n2, n4, n6);
SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n6, n2, n4, n0);
SRARI_H4_SH(n0, n2, n4, n6, 6);
-VP9_ADDBLK_ST8x4_UB((dst + 18 * dst_stride), (4 * dst_stride),
+VPX_ADDBLK_ST8x4_UB((dst + 18 * dst_stride), (4 * dst_stride),
n0, n2, n4, n6);
/* Load 8 & Store 8 */
@@ -618,12 +618,12 @@ static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7);
SRARI_H4_SH(n1, n3, n5, n7, 6);
-VP9_ADDBLK_ST8x4_UB((dst + 3 * dst_stride), (4 * dst_stride),
+VPX_ADDBLK_ST8x4_UB((dst + 3 * dst_stride), (4 * dst_stride),
n1, n3, n5, n7);
SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n7, n3, n5, n1);
SRARI_H4_SH(n1, n3, n5, n7, 6);
-VP9_ADDBLK_ST8x4_UB((dst + 16 * dst_stride), (4 * dst_stride),
+VPX_ADDBLK_ST8x4_UB((dst + 16 * dst_stride), (4 * dst_stride),
n1, n3, n5, n7);
}
......
@@ -75,10 +75,10 @@ void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,
LD4x4_SH(input, in0, in1, in2, in3);
/* rows */
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
-VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
/* columns */
TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
-VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+VPX_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
/* rounding (add 2^3, divide by 2^4) */
SRARI_H4_SH(in0, in1, in2, in3, 4);
ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
......
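The rounding comment above ("add 2^3, divide by 2^4") is the scalar meaning
of the MSA SRARI (shift right arithmetic rounded immediate) operations used
throughout these files. A small sketch of the per-element arithmetic, with a
hypothetical helper name:

#include <stdint.h>
#include <stdio.h>

/* Rounding arithmetic shift right by n: add half the divisor, then shift.
 * For the 4x4 iDCT above n == 4, i.e. add 2^3 and divide by 2^4. */
static int16_t round_shift_right(int16_t x, int n) {
  return (int16_t)((x + (1 << (n - 1))) >> n);
}

int main(void) {
  printf("%d %d\n", round_shift_right(100, 4),   /* (100 + 8) >> 4 == 6 */
         round_shift_right(-100, 4));            /* (-100 + 8) >> 4 == -6 */
  return 0;
}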
@@ -21,21 +21,21 @@ void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
/* 1D idct8x8 */
-VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
/* columns transform */
TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
/* 1D idct8x8 */
-VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
/* final rounding (add 2^4, divide by 2^5) and shift */
SRARI_H4_SH(in0, in1, in2, in3, 5);
SRARI_H4_SH(in4, in5, in6, in7, 5);
/* add block and store 8x8 */
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
dst += (4 * dst_stride);
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
}
void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
@@ -51,10 +51,10 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
/* stage1 */
ILVL_H2_SH(in3, in0, in2, in1, s0, s1);
-k0 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
-k1 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
-k2 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
-k3 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
+k1 = VPX_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
+k2 = VPX_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
+k3 = VPX_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
@@ -63,10 +63,10 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
/* stage2 */
ILVR_H2_SH(in3, in1, in2, in0, s1, s0);
-k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
-k1 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
-k2 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
-k3 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
+k0 = VPX_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
+k1 = VPX_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
+k2 = VPX_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
+k3 = VPX_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
@@ -76,7 +76,7 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
/* stage3 */
s0 = __msa_ilvr_h(s6, s5);
-k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
+k1 = VPX_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
DOTP_SH2_SW(s0, s0, k1, k0, tmp0, tmp1);
SRARI_W2_SW(tmp0, tmp1, DCT_CONST_BITS);
PCKEV_H2_SH(zero, tmp0, zero, tmp1, s2, s3);
@@ -86,7 +86,7 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
in0, in1, in2, in3, in4, in5, in6, in7);
TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
-VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+VPX_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
in0, in1, in2, in3, in4, in5, in6, in7);
/* final rounding (add 2^4, divide by 2^5) and shift */
@@ -94,9 +94,9 @@ void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
SRARI_H4_SH(in4, in5, in6, in7, 5);
/* add block and store 8x8 */
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
dst += (4 * dst_stride);
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
}
void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
@@ -110,7 +110,7 @@ void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
val = ROUND_POWER_OF_TWO(out, 5);
vec = __msa_fill_h(val);
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
dst += (4 * dst_stride);
-VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
+VPX_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
}
@@ -35,8 +35,8 @@ int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
/* mask and hev */
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
-VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
-VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+VPX_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
if (__msa_test_bz_v(flat)) {
ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
@@ -46,12 +46,12 @@ int32_t vpx_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
q2_r, q3_r);
-VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+VPX_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
-VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+VPX_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
/* convert 16 bit output data into 8 bit */
@@ -94,7 +94,7 @@ void vpx_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
LD_UB8((src - 8 * pitch), pitch, p7, p6, p5, p4, p3, p2, p1, p0);
LD_UB8(src, pitch, q0, q1, q2, q3, q4, q5, q6, q7);
-VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+VPX_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
if (__msa_test_bz_v(flat2)) {
LD_UB4(filter48, 16, p2, p1, p0, q0);
@@ -451,8 +451,8 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
-VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
-VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+VPX_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
q1_out);
flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
@@ -468,7 +468,7 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
q3_r);
-VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
+VPX_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
/* convert 16 bit output data into 8 bit */
@@ -489,7 +489,7 @@ void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
LD_UB4((src - 8 * pitch), pitch, p7, p6, p5, p4);
LD_UB4(src + (4 * pitch), pitch, q4, q5, q6, q7);
-VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+VPX_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
if (__msa_test_bz_v(flat2)) {
p2_d = __msa_copy_u_d((v2i64)p2_out, 0);
@@ -768,9 +768,9 @@ int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
/* flat4 */
-VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);
/* filter4 */
-VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+VPX_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
@@ -783,7 +783,7 @@ int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
q3_r);
-VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+VPX_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
/* convert 16 bit output data into 8 bit */
@@ -827,7 +827,7 @@ int32_t vpx_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
-VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+VPX_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
if (__msa_test_bz_v(flat2)) {
v8i16 vec0, vec1, vec2, vec3, vec4;
@@ -1082,9 +1082,9 @@ int32_t vpx_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
/* flat4 */
-VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);
/* filter4 */
-VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+VPX_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
if (__msa_test_bz_v(flat)) {
ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
@@ -1102,11 +1102,11 @@ int32_t vpx_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
q3_r);
-VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+VPX_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
-VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+VPX_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
/* convert 16 bit output data into 8 bit */
@@ -1151,7 +1151,7 @@ int32_t vpx_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
-VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+VPX_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
if (__msa_test_bz_v(flat2)) {
v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
......
@@ -30,7 +30,7 @@ void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
-VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+VPX_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
@@ -66,7 +66,7 @@ void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
hev, mask, flat);
-VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+VPX_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
}
@@ -92,7 +92,7 @@ void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
p3, p2, p1, p0, q0, q1, q2, q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
-VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+VPX_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1);
ILVRL_H2_SH(vec1, vec0, vec2, vec3);
@@ -138,7 +138,7 @@ void vpx_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
hev, mask, flat);
-VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+VPX_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
......
@@ -34,8 +34,8 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
-VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
-VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+VPX_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
@@ -49,7 +49,7 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
q2_r, q3_r);
-VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
+VPX_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
/* convert 16 bit output data into 8 bit */
@@ -117,8 +117,8 @@ void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
/* mask and hev */
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
-VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
-VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+VPX_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
if (__msa_test_bz_v(flat)) {
ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
@@ -126,12 +126,12 @@ void vpx_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
q2_r, q3_r);
-VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+VPX_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
-VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+VPX_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
/* convert 16 bit output data into 8 bit */
@@ -187,9 +187,9 @@ void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
hev, mask, flat);
/* flat4 */
-VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+VPX_FLAT4(p3, p2, p0, q0, q2, q3, flat);
/* filter4 */