Commit 13d2aee7 authored by Yi Luo
Browse files

Add the missing IDTX type optimization to hybrid txfm

Change-Id: I99b15e5270bfefe2eb3e982aeba06ed564540d73
parent 72e2e982
......@@ -470,6 +470,10 @@ void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest,
iadst16(in);
flip_col(&dest, &stride, 16);
break;
case IDTX:
iidtx16(in);
iidtx16(in);
break;
case V_DCT:
iidtx16(in);
idct16(in);
......
......@@ -494,6 +494,10 @@ void av1_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
aom_iadst16_sse2(in0, in1);
FLIPUD_PTR(dest, stride, 16);
break;
case IDTX:
iidtx16_sse2(in0, in1);
iidtx16_sse2(in0, in1);
break;
case V_DCT:
iidtx16_sse2(in0, in1);
aom_idct16_sse2(in0, in1);
......
......@@ -52,11 +52,6 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
return;
}
#if CONFIG_EXT_TX
if (tx_type == IDTX)
av1_fwd_idtx_c(src_diff, coeff, diff_stride, 4, tx_type);
else
#endif
av1_fht4x4(src_diff, coeff, diff_stride, tx_type);
}
......@@ -106,11 +101,6 @@ static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void)fwd_txfm_opt;
#if CONFIG_EXT_TX
if (tx_type == IDTX)
av1_fwd_idtx_c(src_diff, coeff, diff_stride, 8, tx_type);
else
#endif
av1_fht8x8(src_diff, coeff, diff_stride, tx_type);
}
......@@ -118,11 +108,6 @@ static void fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void)fwd_txfm_opt;
#if CONFIG_EXT_TX
if (tx_type == IDTX)
av1_fwd_idtx_c(src_diff, coeff, diff_stride, 16, tx_type);
else
#endif
av1_fht16x16(src_diff, coeff, diff_stride, tx_type);
}
......@@ -130,11 +115,6 @@ static void fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void)fwd_txfm_opt;
#if CONFIG_EXT_TX
if (tx_type == IDTX)
av1_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
else
#endif
av1_fht32x32(src_diff, coeff, diff_stride, tx_type);
}
......
......@@ -257,6 +257,12 @@ void av1_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
case IDTX:
load_buffer_4x4(input, in, stride, 0, 0);
fidtx4_sse2(in);
fidtx4_sse2(in);
write_buffer_4x4(output, in);
break;
case V_DCT:
load_buffer_4x4(input, in, stride, 0, 0);
fdct4_sse2(in);
......@@ -1357,6 +1363,13 @@ void av1_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
case IDTX:
load_buffer_8x8(input, in, stride, 0, 0);
fidtx8_sse2(in);
fidtx8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
case V_DCT:
load_buffer_8x8(input, in, stride, 0, 0);
fdct8_sse2(in);
......@@ -2579,6 +2592,13 @@ void av1_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
case IDTX:
load_buffer_16x16(input, in0, in1, stride, 0, 0);
fidtx16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fidtx16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
case V_DCT:
load_buffer_16x16(input, in0, in1, stride, 0, 0);
fdct16_sse2(in0, in1);
......
......@@ -1025,6 +1025,13 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride,
right_shift_16x16(in);
fadst16_avx2(in);
break;
case IDTX:
load_buffer_16x16(input, stride, 0, 0, in);
fidtx16_avx2(in);
mm256_transpose_16x16(in);
right_shift_16x16(in);
fidtx16_avx2(in);
break;
case V_DCT:
load_buffer_16x16(input, stride, 0, 0, in);
fdct16_avx2(in);
......@@ -1621,6 +1628,12 @@ void av1_fht32x32_avx2(const int16_t *input, tran_low_t *output, int stride,
right_shift_32x32(in0, in1);
fhalfright32_avx2(in0, in1);
break;
case IDTX:
load_buffer_32x32(input, stride, 0, 0, in0, in1);
fidtx32_avx2(in0, in1);
right_shift_32x32(in0, in1);
fidtx32_avx2(in0, in1);
break;
case V_DCT:
load_buffer_32x32(input, stride, 0, 0, in0, in1);
fdct32_avx2(in0, in1);
......
......@@ -184,6 +184,8 @@ const Ht16x16Param kArrayHt16x16Param_sse2[] = {
256),
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 8, AOM_BITS_8,
256),
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 9, AOM_BITS_8,
256),
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 10, AOM_BITS_8,
256),
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_sse2, 11, AOM_BITS_8,
......@@ -223,6 +225,8 @@ const Ht16x16Param kArrayHt16x16Param_avx2[] = {
256),
make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 8, AOM_BITS_8,
256),
make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 9, AOM_BITS_8,
256),
make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 10, AOM_BITS_8,
256),
make_tuple(&av1_fht16x16_avx2, &av1_iht16x16_256_add_avx2, 11, AOM_BITS_8,
......
......@@ -177,6 +177,7 @@ const Ht4x4Param kArrayHt4x4Param_sse2[] = {
make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 6, AOM_BITS_8, 16),
make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 7, AOM_BITS_8, 16),
make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 8, AOM_BITS_8, 16),
make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 9, AOM_BITS_8, 16),
make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 10, AOM_BITS_8, 16),
make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 11, AOM_BITS_8, 16),
make_tuple(&av1_fht4x4_sse2, &av1_iht4x4_16_add_sse2, 12, AOM_BITS_8, 16),
......
......@@ -177,6 +177,7 @@ const Ht8x8Param kArrayHt8x8Param_sse2[] = {
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 6, AOM_BITS_8, 64),
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 7, AOM_BITS_8, 64),
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 8, AOM_BITS_8, 64),
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 9, AOM_BITS_8, 64),
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 10, AOM_BITS_8, 64),
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 11, AOM_BITS_8, 64),
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_sse2, 12, AOM_BITS_8, 64),
......
......@@ -201,6 +201,7 @@ const Ht32x32Param kArrayHt32x32Param_avx2[] = {
make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 6, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 7, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 8, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 9, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 10, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 11, AOM_BITS_8, 1024),
make_tuple(&av1_fht32x32_avx2, &dummy_inv_txfm, 12, AOM_BITS_8, 1024),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment