Commit 72081457 authored by Debargha Mukherjee

Adding 8x16/16x8/32x16/16x32 transforms

Adds forward, inverse transforms and scan orders.

Change-Id: Iab6994f4b0ef65e660b714d111b79b1c8172d6a8
parent 814986b8
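The change extends TX_SIZE with four rectangular sizes (TX_8X16, TX_16X8, TX_16X32, TX_32X16) and grows every per-transform-size lookup table to match. The new table entries follow directly from each transform's dimensions measured in 4x4 blocks; a minimal sanity-check sketch (illustrative only, not part of this commit, and assuming the lookup tables shown in the first hunk below are in scope):

#include <assert.h>

// An 8x16 transform spans 8/4 = 2 columns and 16/4 = 4 rows of 4x4 blocks.
static void check_tx_8x16_lookup_entries(void) {
  assert(num_4x4_blocks_wide_txsize_lookup[TX_8X16] == 8 / 4);          // 2
  assert(num_4x4_blocks_high_txsize_lookup[TX_8X16] == 16 / 4);         // 4
  assert(num_4x4_blocks_txsize_lookup[TX_8X16] == (8 / 4) * (16 / 4));  // 8
}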
@@ -60,41 +60,41 @@ static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES] = {
1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, IF_EXT_PARTITION(8, 4, 8)
};
static const uint8_t num_4x4_blocks_txsize_lookup[TX_SIZES_ALL] = {
1, 4, 16, 64,
#if CONFIG_EXT_TX
2, 2, 8, 8, 32, 32
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_wide_txsize_lookup[TX_SIZES_ALL] = {
1, 2, 4, 8,
#if CONFIG_EXT_TX
1, 2, 2, 4, 4, 8
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_high_txsize_lookup[TX_SIZES_ALL] = {
1, 2, 4, 8,
#if CONFIG_EXT_TX
2, 1, 4, 2, 8, 4
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_txsize_log2_lookup[TX_SIZES_ALL] = {
0, 2, 4, 6,
#if CONFIG_EXT_TX
1, 1, 3, 3, 5, 5
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_wide_txsize_log2_lookup[TX_SIZES_ALL] = {
0, 1, 2, 3,
#if CONFIG_EXT_TX
0, 1, 1, 2, 2, 3
#endif // CONFIG_EXT_TX
};
static const uint8_t num_4x4_blocks_high_txsize_log2_lookup[TX_SIZES_ALL] = {
0, 1, 2, 3,
#if CONFIG_EXT_TX
1, 0, 2, 1, 3, 2
#endif // CONFIG_EXT_TX
};
@@ -374,9 +374,13 @@ static const BLOCK_SIZE txsize_to_bsize[TX_SIZES_ALL] = {
BLOCK_16X16,  // TX_16X16
BLOCK_32X32,  // TX_32X32
#if CONFIG_EXT_TX
BLOCK_4X8,    // TX_4X8
BLOCK_8X4,    // TX_8X4
BLOCK_8X16,   // TX_8X16
BLOCK_16X8,   // TX_16X8
BLOCK_16X32,  // TX_16X32
BLOCK_32X16,  // TX_32X16
#endif // CONFIG_EXT_TX
};
static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
@@ -385,9 +389,13 @@ static const TX_SIZE txsize_sqr_map[TX_SIZES_ALL] = {
TX_16X16,  // TX_16X16
TX_32X32,  // TX_32X32
#if CONFIG_EXT_TX
TX_4X4,    // TX_4X8
TX_4X4,    // TX_8X4
TX_8X8,    // TX_8X16
TX_8X8,    // TX_16X8
TX_16X16,  // TX_16X32
TX_16X16,  // TX_32X16
#endif // CONFIG_EXT_TX
};
static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
@@ -396,9 +404,13 @@ static const TX_SIZE txsize_sqr_up_map[TX_SIZES_ALL] = {
TX_16X16,  // TX_16X16
TX_32X32,  // TX_32X32
#if CONFIG_EXT_TX
TX_8X8,    // TX_4X8
TX_8X8,    // TX_8X4
TX_16X16,  // TX_8X16
TX_16X16,  // TX_16X8
TX_32X32,  // TX_16X32
TX_32X32,  // TX_32X16
#endif // CONFIG_EXT_TX
};
static const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = {
...
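Reading txsize_sqr_map and txsize_sqr_up_map above: for a rectangular size, the first map evidently gives the largest square transform that fits inside the block (its smaller dimension) and the second the smallest square transform that contains it (its larger dimension). A brief illustrative check (not part of the commit, assuming the two tables are in scope):

// A 16x32 block contains a 16x16 square and fits inside a 32x32 square.
assert(txsize_sqr_map[TX_16X32] == TX_16X16);
assert(txsize_sqr_up_map[TX_16X32] == TX_32X32);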
@@ -59,7 +59,12 @@ const uint16_t band_count_table[TX_SIZES_ALL][8] = {
{ 1, 2, 3, 4, 3, 16 - 13, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 },
#if CONFIG_EXT_TX
{ 1, 2, 3, 4, 8, 32 - 18, 0 },
{ 1, 2, 3, 4, 8, 32 - 18, 0 },
{ 1, 2, 3, 4, 11, 128 - 21, 0 },
{ 1, 2, 3, 4, 11, 128 - 21, 0 },
{ 1, 2, 3, 4, 11, 512 - 21, 0 },
{ 1, 2, 3, 4, 11, 512 - 21, 0 },
#endif // CONFIG_EXT_TX
};
@@ -67,7 +72,12 @@ const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
{ 0, 1, 3, 6, 10, 13, 16, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
{ 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 1024, 0 },
#if CONFIG_EXT_TX
{ 0, 1, 3, 6, 10, 18, 32, 0 },
{ 0, 1, 3, 6, 10, 18, 32, 0 },
{ 0, 1, 3, 6, 10, 21, 128, 0 },
{ 0, 1, 3, 6, 10, 21, 128, 0 },
{ 0, 1, 3, 6, 10, 21, 512, 0 },
{ 0, 1, 3, 6, 10, 21, 512, 0 },
#endif // CONFIG_EXT_TX
};
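In band_count_table each row's last band is sized so that the six bands cover every coefficient of the block, and band_cum_count_table holds the matching running totals. Quick arithmetic for the new rows (illustrative only, not part of the commit):

// 1 + 2 + 3 + 4 + 11 = 21, so the last band holds N - 21 coefficients.
assert(1 + 2 + 3 + 4 + 11 + (128 - 21) == 8 * 16);   // TX_8X16 / TX_16X8
assert(1 + 2 + 3 + 4 + 11 + (512 - 21) == 16 * 32);  // TX_16X32 / TX_32X16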
@@ -116,7 +126,7 @@ const uint8_t vp10_coefband_trans_8x8plus[1024] = {
};
#if CONFIG_EXT_TX
-const uint8_t vp10_coefband_trans_8x4_4x8[32] = {
+const uint8_t vp10_coefband_trans_4x8_8x4[32] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4,
4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
...
@@ -156,7 +156,7 @@ void vp10_partial_adapt_probs(struct VP10Common *cm, int mi_row, int mi_col);
DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x8plus[1024]);
#if CONFIG_EXT_TX
-DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_8x4_4x8[32]);
+DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x8_8x4[32]);
#endif // CONFIG_EXT_TX
DECLARE_ALIGNED(16, extern const uint8_t, vp10_coefband_trans_4x4[16]);
@@ -169,7 +169,7 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
case TX_4X4: return vp10_coefband_trans_4x4;
#if CONFIG_EXT_TX
case TX_4X8:
-case TX_8X4: return vp10_coefband_trans_8x4_4x8;
+case TX_8X4: return vp10_coefband_trans_4x8_8x4;
#endif // CONFIG_EXT_TX
default: return vp10_coefband_trans_8x8plus;
}
@@ -228,6 +228,22 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec = !!*(const uint16_t *)a;
left_ec = l[0] != 0;
break;
case TX_8X16:
above_ec = !!*(const uint16_t *)a;
left_ec = !!*(const uint32_t *)l;
break;
case TX_16X8:
above_ec = !!*(const uint32_t *)a;
left_ec = !!*(const uint16_t *)l;
break;
case TX_16X32:
above_ec = !!*(const uint32_t *)a;
left_ec = !!*(const uint64_t *)l;
break;
case TX_32X16:
above_ec = !!*(const uint64_t *)a;
left_ec = !!*(const uint32_t *)l;
break;
#endif // CONFIG_EXT_TX
case TX_8X8:
above_ec = !!*(const uint16_t *)a;
...
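In get_entropy_context, each case tests whether any above/left entropy context covering the block is nonzero by reading the contexts as one 16-, 32- or 64-bit word: with one ENTROPY_CONTEXT byte per 4x4 unit, a dimension of 8, 16 or 32 pixels corresponds to 2, 4 or 8 bytes. An equivalent scalar form of the new TX_8X16 case, shown only to make the word widths explicit (not part of the commit):

// 8 pixels wide  -> 2 above contexts (the uint16_t read in the patch);
// 16 pixels high -> 4 left contexts  (the uint32_t read in the patch).
above_ec = !!(a[0] | a[1]);
left_ec = !!(l[0] | l[1] | l[2] | l[3]);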
@@ -138,9 +138,13 @@ typedef uint8_t TX_SIZE;
#define TX_SIZES ((TX_SIZE)4)
#if CONFIG_EXT_TX
#define TX_4X8 ((TX_SIZE)4) // 4x8 transform
#define TX_8X4 ((TX_SIZE)5) // 8x4 transform
-#define TX_SIZES_ALL ((TX_SIZE)6) // Includes rectangular transforms
+#define TX_8X16 ((TX_SIZE)6) // 8x16 transform
#define TX_16X8 ((TX_SIZE)7) // 16x8 transform
#define TX_16X32 ((TX_SIZE)8) // 16x32 transform
#define TX_32X16 ((TX_SIZE)9) // 32x16 transform
#define TX_SIZES_ALL ((TX_SIZE)10) // Includes rectangular transforms
#else
#define TX_SIZES_ALL ((TX_SIZE)4)
#endif // CONFIG_EXT_TX
...
@@ -1251,6 +1251,82 @@ static const transform_2d FHT_8x4[] = {
{ fadst4, fidtx8 }, // V_FLIPADST
{ fidtx4, fadst8 }, // H_FLIPADST
};
static const transform_2d FHT_8x16[] = {
{ fdct16, fdct8 }, // DCT_DCT
{ fadst16, fdct8 }, // ADST_DCT
{ fdct16, fadst8 }, // DCT_ADST
{ fadst16, fadst8 }, // ADST_ADST
{ fadst16, fdct8 }, // FLIPADST_DCT
{ fdct16, fadst8 }, // DCT_FLIPADST
{ fadst16, fadst8 }, // FLIPADST_FLIPADST
{ fadst16, fadst8 }, // ADST_FLIPADST
{ fadst16, fadst8 }, // FLIPADST_ADST
{ fidtx16, fidtx8 }, // IDTX
{ fdct16, fidtx8 }, // V_DCT
{ fidtx16, fdct8 }, // H_DCT
{ fadst16, fidtx8 }, // V_ADST
{ fidtx16, fadst8 }, // H_ADST
{ fadst16, fidtx8 }, // V_FLIPADST
{ fidtx16, fadst8 }, // H_FLIPADST
};
static const transform_2d FHT_16x8[] = {
{ fdct8, fdct16 }, // DCT_DCT
{ fadst8, fdct16 }, // ADST_DCT
{ fdct8, fadst16 }, // DCT_ADST
{ fadst8, fadst16 }, // ADST_ADST
{ fadst8, fdct16 }, // FLIPADST_DCT
{ fdct8, fadst16 }, // DCT_FLIPADST
{ fadst8, fadst16 }, // FLIPADST_FLIPADST
{ fadst8, fadst16 }, // ADST_FLIPADST
{ fadst8, fadst16 }, // FLIPADST_ADST
{ fidtx8, fidtx16 }, // IDTX
{ fdct8, fidtx16 }, // V_DCT
{ fidtx8, fdct16 }, // H_DCT
{ fadst8, fidtx16 }, // V_ADST
{ fidtx8, fadst16 }, // H_ADST
{ fadst8, fidtx16 }, // V_FLIPADST
{ fidtx8, fadst16 }, // H_FLIPADST
};
static const transform_2d FHT_16x32[] = {
{ fdct32, fdct16 }, // DCT_DCT
{ fhalfright32, fdct16 }, // ADST_DCT
{ fdct32, fadst16 }, // DCT_ADST
{ fhalfright32, fadst16 }, // ADST_ADST
{ fhalfright32, fdct16 }, // FLIPADST_DCT
{ fdct32, fadst16 }, // DCT_FLIPADST
{ fhalfright32, fadst16 }, // FLIPADST_FLIPADST
{ fhalfright32, fadst16 }, // ADST_FLIPADST
{ fhalfright32, fadst16 }, // FLIPADST_ADST
{ fidtx32, fidtx16 }, // IDTX
{ fdct32, fidtx16 }, // V_DCT
{ fidtx32, fdct16 }, // H_DCT
{ fhalfright32, fidtx16 }, // V_ADST
{ fidtx32, fadst16 }, // H_ADST
{ fhalfright32, fidtx16 }, // V_FLIPADST
{ fidtx32, fadst16 }, // H_FLIPADST
};
static const transform_2d FHT_32x16[] = {
{ fdct16, fdct32 }, // DCT_DCT
{ fadst16, fdct32 }, // ADST_DCT
{ fdct16, fhalfright32 }, // DCT_ADST
{ fadst16, fhalfright32 }, // ADST_ADST
{ fadst16, fdct32 }, // FLIPADST_DCT
{ fdct16, fhalfright32 }, // DCT_FLIPADST
{ fadst16, fhalfright32 }, // FLIPADST_FLIPADST
{ fadst16, fhalfright32 }, // ADST_FLIPADST
{ fadst16, fhalfright32 }, // FLIPADST_ADST
{ fidtx16, fidtx32 }, // IDTX
{ fdct16, fidtx32 }, // V_DCT
{ fidtx16, fdct32 }, // H_DCT
{ fadst16, fidtx32 }, // V_ADST
{ fidtx16, fhalfright32 }, // H_ADST
{ fadst16, fidtx32 }, // V_FLIPADST
{ fidtx16, fhalfright32 }, // H_FLIPADST
};
#endif // CONFIG_EXT_TX
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
@@ -1299,10 +1375,12 @@ void vp10_fht4x8_c(const int16_t *input, tran_low_t *output, int stride,
  // Columns
  for (i = 0; i < n; ++i) {
-    for (j = 0; j < n2; ++j) temp_in[j] = input[j * stride + i] * 8;
+    for (j = 0; j < n2; ++j)
+      temp_in[j] = (tran_low_t)fdct_round_shift(
+          input[j * stride + i] * 8 * Sqrt2);
    ht.cols(temp_in, temp_out);
    for (j = 0; j < n2; ++j)
-      out[j * n + i] = (tran_low_t)fdct_round_shift(temp_out[j] * Sqrt2);
+      out[j * n + i] = temp_out[j];
  }
  // Rows
@@ -1327,10 +1405,12 @@ void vp10_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
  // Columns
  for (i = 0; i < n2; ++i) {
-    for (j = 0; j < n; ++j) temp_in[j] = input[j * stride + i] * 8;
+    for (j = 0; j < n; ++j)
+      temp_in[j] = (tran_low_t)fdct_round_shift(
+          input[j * stride + i] * 8 * Sqrt2);
    ht.cols(temp_in, temp_out);
    for (j = 0; j < n; ++j)
-      out[j * n2 + i] = (tran_low_t)fdct_round_shift(temp_out[j] * Sqrt2);
+      out[j * n2 + i] = temp_out[j];
  }
  // Rows
@@ -1341,6 +1421,137 @@ void vp10_fht8x4_c(const int16_t *input, tran_low_t *output, int stride,
}
// Note: overall scale factor of transform is 8 times unitary
}
void vp10_fht8x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
const int n = 8;
const int n2 = 16;
tran_low_t out[16 * 8];
tran_low_t temp_in[16], temp_out[16];
int i, j;
const transform_2d ht = FHT_8x16[tx_type];
int16_t flipped_input[16 * 8];
maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(
input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j)
out[j * n + i] = temp_out[j];
}
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
output[j + i * n] = (temp_out[j] + 1) >> 1;
}
// Note: overall scale factor of transform is 8 times unitary
}
void vp10_fht16x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
const int n = 8;
const int n2 = 16;
tran_low_t out[16 * 8];
tran_low_t temp_in[16], temp_out[16];
int i, j;
const transform_2d ht = FHT_16x8[tx_type];
int16_t flipped_input[16 * 8];
maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(
input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = temp_out[j];
}
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] = (temp_out[j] + 1) >> 1;
}
// Note: overall scale factor of transform is 8 times unitary
}
void vp10_fht16x32_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
const int n = 16;
const int n2 = 32;
tran_low_t out[32 * 16];
tran_low_t temp_in[32], temp_out[32];
int i, j;
const transform_2d ht = FHT_16x32[tx_type];
int16_t flipped_input[32 * 16];
maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(
input[j * stride + i] * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j)
out[j * n + i] = temp_out[j];
}
// Rows
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = out[j + i * n];
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
output[j + i * n] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
}
// Note: overall scale factor of transform is 4 times unitary
}
void vp10_fht32x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
const int n = 16;
const int n2 = 32;
tran_low_t out[32 * 16];
tran_low_t temp_in[32], temp_out[32];
int i, j;
const transform_2d ht = FHT_32x16[tx_type];
int16_t flipped_input[32 * 16];
maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
// Columns
for (i = 0; i < n2; ++i) {
for (j = 0; j < n; ++j)
temp_in[j] = (tran_low_t)fdct_round_shift(
input[j * stride + i] * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = temp_out[j];
}
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j)
temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
}
// Note: overall scale factor of transform is 4 times unitary
}
#endif // CONFIG_EXT_TX
void vp10_fdct8x8_quant_c(const int16_t *input, int stride,
@@ -1578,14 +1789,34 @@ void vp10_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride,
}
#if CONFIG_EXT_TX
void vp10_highbd_fht4x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht4x8_c(input, output, stride, tx_type);
}
void vp10_highbd_fht8x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht8x4_c(input, output, stride, tx_type);
}
void vp10_highbd_fht8x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht8x16_c(input, output, stride, tx_type);
}
void vp10_highbd_fht16x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht16x8_c(input, output, stride, tx_type);
}
void vp10_highbd_fht16x32_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht16x32_c(input, output, stride, tx_type);
}
void vp10_highbd_fht32x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
vp10_fht32x16_c(input, output, stride, tx_type);
}
#endif // CONFIG_EXT_TX
...
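A note on the scale factors in the new forward transforms above (an aside, not text from the commit): the 1:2 rectangular sizes carry an extra Sqrt2 multiplication in the column pass, presumably because the product of the two dimensions is not a power of four, so the combined two-pass gain would otherwise include a stray sqrt(2) that shifting alone cannot remove:

  sqrt(8 * 16)  = 8 * sqrt(2)
  sqrt(16 * 32) = 16 * sqrt(2)

With that factor folded in and the final right shift by 1 (8x16/16x8) or by 2 (16x32/32x16), the code's own comments put the overall gain at 8 and 4 times unitary, respectively.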
@@ -55,18 +55,46 @@ static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
}
#if CONFIG_EXT_TX
static void fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht4x8(src_diff, coeff, diff_stride, tx_type);
}
static void fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht8x4(src_diff, coeff, diff_stride, tx_type);
}
static void fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht8x16(src_diff, coeff, diff_stride, tx_type);
}
static void fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht16x8(src_diff, coeff, diff_stride, tx_type);
}
static void fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht16x32(src_diff, coeff, diff_stride, tx_type);
}
static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void) fwd_txfm_opt;
vp10_fht32x16(src_diff, coeff, diff_stride, tx_type);
}
#endif // CONFIG_EXT_TX
@@ -213,20 +241,52 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
}
#if CONFIG_EXT_TX
static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void) fwd_txfm_opt;
(void) bd;
vp10_highbd_fht4x8(src_diff, coeff, diff_stride, tx_type);