Commit 2b43501b authored by Debargha Mukherjee's avatar Debargha Mukherjee

Implement 64x32 and 32x64 transforms

Change-Id: Ifa983d83a509cdfad78f6400df7d60c8f5b4f68c
parent 26d3e45f
......@@ -257,6 +257,7 @@ void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride,
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
#if CONFIG_TX64X64
void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
int tx_type, int bd) {
int32_t txfm_buf[64 * 64];
......@@ -264,6 +265,21 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
// Forward 2D 32x64 transform, C reference version.
// Looks up the flip/1D-stage configuration for `tx_type` and hands off to
// the generic 2D forward-transform driver with a local intermediate buffer.
void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
                            int tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_32x64_cfg(tx_type);
  int32_t txfm_buf[32 * 64];  // one int32 per coefficient of the block
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
// Forward 2D 64x32 transform, C reference version.
// Mirrors av1_fwd_txfm2d_32x64_c for the transposed block shape.
void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride,
                            int tx_type, int bd) {
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_64x32_cfg(tx_type);
  int32_t txfm_buf[64 * 32];  // one int32 per coefficient of the block
  fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
}
#endif // CONFIG_TX64X64
static const TXFM_1D_CFG *fwd_txfm_col_cfg_ls[TX_TYPES_1D][TX_SIZES] = {
// DCT
{
......@@ -342,6 +358,39 @@ TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(int tx_type, int tx_size) {
return cfg;
}
#if CONFIG_TX64X64
// Builds the forward-transform configuration for a 32x64 block.
// Only DCT_DCT is currently supported for this size; the column stages use
// the dedicated 64-point DCT config and the row stages come from the shared
// per-size row-config table.
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_32x64_cfg(int tx_type) {
  TXFM_2D_FLIP_CFG cfg;
  const int tx_type_row = htx_tab[tx_type];
  const int tx_size_row = txsize_horz_map[TX_32X64];
  switch (tx_type) {
    case DCT_DCT:
      cfg.col_cfg = &fwd_txfm_1d_col_cfg_dct_64;
      cfg.row_cfg = fwd_txfm_row_cfg_ls[tx_type_row][tx_size_row];
      cfg.ud_flip = 0;
      cfg.lr_flip = 0;
      break;
    default:
      // Fix: previously `cfg` was returned uninitialized here when asserts
      // are compiled out (NDEBUG), which is undefined behavior for the
      // caller. Zero the configuration before the diagnostic abort.
      cfg.col_cfg = NULL;
      cfg.row_cfg = NULL;
      cfg.ud_flip = 0;
      cfg.lr_flip = 0;
      assert(0);
  }
  return cfg;
}
// Builds the forward-transform configuration for a 64x32 block.
// Only DCT_DCT is currently supported for this size; the row stages use the
// dedicated 64-point DCT config and the column stages come from the shared
// per-size column-config table.
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x32_cfg(int tx_type) {
  TXFM_2D_FLIP_CFG cfg;
  const int tx_type_col = vtx_tab[tx_type];
  const int tx_size_col = txsize_vert_map[TX_64X32];
  switch (tx_type) {
    case DCT_DCT:
      cfg.col_cfg = fwd_txfm_col_cfg_ls[tx_type_col][tx_size_col];
      cfg.row_cfg = &fwd_txfm_1d_row_cfg_dct_64;
      cfg.ud_flip = 0;
      cfg.lr_flip = 0;
      break;
    default:
      // Fix: previously `cfg` was returned uninitialized here when asserts
      // are compiled out (NDEBUG), which is undefined behavior for the
      // caller. Zero the configuration before the diagnostic abort.
      cfg.col_cfg = NULL;
      cfg.row_cfg = NULL;
      cfg.ud_flip = 0;
      cfg.lr_flip = 0;
      assert(0);
  }
  return cfg;
}
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type) {
TXFM_2D_FLIP_CFG cfg;
switch (tx_type) {
......@@ -358,3 +407,4 @@ TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type) {
}
return cfg;
}
#endif // CONFIG_TX64X64
......@@ -349,12 +349,24 @@ void av1_inv_txfm2d_add_32x32_c(const int32_t *input, uint16_t *output,
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X32, bd);
}
#if CONFIG_TX64X64
void av1_inv_txfm2d_add_64x64_c(const int32_t *input, uint16_t *output,
int stride, int tx_type, int bd) {
int txfm_buf[64 * 64 + 64 + 64];
TXFM_2D_FLIP_CFG cfg = av1_get_inv_txfm_64x64_cfg(tx_type);
inv_txfm2d_add_c(input, output, stride, &cfg, txfm_buf, -4, bd);
#if CONFIG_TX64X64
assert(fwd_shift_sum[TX_64X64] == -4);
#endif
}
void av1_inv_txfm2d_add_64x32_c(const int32_t *input, uint16_t *output,
int stride, int tx_type, int bd) {
int txfm_buf[64 * 32 + 64 + 64];
inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X64, bd);
}
// Inverse 2D 32x64 (tall) transform + reconstruction, C reference version.
// Reconstructs the residual from `input` coefficients and adds it into the
// `output` pixel buffer via the shared facade.
void av1_inv_txfm2d_add_32x64_c(const int32_t *input, uint16_t *output,
                                int stride, int tx_type, int bd) {
  // 32*64 coefficients plus extra workspace used by the facade.
  int txfm_buf[64 * 32 + 64 + 64];
  inv_txfm2d_add_facade(input, output, stride, txfm_buf, tx_type, TX_32X64,
                        bd);
}
#endif // CONFIG_TX64X64
......@@ -2813,6 +2813,10 @@ static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
16 - 1, // TX_16X8
16 - 1, // TX_16X32
32 - 1, // TX_32X16
#if CONFIG_TX64X64
32 - 1, // TX_32X64
64 - 1, // TX_64X32
#endif // CONFIG_TX64X64
4 - 1, // TX_4X16
16 - 1, // TX_16X4
8 - 1, // TX_8X32
......@@ -2835,6 +2839,10 @@ static const uint32_t av1_transform_masks[NUM_EDGE_DIRS][TX_SIZES_ALL] = {
8 - 1, // TX_16X8
32 - 1, // TX_16X32
16 - 1, // TX_32X16
#if CONFIG_TX64X64
64 - 1, // TX_32X64
32 - 1, // TX_64X32
#endif // CONFIG_TX64X64
16 - 1, // TX_4X16
4 - 1, // TX_16X4
32 - 1, // TX_8X32
......
......@@ -160,6 +160,8 @@ add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *out
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_iht64x64_4096_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
add_proto qw/void av1_iht32x64_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
add_proto qw/void av1_iht64x32_2048_add/, "const tran_low_t *input, uint8_t *output, int pitch, const struct txfm_param *param";
}
if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
......@@ -285,7 +287,11 @@ add_proto qw/void av1_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *ou
if (aom_config("CONFIG_DAALA_DCT32") ne "yes") {
specialize qw/av1_inv_txfm2d_add_32x32 avx2/;
}
add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_inv_txfm2d_add_64x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_inv_txfm2d_add_32x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
}
#
# Encoder functions below this point.
......@@ -354,6 +360,8 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
add_proto qw/void av1_fht64x32/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
add_proto qw/void av1_fht32x64/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
}
add_proto qw/void av1_fht4x8/, "const int16_t *input, tran_low_t *output, int stride, struct txfm_param *param";
......@@ -396,7 +404,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
}
}
add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
add_proto qw/void av1_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bsx, int bsy, int tx_type";
#fwd txfm
add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
......@@ -421,9 +429,14 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if (aom_config("CONFIG_DAALA_DCT32") ne "yes") {
specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
}
add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
if (aom_config("CONFIG_DAALA_DCT64") ne "yes") {
specialize qw/av1_fwd_txfm2d_64x64 sse4_1/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_fwd_txfm2d_32x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_64x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
if (aom_config("CONFIG_DAALA_DCT64") ne "yes") {
specialize qw/av1_fwd_txfm2d_64x64 sse4_1/;
}
}
#
# Motion search
......
......@@ -207,6 +207,8 @@ static INLINE int av1_rotate_tx_size(int tx_size) {
case TX_32X32: return TX_32X32;
#if CONFIG_TX64X64
case TX_64X64: return TX_64X64;
case TX_32X64: return TX_64X32;
case TX_64X32: return TX_32X64;
#endif
case TX_4X8: return TX_8X4;
case TX_8X4: return TX_4X8;
......@@ -352,7 +354,11 @@ void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
int bd);
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_cfg(int tx_type, int tx_size);
#if CONFIG_TX64X64
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x64_cfg(int tx_type);
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_64x32_cfg(int tx_type);
TXFM_2D_FLIP_CFG av1_get_fwd_txfm_32x64_cfg(int tx_type);
#endif // CONFIG_TX64X64
TXFM_2D_FLIP_CFG av1_get_inv_txfm_cfg(int tx_type, int tx_size);
#ifdef __cplusplus
}
......
This diff is collapsed.
......@@ -149,6 +149,9 @@ const uint16_t band_count_table[TX_SIZES_ALL][8] = {
{ 1, 2, 3, 4, 8, 32 - 18, 0 }, { 1, 2, 3, 4, 8, 32 - 18, 0 },
{ 1, 2, 3, 4, 11, 128 - 21, 0 }, { 1, 2, 3, 4, 11, 128 - 21, 0 },
{ 1, 2, 3, 4, 11, 512 - 21, 0 }, { 1, 2, 3, 4, 11, 512 - 21, 0 },
#if CONFIG_TX64X64
{ 1, 2, 3, 4, 11, 2048 - 21, 0 }, { 1, 2, 3, 4, 11, 2048 - 21, 0 },
#endif // CONFIG_TX64X64
{ 1, 2, 3, 4, 11, 64 - 21, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 },
{ 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 256 - 21, 0 },
};
......@@ -165,6 +168,9 @@ const uint16_t band_cum_count_table[TX_SIZES_ALL][8] = {
{ 0, 1, 3, 6, 10, 18, 32, 0 }, { 0, 1, 3, 6, 10, 18, 32, 0 },
{ 0, 1, 3, 6, 10, 21, 128, 0 }, { 0, 1, 3, 6, 10, 21, 128, 0 },
{ 0, 1, 3, 6, 10, 21, 512, 0 }, { 0, 1, 3, 6, 10, 21, 512, 0 },
#if CONFIG_TX64X64
{ 0, 1, 3, 6, 10, 21, 2048, 0 }, { 0, 1, 3, 6, 10, 21, 2048, 0 },
#endif // CONFIG_TX64X64
{ 0, 1, 3, 6, 10, 21, 64, 0 }, { 0, 1, 3, 6, 10, 21, 64, 0 },
{ 0, 1, 3, 6, 10, 21, 256, 0 }, { 0, 1, 3, 6, 10, 21, 256, 0 },
};
......
......@@ -326,6 +326,16 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8) |
*(const uint64_t *)(l + 16) | *(const uint64_t *)(l + 24));
break;
case TX_32X64:
above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8) |
*(const uint64_t *)(l + 16) | *(const uint64_t *)(l + 24));
break;
case TX_64X32:
above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8) |
*(const uint64_t *)(a + 16) | *(const uint64_t *)(a + 24));
left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
break;
#endif // CONFIG_TX64X64
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case TX_4X16:
......@@ -396,6 +406,14 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
break;
case TX_32X64:
above_ec = !!*(const uint64_t *)a;
left_ec = !!(*(const uint64_t *)l | *(const uint64_t *)(l + 8));
break;
case TX_64X32:
above_ec = !!(*(const uint64_t *)a | *(const uint64_t *)(a + 8));
left_ec = !!*(const uint64_t *)l;
break;
#endif // CONFIG_TX64X64
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
case TX_4X16:
......
......@@ -190,14 +190,18 @@ typedef enum ATTRIBUTE_PACKED {
TX_16X16, // 16x16 transform
TX_32X32, // 32x32 transform
#if CONFIG_TX64X64
TX_64X64, // 64x64 transform
TX_64X64, // 64x64 transform
#endif // CONFIG_TX64X64
TX_4X8, // 4x8 transform
TX_8X4, // 8x4 transform
TX_8X16, // 8x16 transform
TX_16X8, // 16x8 transform
TX_16X32, // 16x32 transform
TX_32X16, // 32x16 transform
#if CONFIG_TX64X64
TX_32X64, // 32x64 transform
TX_64X32, // 64x32 transform
#endif // CONFIG_TX64X64
TX_4X8, // 4x8 transform
TX_8X4, // 8x4 transform
TX_8X16, // 8x16 transform
TX_16X8, // 16x8 transform
TX_16X32, // 16x32 transform
TX_32X16, // 32x16 transform
TX_4X16, // 4x16 transform
TX_16X4, // 16x4 transform
TX_8X32, // 8x32 transform
......
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -81,6 +81,7 @@ void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output,
fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
}
#if CONFIG_TX64X64
void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
int stride, int tx_type, int bd) {
DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]);
......@@ -88,3 +89,4 @@ void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output,
(void)bd;
fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf);
}
#endif // CONFIG_TX64X64
......@@ -2446,7 +2446,7 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
{ daala_idtx64, daala_fdst64 }, // H_ADST
{ daala_fdst64, daala_idtx64 }, // V_FLIPADST
{ daala_idtx64, daala_fdst64 }, // H_FLIPADST
#endif
#endif // CONFIG_EXT_TX
#else
{ fdct64_col, fdct64_row }, // DCT_DCT
#if CONFIG_EXT_TX
......@@ -2465,8 +2465,8 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
{ fidtx64, fhalfright64 }, // H_ADST
{ fhalfright64, fidtx64 }, // V_FLIPADST
{ fidtx64, fhalfright64 }, // H_FLIPADST
#endif
#endif
#endif // CONFIG_EXT_TX
#endif // CONFIG_DAALA_DCT64
};
const transform_2d ht = FHT[tx_type];
tran_low_t out[4096];
......@@ -2506,19 +2506,137 @@ void av1_fht64x64_c(const int16_t *input, tran_low_t *output, int stride,
#endif
}
}
// Forward 64x32 (width x height) hybrid transform, C reference version.
// Applies the 32-point 1D transform down each of the 64 columns, then the
// 64-point 1D transform across each of the 32 rows, writing the result
// row-major into `output` (64 coefficients per row).
void av1_fht64x32_c(const int16_t *input, tran_low_t *output, int stride,
                    TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { column transform, row transform } pairs, indexed by tx_type.
  static const transform_2d FHT[] = {
    { fdct32, fdct64_row },  // DCT_DCT
#if CONFIG_EXT_TX
    { fhalfright32, fdct64_row },    // ADST_DCT
    { fdct32, fhalfright64 },        // DCT_ADST
    { fhalfright32, fhalfright64 },  // ADST_ADST
    { fhalfright32, fdct64_row },    // FLIPADST_DCT
    { fdct32, fhalfright64 },        // DCT_FLIPADST
    { fhalfright32, fhalfright64 },  // FLIPADST_FLIPADST
    { fhalfright32, fhalfright64 },  // ADST_FLIPADST
    { fhalfright32, fhalfright64 },  // FLIPADST_ADST
    { fidtx32, fidtx64 },            // IDTX
    { fdct32, fidtx64 },             // V_DCT
    { fidtx32, fdct64_row },         // H_DCT
    { fhalfright32, fidtx64 },       // V_ADST
    { fidtx32, fhalfright64 },       // H_ADST
    { fhalfright32, fidtx64 },       // V_FLIPADST
    { fidtx32, fhalfright64 },       // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };
  const transform_2d ht = FHT[tx_type];
  tran_low_t out[2048];  // 64*32 intermediate coefficients
  int i, j;
  tran_low_t temp_in[64], temp_out[64];
  const int n = 32;   // block height
  const int n2 = 64;  // block width
#if CONFIG_EXT_TX
  // Flip the input up/down and/or left/right for the FLIPADST variants.
  int16_t flipped_input[32 * 64];
  maybe_flip_input(&input, &stride, n, n2, flipped_input, tx_type);
#endif
  // Columns: each input sample is scaled by Sqrt2 before the column
  // transform, and the column result is stored (with a rounding shift of 2)
  // into `out`, laid out row-major for the row pass.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j)
      temp_in[j] = (tran_low_t)fdct_round_shift(input[j * stride + i] * Sqrt2);
    ht.cols(temp_in, temp_out);
    for (j = 0; j < n; ++j)
      out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
  }
  // Rows: transform each length-64 row and write it to `output` with a
  // second rounding shift of 2.
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
    ht.rows(temp_in, temp_out);
    for (j = 0; j < n2; ++j)
      output[j + i * n2] =
          (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
  }
}
// Forward 32x64 (width x height) hybrid transform, C reference version.
// Applies the 32-point 1D transform across each of the 64 rows first, then
// the 64-point 1D transform down each of the 32 columns, writing the result
// into `output` with 32 coefficients per row.
void av1_fht32x64_c(const int16_t *input, tran_low_t *output, int stride,
                    TxfmParam *txfm_param) {
  int tx_type = txfm_param->tx_type;
#if CONFIG_MRC_TX
  assert(tx_type != MRC_DCT && "Invalid tx type for tx size");
#endif  // CONFIG_MRC_TX
#if CONFIG_DCT_ONLY
  assert(tx_type == DCT_DCT);
#endif
  // { column transform, row transform } pairs, indexed by tx_type.
  static const transform_2d FHT[] = {
    { fdct64_row, fdct32 },  // DCT_DCT
#if CONFIG_EXT_TX
    { fhalfright64, fdct32 },        // ADST_DCT
    { fdct64_row, fhalfright32 },    // DCT_ADST
    { fhalfright64, fhalfright32 },  // ADST_ADST
    { fhalfright64, fdct32 },        // FLIPADST_DCT
    { fdct64_row, fhalfright32 },    // DCT_FLIPADST
    { fhalfright64, fhalfright32 },  // FLIPADST_FLIPADST
    { fhalfright64, fhalfright32 },  // ADST_FLIPADST
    { fhalfright64, fhalfright32 },  // FLIPADST_ADST
    { fidtx64, fidtx32 },            // IDTX
    { fdct64_row, fidtx32 },         // V_DCT
    { fidtx64, fdct32 },             // H_DCT
    { fhalfright64, fidtx32 },       // V_ADST
    { fidtx64, fhalfright32 },       // H_ADST
    { fhalfright64, fidtx32 },       // V_FLIPADST
    { fidtx64, fhalfright32 },       // H_FLIPADST
#endif  // CONFIG_EXT_TX
  };
  const transform_2d ht = FHT[tx_type];
  tran_low_t out[32 * 64];  // intermediate coefficients
  int i, j;
  tran_low_t temp_in[64], temp_out[64];
  const int n = 32;   // block width
  const int n2 = 64;  // block height
#if CONFIG_EXT_TX
  // Flip the input up/down and/or left/right for the FLIPADST variants.
  int16_t flipped_input[32 * 64];
  maybe_flip_input(&input, &stride, n2, n, flipped_input, tx_type);
#endif
  // Rows: each input sample is scaled by Sqrt2 before the row transform,
  // and the row result is stored (with a rounding shift of 2) into `out`,
  // transposed so that the column pass reads contiguously.
  for (i = 0; i < n2; ++i) {
    for (j = 0; j < n; ++j)
      temp_in[j] = (tran_low_t)fdct_round_shift(input[i * stride + j] * Sqrt2);
    ht.rows(temp_in, temp_out);
    for (j = 0; j < n; ++j)
      out[j * n2 + i] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
  }
  // Columns: transform each length-64 column and write it to `output` with
  // a second rounding shift of 2.
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
    ht.cols(temp_in, temp_out);
    for (j = 0; j < n2; ++j)
      output[i + j * n] = (tran_low_t)ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
  }
}
#endif // CONFIG_TX64X64
#if CONFIG_EXT_TX
// Forward identity transform.
// Forward identity transform: for IDTX, each source-difference sample is
// copied to the coefficient buffer with a power-of-two scale; for any other
// tx_type this function writes nothing.
// bsx/bsy are the block width and height in samples.
//
// Fix: the rendered block contained BOTH the pre-change (single `bs`) and
// post-change (`bsx`/`bsy`) lines of the diff, yielding duplicate
// conflicting signatures and loops; only the updated rectangular version is
// kept here.
void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride,
                    int bsx, int bsy, int tx_type) {
  int r, c;
  const int pels = bsx * bsy;
  // Shift 3 for blocks up to 256 pels, 2 up to 1024 pels, 1 above that.
  const int shift = 3 - ((pels > 256) + (pels > 1024));
  if (tx_type == IDTX) {
    for (r = 0; r < bsy; ++r) {
      for (c = 0; c < bsx; ++c) coeff[c] = src_diff[c] * (1 << shift);
      src_diff += stride;
      coeff += bsx;
    }
  }
}
......
......@@ -140,11 +140,31 @@ static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_EXT_TX
if (txfm_param->tx_type == IDTX)
av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, txfm_param->tx_type);
av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, 64, txfm_param->tx_type);
else
#endif
av1_fht64x64(src_diff, coeff, diff_stride, txfm_param);
}
// Forward 32x64 transform dispatch: identity blocks go through the scaled
// copy path, everything else through the full hybrid transform.
static void fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff,
                           int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_EXT_TX
  if (txfm_param->tx_type == IDTX) {
    av1_fwd_idtx_c(src_diff, coeff, diff_stride, 32, 64, txfm_param->tx_type);
    return;
  }
#endif
  av1_fht32x64(src_diff, coeff, diff_stride, txfm_param);
}
// Forward 64x32 transform dispatch: identity blocks go through the scaled
// copy path, everything else through the full hybrid transform.
static void fwd_txfm_64x32(const int16_t *src_diff, tran_low_t *coeff,
                           int diff_stride, TxfmParam *txfm_param) {
#if CONFIG_EXT_TX
  if (txfm_param->tx_type == IDTX) {
    av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, 32, txfm_param->tx_type);
    return;
  }
#endif
  av1_fht64x32(src_diff, coeff, diff_stride, txfm_param);
}
#endif // CONFIG_TX64X64
#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX)
......@@ -412,6 +432,85 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
}
#if CONFIG_TX64X64
// High-bitdepth forward 32x64 transform dispatch. Writes 32-bit
// coefficients through `coeff` (reinterpreted as int32_t).
static void highbd_fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff,
                                  int diff_stride, TxfmParam *txfm_param) {
  int32_t *dst_coeff = (int32_t *)coeff;
  const int tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  switch (tx_type) {
    case DCT_DCT:
      av1_fwd_txfm2d_32x64_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
      break;
#if CONFIG_EXT_TX
    // All non-DCT, non-identity types fall back to DCT_DCT (see TODO below).
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // TODO(sarahparker)
      // I've deleted the 64x64 implementations that existed in lieu
      // of adst, flipadst and identity for simplicity but will bring back
      // in a later change. This shouldn't impact performance since
      // DCT_DCT is the only extended type currently allowed for 64x64,
      // as dictated by get_ext_tx_set_type in blockd.h.
      av1_fwd_txfm2d_32x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
      break;
    case IDTX:
      av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 32, 64, tx_type);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
// High-bitdepth forward 64x32 transform dispatch. Writes 32-bit
// coefficients through `coeff` (reinterpreted as int32_t).
static void highbd_fwd_txfm_64x32(const int16_t *src_diff, tran_low_t *coeff,
                                  int diff_stride, TxfmParam *txfm_param) {
  int32_t *dst_coeff = (int32_t *)coeff;
  const int tx_type = txfm_param->tx_type;
  const int bd = txfm_param->bd;
  switch (tx_type) {
    case DCT_DCT:
      av1_fwd_txfm2d_64x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
      break;
#if CONFIG_EXT_TX
    // All non-DCT, non-identity types fall back to DCT_DCT (see TODO below).
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // TODO(sarahparker)
      // I've deleted the 64x64 implementations that existed in lieu
      // of adst, flipadst and identity for simplicity but will bring back
      // in a later change. This shouldn't impact performance since
      // DCT_DCT is the only extended type currently allowed for 64x64,
      // as dictated by get_ext_tx_set_type in blockd.h.
      av1_fwd_txfm2d_64x32_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
      break;
    case IDTX:
      av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 64, 32, tx_type);
      break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param) {
int32_t *dst_coeff = (int32_t *)coeff;
......@@ -445,7 +544,7 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
av1_fwd_txfm2d_64x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
break;
case IDTX:
av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 64, tx_type);
av1_fwd_idtx_c(src_diff, dst_coeff, diff_stride, 64, 64, tx_type);
break;
#endif // CONFIG_EXT_TX
default: assert(0); break;
......@@ -461,6 +560,12 @@ void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
case TX_64X64:
fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_32X64:
fwd_txfm_32x64(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_64X32:
fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
......@@ -513,6 +618,12 @@ void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
case TX_64X64:
highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_32X64:
highbd_fwd_txfm_32x64(src_diff, coeff, diff_stride, txfm_param);
break;
case TX_64X32:
highbd_fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param);
......
......@@ -879,6 +879,26 @@ static void get_entropy_contexts_plane(
!!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
break;
#if CONFIG_TX64X64
case TX_32X64:
for (i = 0; i < num_4x4_w; i += 16)
t_above[i] =
!!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8]);
for (i = 0; i < num_4x4_h; i += 32)
t_left[i] =
!!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8] |
*(const uint64_t *)&left[i + 16] |
*(const uint64_t *)&left[i + 24]);
break;
case TX_64X32:
for (i = 0; i < num_4x4_w; i += 32)
t_above[i] =
!!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8] |
*(const uint64_t *)&above[i + 16] |
*(const uint64_t *)&above[i + 24]);
for (i = 0; i < num_4x4_h; i += 16)
t_left[i] =
!!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
break;
case TX_64X64:
for (i = 0; i < num_4x4_w; i += 32)
t_above[i] =
......@@ -988,6 +1008,20 @@ static void get_entropy_contexts_plane(
t_left[i] = !!*(const uint64_t *)&left[i];
break;
#if CONFIG_TX64X64
case TX_32X64:
for (i = 0; i < num_4x4_w; i += 8)
t_above[i] = !!*(const uint64_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 16)
t_left[i] =
!!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]);
break;
case TX_64X32:
for (i = 0; i < num_4x4_w; i += 16)
t_above[i] =
!!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8]);
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
case TX_64X64:
for (i = 0; i < num_4x4_w; i += 16)
t_above[i] =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment