Commit 6a47cff8 authored by Debargha Mukherjee

Further work on 64x64 fwd/inv transform support

For higher level fwd and inv transform functions.

Change-Id: I91518250a0be7d94aada7519f6c9e7ed024574fb
parent 1b5bbf8e
......@@ -111,6 +111,9 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add sse2 avx2/;
add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht32x32_1024_add/;
}
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
......@@ -141,6 +144,10 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add/;
add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht32x32_1024_add/;
} else {
add_proto qw/void av1_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/av1_iht4x4_16_add sse2 neon dspr2/;
......@@ -169,6 +176,9 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht16x16_256_add sse2 avx2 dspr2/;
add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht32x32_1024_add/;
if (aom_config("CONFIG_EXT_TX") ne "yes") {
specialize qw/av1_iht4x4_16_add msa/;
specialize qw/av1_iht8x8_64_add msa/;
......@@ -176,6 +186,13 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
}
}
}
add_proto qw/void av1_iht32x32_1024_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht32x32_1024_add/;
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_iht64x64_4096_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/av1_iht64x64_4096_add/;
}
if (aom_config("CONFIG_NEW_QUANT") eq "yes") {
add_proto qw/void quantize_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band";
......
......@@ -134,24 +134,32 @@ typedef enum ATTRIBUTE_PACKED {
#if CONFIG_CB4X4
TX_2X2, // 2x2 transform
#endif
TX_4X4, // 4x4 transform
TX_8X8, // 8x8 transform
TX_16X16, // 16x16 transform
TX_32X32, // 32x32 transform
TX_4X8, // 4x8 transform
TX_8X4, // 8x4 transform
TX_8X16, // 8x16 transform
TX_16X8, // 16x8 transform
TX_16X32, // 16x32 transform
TX_32X16, // 32x16 transform
TX_SIZES_ALL, // Includes rectangular transforms
TX_SIZES = TX_32X32 + 1, // Does NOT include rectangular transforms
TX_INVALID = 255 // Invalid transform size
TX_4X4, // 4x4 transform
TX_8X8, // 8x8 transform
TX_16X16, // 16x16 transform
TX_32X32, // 32x32 transform
#if CONFIG_TX64X64
TX_64X64, // 64x64 transform
#endif // CONFIG_TX64X64
TX_4X8, // 4x8 transform
TX_8X4, // 8x4 transform
TX_8X16, // 8x16 transform
TX_16X8, // 16x8 transform
TX_16X32, // 16x32 transform
TX_32X16, // 32x16 transform
#if 0 // CONFIG_TX64X64
// TODO(debargha): To be enabled later
TX_32X64, // 32x64 transform
TX_64X32, // 64x32 transform
#endif // CONFIG_TX64X64
TX_SIZES_ALL, // Includes rectangular transforms
TX_SIZES = TX_4X8, // Does NOT include rectangular transforms
TX_INVALID = 255 // Invalid transform size
} TX_SIZE;
#define MAX_TX_DEPTH (TX_32X32 - TX_4X4)
#define MAX_TX_SIZE_LOG2 5
#define MAX_TX_SIZE_LOG2 (5 + CONFIG_TX64X64)
#define MAX_TX_SIZE (1 << MAX_TX_SIZE_LOG2)
#define MIN_TX_SIZE_LOG2 2
#define MIN_TX_SIZE (1 << MIN_TX_SIZE_LOG2)
......
......@@ -23,14 +23,14 @@
// Returns the scale value associated with a transform size, keyed on the
// size that txsize_sqr_up_map[] maps tx_size to:
//   1 when that size is TX_32X32,
//   2 when that size is TX_64X64 (only in CONFIG_TX64X64 builds),
//   0 otherwise.
//
// xd and tx_type are kept for interface compatibility; neither influences the
// result. The previous body returned unconditionally before the 64x64 check,
// so the value 2 could never be produced.
int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type,
                 const TX_SIZE tx_size) {
  const TX_SIZE sqr_up_size = txsize_sqr_up_map[tx_size];
  (void)xd;
  (void)tx_type;
  if (sqr_up_size == TX_32X32) return 1;
#if CONFIG_TX64X64
  if (sqr_up_size == TX_64X64) return 2;
#endif  // CONFIG_TX64X64
  return 0;
}
// NOTE: The implementation of all inverses need to be aware of the fact
......@@ -58,6 +58,14 @@ static void iidtx32_c(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 32; ++i) output[i] = input[i] * 4;
}
#if CONFIG_TX64X64
// 64-point inverse identity transform (C version).
// Each of the 64 outputs is dct_const_round_shift(input[k] * 4 * Sqrt2),
// narrowed to tran_low_t. Element k is read before element k is written, so
// the routine also works when input and output are the same buffer.
// NOTE(review): Sqrt2 is presumably a fixed-point sqrt(2) constant that
// dct_const_round_shift() scales back down -- confirm against its definition.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
  int k = 0;
  while (k < 64) {
    output[k] = (tran_low_t)dct_const_round_shift(input[k] * 4 * Sqrt2);
    ++k;
  }
}
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
// For use in lieu of ADST
......@@ -94,12 +102,6 @@ static void idct64_row_c(const tran_low_t *input, tran_low_t *output) {
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
// 64-point inverse identity transform (C version).
// Writes output[i] = dct_const_round_shift(input[i] * 4 * Sqrt2), cast to
// tran_low_t, for each of the 64 entries.
// NOTE(review): Sqrt2 is presumably a fixed-point sqrt(2) constant that
// dct_const_round_shift() scales back down -- confirm against its definition.
static void iidtx64_c(const tran_low_t *input, tran_low_t *output) {
int i;
for (i = 0; i < 64; ++i)
output[i] = (tran_low_t)dct_const_round_shift(input[i] * 4 * Sqrt2);
}
// For use in lieu of ADST
static void ihalfright64_c(const tran_low_t *input, tran_low_t *output) {
int i;
......@@ -174,7 +176,10 @@ static void highbd_iidtx64_c(const tran_low_t *input, tran_low_t *output,
output[i] =
HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[i] * 4 * Sqrt2), bd);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
// For use in lieu of ADST
static void highbd_ihalfright64_c(const tran_low_t *input, tran_low_t *output,
int bd) {
......@@ -215,7 +220,6 @@ static void highbd_idct64_row_c(const tran_low_t *input, tran_low_t *output,
for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i];
}
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
#endif // CONFIG_AOM_HIGHBITDEPTH
// Inverse identity transform and add.
......@@ -223,7 +227,7 @@ static void highbd_idct64_row_c(const tran_low_t *input, tran_low_t *output,
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int bs, int tx_type) {
int r, c;
const int shift = bs < 32 ? 3 : 2;
const int shift = bs < 32 ? 3 : (bs < 64 ? 2 : 1);
if (tx_type == IDTX) {
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c)
......@@ -929,6 +933,7 @@ void av1_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
}
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
......@@ -938,6 +943,7 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ ihalfright64_c, idct64_row_c }, // ADST_DCT
{ idct64_col_c, ihalfright64_c }, // DCT_ADST
{ ihalfright64_c, ihalfright64_c }, // ADST_ADST
#if CONFIG_EXT_TX
{ ihalfright64_c, idct64_row_c }, // FLIPADST_DCT
{ idct64_col_c, ihalfright64_c }, // DCT_FLIPADST
{ ihalfright64_c, ihalfright64_c }, // FLIPADST_FLIPADST
......@@ -950,6 +956,7 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
{ iidtx64_c, ihalfright64_c }, // H_ADST
{ ihalfright64_c, iidtx64_c }, // V_FLIPADST
{ iidtx64_c, ihalfright64_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
int i, j;
......@@ -979,7 +986,9 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
IHT_64[tx_type].cols(out[i], out[i]);
}
#if CONFIG_EXT_TX
maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
#endif // CONFIG_EXT_TX
// Sum with the destination
for (i = 0; i < 64; ++i) {
......@@ -991,7 +1000,6 @@ void av1_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest, int stride,
}
}
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
// idct
void av1_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
......@@ -1056,6 +1064,14 @@ void av1_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
aom_idct32x32_1024_add(input, dest, stride);
}
#if CONFIG_TX64X64
// 64x64 inverse DCT and add.
// Forwards input, dest and stride to av1_iht64x64_4096_add() with the
// transform type fixed to DCT_DCT.
// eob is accepted for signature parity with the other av1_idctNxN_add()
// helpers but is not used: no eob-based shortcut exists for this size here.
void av1_idct64x64_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
(void)eob;  // Silence the unused-parameter warning.
av1_iht64x64_4096_add(input, dest, stride, DCT_DCT);
}
#endif // CONFIG_TX64X64
void av1_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest, int stride,
int eob, TX_TYPE tx_type, int lossless) {
if (lossless) {
......@@ -1206,6 +1222,35 @@ void av1_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest, int stride,
}
}
#if CONFIG_TX64X64
// Inverse 64x64 transform dispatcher.
// Selects the inverse routine for tx_type and passes input, dest and stride
// through to it:
//   - DCT_DCT            -> av1_idct64x64_add()
//   - IDTX               -> inv_idtx_add_c() with block size 64 (EXT_TX only)
//   - every other type   -> av1_iht64x64_4096_add_c()        (EXT_TX only)
// Any transform type not listed for the current build trips assert(0).
void av1_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, TX_TYPE tx_type) {
  switch (tx_type) {
#if CONFIG_EXT_TX
    case IDTX:
      inv_idtx_add_c(input, dest, stride, 64, tx_type);
      break;
    // 1-D (vertical-only / horizontal-only) transform types.
    case H_FLIPADST:
    case V_FLIPADST:
    case H_ADST:
    case V_ADST:
    case H_DCT:
    case V_DCT:
    // 2-D hybrid transform types.
    case FLIPADST_ADST:
    case ADST_FLIPADST:
    case FLIPADST_FLIPADST:
    case DCT_FLIPADST:
    case FLIPADST_DCT:
    case ADST_ADST:
    case DCT_ADST:
    case ADST_DCT:
      av1_iht64x64_4096_add_c(input, dest, stride, tx_type);
      break;
#endif  // CONFIG_EXT_TX
    case DCT_DCT:
      av1_idct64x64_add(input, dest, stride, eob);
      break;
    default:
      assert(0);
      break;
  }
}
#endif // CONFIG_TX64X64
#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) {
......@@ -1835,6 +1880,7 @@ void av1_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
}
}
}
#endif // CONFIG_EXT_TX
#if CONFIG_TX64X64
void av1_highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
......@@ -1844,6 +1890,7 @@ void av1_highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
{ highbd_ihalfright64_c, highbd_idct64_row_c }, // ADST_DCT
{ highbd_idct64_col_c, highbd_ihalfright64_c }, // DCT_ADST
{ highbd_ihalfright64_c, highbd_ihalfright64_c }, // ADST_ADST
#if CONFIG_EXT_TX
{ highbd_ihalfright64_c, highbd_idct64_row_c }, // FLIPADST_DCT
{ highbd_idct64_col_c, highbd_ihalfright64_c }, // DCT_FLIPADST
{ highbd_ihalfright64_c, highbd_ihalfright64_c }, // FLIPADST_FLIPADST
......@@ -1856,6 +1903,7 @@ void av1_highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
{ highbd_iidtx64_c, highbd_ihalfright64_c }, // H_ADST
{ highbd_ihalfright64_c, highbd_iidtx64_c }, // V_FLIPADST
{ highbd_iidtx64_c, highbd_ihalfright64_c }, // H_FLIPADST
#endif // CONFIG_EXT_TX
};
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
......@@ -1887,7 +1935,9 @@ void av1_highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
HIGH_IHT_64[tx_type].cols(out[i], out[i], bd);
}
#if CONFIG_EXT_TX
maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 64, 64);
#endif // CONFIG_EXT_TX
// Sum with the destination
for (i = 0; i < 64; ++i) {
......@@ -1900,7 +1950,6 @@ void av1_highbd_iht64x64_4096_add_c(const tran_low_t *input, uint8_t *dest8,
}
}
#endif // CONFIG_TX64X64
#endif // CONFIG_EXT_TX
// idct
void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
......@@ -2155,6 +2204,42 @@ void av1_highbd_inv_txfm_add_32x32(const tran_low_t *input, uint8_t *dest,
default: assert(0); break;
}
}
#if CONFIG_TX64X64
// High-bitdepth inverse 64x64 transform dispatcher.
//   - DCT_DCT          -> av1_inv_txfm2d_add_64x64() on the 16-bit view of
//                         dest obtained through CONVERT_TO_SHORTPTR()
//   - IDTX             -> highbd_inv_idtx_add_c() with block size 64
//                         (EXT_TX only)
//   - every other type -> av1_highbd_iht64x64_4096_add_c()   (EXT_TX only)
// eob is unused. Any transform type not handled in the current build trips
// assert(0).
void av1_highbd_inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
                                   int stride, int eob, int bd,
                                   TX_TYPE tx_type) {
  (void)eob;

  if (tx_type == DCT_DCT) {
    av1_inv_txfm2d_add_64x64(input, CONVERT_TO_SHORTPTR(dest), stride,
                             DCT_DCT, bd);
    return;
  }

#if CONFIG_EXT_TX
  switch (tx_type) {
    case IDTX:
      highbd_inv_idtx_add_c(input, dest, stride, 64, tx_type, bd);
      return;
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      av1_highbd_iht64x64_4096_add_c(input, dest, stride, tx_type, bd);
      return;
    default:
      break;
  }
#endif  // CONFIG_EXT_TX

  // Reached only for a transform type this build does not support.
  assert(0);
}
#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
......@@ -2165,6 +2250,11 @@ void inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
const int lossless = inv_txfm_param->lossless;
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
av1_inv_txfm_add_64x64(input, dest, stride, eob, tx_type);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
av1_inv_txfm_add_32x32(input, dest, stride, eob, tx_type);
break;
......@@ -2206,6 +2296,11 @@ void highbd_inv_txfm_add(const tran_low_t *input, uint8_t *dest, int stride,
const int lossless = inv_txfm_param->lossless;
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
av1_highbd_inv_txfm_add_64x64(input, dest, stride, eob, bd, tx_type);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
av1_highbd_inv_txfm_add_32x32(input, dest, stride, eob, bd, tx_type);
break;
......
......@@ -24,6 +24,24 @@ static INLINE void fdct32x32(int rd_transform, const int16_t *src,
av1_fht32x32(src, dst, src_stride, DCT_DCT);
}
#if CONFIG_TX64X64
// Full forward 64x64 DCT.
// Forwards src, dst and src_stride to av1_fht64x64() with the transform type
// fixed to DCT_DCT.
static INLINE void fdct64x64(const int16_t *src, tran_low_t *dst,
int src_stride) {
av1_fht64x64(src, dst, src_stride, DCT_DCT);
}
// DC-only forward 64x64 DCT.
// Clears all 4096 coefficients in dst, then stores in dst[0] the sum of the
// 64x64 block of residuals at src (rows src_stride apart), rounded and
// shifted right by 5 bits via ROUND_POWER_OF_TWO_SIGNED().
static INLINE void fdct64x64_1(const int16_t *src, tran_low_t *dst,
                               int src_stride) {
  int32_t dc_sum = 0;
  int row, col;

  memset(dst, 0, sizeof(*dst) * 4096);

  for (row = 0; row < 64; ++row) {
    const int16_t *const line = src + row * src_stride;
    for (col = 0; col < 64; ++col) {
      dc_sum += line[col];
    }
  }

  // Note: this scaling makes the transform 2 times unitary
  dst[0] = ROUND_POWER_OF_TWO_SIGNED(dc_sum, 5);
}
#endif // CONFIG_TX64X64
static void fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless) {
if (lossless) {
......@@ -192,6 +210,41 @@ static void fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
}
}
#if CONFIG_TX64X64
// Forward 64x64 transform dispatcher.
//   src_diff     - residual block, rows diff_stride apart
//   coeff        - destination for the transform coefficients
//   tx_type      - transform type to apply
//   fwd_txfm_opt - for DCT_DCT only: FWD_TXFM_OPT_NORMAL runs the full
//                  transform, any other value computes just the DC term
//
// Routing:
//   - DCT_DCT          -> fdct64x64() or fdct64x64_1()
//   - IDTX             -> av1_fwd_idtx_c() with block size 64 (EXT_TX only)
//   - every other type -> av1_fht64x64()                      (EXT_TX only)
// Any transform type not handled in the current build trips assert(0).
static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
                           int diff_stride, TX_TYPE tx_type,
                           FWD_TXFM_OPT fwd_txfm_opt) {
  switch (tx_type) {
    case DCT_DCT:
      if (fwd_txfm_opt == FWD_TXFM_OPT_NORMAL)
        fdct64x64(src_diff, coeff, diff_stride);
      else  // FWD_TXFM_OPT_DC
        fdct64x64_1(src_diff, coeff, diff_stride);
      break;
#if CONFIG_EXT_TX
    case ADST_DCT:
    case DCT_ADST:
    case ADST_ADST:
    case FLIPADST_DCT:
    case DCT_FLIPADST:
    case FLIPADST_FLIPADST:
    case ADST_FLIPADST:
    case FLIPADST_ADST:
    case V_DCT:
    case H_DCT:
    case V_ADST:
    case H_ADST:
    case V_FLIPADST:
    case H_FLIPADST:
      // The 1-D (V_*/H_*) types must go through the 64x64 kernel as well.
      // They were previously sent to av1_fht32x32(), which transforms only a
      // 32x32 block and does not match the 64x64 inverse used for these
      // types.
      av1_fht64x64(src_diff, coeff, diff_stride, tx_type);
      break;
    case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break;
#endif  // CONFIG_EXT_TX
    default: assert(0); break;
  }
}
#endif // CONFIG_TX64X64
#if CONFIG_AOM_HIGHBITDEPTH
static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless,
......@@ -379,6 +432,40 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff,
default: assert(0); break;
}
}
#if CONFIG_TX64X64
// High-bitdepth forward 64x64 transform dispatcher.
//   - IDTX             -> av1_fwd_idtx_c() with block size 64 (EXT_TX only)
//   - every other type -> av1_highbd_fht64x64_c(), DCT_DCT included
// Non-DCT_DCT types are accepted only in CONFIG_EXT_TX builds; anything else
// trips assert(0). fwd_txfm_opt and bd are unused.
static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
                                  int diff_stride, TX_TYPE tx_type,
                                  FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
  (void)bd;
  (void)fwd_txfm_opt;

  switch (tx_type) {
#if CONFIG_EXT_TX
    case IDTX:
      av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type);
      break;
    case H_FLIPADST:
    case V_FLIPADST:
    case H_ADST:
    case V_ADST:
    case H_DCT:
    case V_DCT:
    case FLIPADST_ADST:
    case ADST_FLIPADST:
    case FLIPADST_FLIPADST:
    case DCT_FLIPADST:
    case FLIPADST_DCT:
    case ADST_ADST:
    case DCT_ADST:
    case ADST_DCT:
#endif  // CONFIG_EXT_TX
    case DCT_DCT:
      // One kernel serves every hybrid transform type.
      av1_highbd_fht64x64_c(src_diff, coeff, diff_stride, tx_type);
      break;
    default:
      assert(0);
      break;
  }
}
#endif // CONFIG_TX64X64
#endif // CONFIG_AOM_HIGHBITDEPTH
void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
......@@ -389,6 +476,11 @@ void fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
const int rd_transform = fwd_txfm_param->rd_transform;
const int lossless = fwd_txfm_param->lossless;
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt);
......@@ -434,6 +526,12 @@ void highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
const int lossless = fwd_txfm_param->lossless;
const int bd = fwd_txfm_param->bd;
switch (tx_size) {
#if CONFIG_TX64X64
case TX_64X64:
highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt,
bd);
break;
#endif // CONFIG_TX64X64
case TX_32X32:
highbd_fwd_txfm_32x32(rd_transform, src_diff, coeff, diff_stride, tx_type,
fwd_txfm_opt, bd);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment