Commit 155bf9a7 authored by Angie Chiang

Let rect txfms do shorter side first

Change-Id: I41a78f5066b96f59ab8f587bc5b1955f6806b103
parent e3b604d6
......@@ -109,10 +109,27 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
// Forward 2D transform of a 4x8 block (C implementation).
//   input   - source samples; row r, column c is read at input[r * stride + c]
//   output  - receives the transform coefficients
//   stride  - distance, in elements, between consecutive rows of `input`
//   tx_type - transform type used to look up the transform configuration
//   bd      - bit depth; unused here
void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride,
                          int tx_type, int bd) {
  (void)bd;
#if CONFIG_TXMG
  // Transpose the block, transform it with the rotated size/type
  // configuration, then transpose the coefficients back into `output`.
  const int tx_size = TX_4X8;
  const int rot_tx_size = av1_rotate_tx_size(tx_size);
  const int rot_tx_type = av1_rotate_tx_type(tx_type);
  const int cols = tx_size_wide[tx_size];
  const int rows = tx_size_high[tx_size];
  // Once transposed, the block is `rows` wide and `cols` high.
  const int rot_cols = rows;
  const int rot_rows = cols;
  int16_t rot_input[4 * 8];
  int32_t rot_coeffs[4 * 8];

  transpose_int16(rot_input, rot_cols, input, stride, cols, rows);
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rot_tx_type, rot_tx_size);
  // `output` is passed in the argument slot where the non-TXMG branch passes
  // its temporary buffer; the transform result goes to rot_coeffs and the
  // final transpose below is what fills `output`.
  fwd_txfm2d_c(rot_input, rot_coeffs, rot_cols, &cfg, output);
  transpose_int32(output, cols, rot_coeffs, rot_cols, rot_cols, rot_rows);
#else
  int32_t scratch[4 * 8];
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_4X8);
  fwd_txfm2d_c(input, output, stride, &cfg, scratch);
#endif
}
void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
......@@ -125,10 +142,27 @@ void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride,
// Forward 2D transform of an 8x16 block (C implementation).
//   input   - source samples; row r, column c is read at input[r * stride + c]
//   output  - receives the transform coefficients
//   stride  - distance, in elements, between consecutive rows of `input`
//   tx_type - transform type used to look up the transform configuration
//   bd      - bit depth; unused here
void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride,
                           int tx_type, int bd) {
  (void)bd;
#if CONFIG_TXMG
  // Transpose the block, transform it with the rotated size/type
  // configuration, then transpose the coefficients back into `output`.
  const int tx_size = TX_8X16;
  const int rot_tx_size = av1_rotate_tx_size(tx_size);
  const int rot_tx_type = av1_rotate_tx_type(tx_type);
  const int cols = tx_size_wide[tx_size];
  const int rows = tx_size_high[tx_size];
  // Once transposed, the block is `rows` wide and `cols` high.
  const int rot_cols = rows;
  const int rot_rows = cols;
  int16_t rot_input[8 * 16];
  int32_t rot_coeffs[8 * 16];

  transpose_int16(rot_input, rot_cols, input, stride, cols, rows);
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rot_tx_type, rot_tx_size);
  // `output` is passed in the argument slot where the non-TXMG branch passes
  // its temporary buffer; the transform result goes to rot_coeffs and the
  // final transpose below is what fills `output`.
  fwd_txfm2d_c(rot_input, rot_coeffs, rot_cols, &cfg, output);
  transpose_int32(output, cols, rot_coeffs, rot_cols, rot_cols, rot_rows);
#else
  int32_t scratch[8 * 16];
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_8X16);
  fwd_txfm2d_c(input, output, stride, &cfg, scratch);
#endif
}
void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
......@@ -141,10 +175,27 @@ void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride,
// Forward 2D transform of a 16x32 block (C implementation).
//   input   - source samples; row r, column c is read at input[r * stride + c]
//   output  - receives the transform coefficients
//   stride  - distance, in elements, between consecutive rows of `input`
//   tx_type - transform type used to look up the transform configuration
//   bd      - bit depth; unused here
void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride,
                            int tx_type, int bd) {
  (void)bd;
#if CONFIG_TXMG
  // Transpose the block, transform it with the rotated size/type
  // configuration, then transpose the coefficients back into `output`.
  const int tx_size = TX_16X32;
  const int rot_tx_size = av1_rotate_tx_size(tx_size);
  const int rot_tx_type = av1_rotate_tx_type(tx_type);
  const int cols = tx_size_wide[tx_size];
  const int rows = tx_size_high[tx_size];
  // Once transposed, the block is `rows` wide and `cols` high.
  const int rot_cols = rows;
  const int rot_rows = cols;
  int16_t rot_input[16 * 32];
  int32_t rot_coeffs[16 * 32];

  transpose_int16(rot_input, rot_cols, input, stride, cols, rows);
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(rot_tx_type, rot_tx_size);
  // `output` is passed in the argument slot where the non-TXMG branch passes
  // its temporary buffer; the transform result goes to rot_coeffs and the
  // final transpose below is what fills `output`.
  fwd_txfm2d_c(rot_input, rot_coeffs, rot_cols, &cfg, output);
  transpose_int32(output, cols, rot_coeffs, rot_cols, rot_cols, rot_rows);
#else
  int32_t scratch[16 * 32];
  TXFM_2D_FLIP_CFG cfg = av1_get_fwd_txfm_cfg(tx_type, TX_16X32);
  fwd_txfm2d_c(input, output, stride, &cfg, scratch);
#endif
}
void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride,
......
......@@ -211,8 +211,25 @@ void av1_inv_txfm2d_add_4x8_c(const int32_t *input, uint16_t *output,
// Inverse 2D transform for an 8x4 block (C implementation).
//   input   - transform coefficients
//   output  - destination pixels; row r, column c lives at
//             output[r * stride + c]
//   stride  - distance, in elements, between consecutive rows of `output`
//   tx_type - transform type forwarded to inv_txfm2d_add_facade
//   bd      - bit depth forwarded to inv_txfm2d_add_facade
void av1_inv_txfm2d_add_8x4_c(const int32_t *input, uint16_t *output,
                              int stride, int tx_type, int bd) {
#if CONFIG_TXMG
  // Work on transposed copies of the coefficients and of the destination,
  // run the rotated transform, then transpose the result back in place.
  const int tx_size = TX_8X4;
  const int rot_tx_size = av1_rotate_tx_size(tx_size);
  const int rot_tx_type = av1_rotate_tx_type(tx_type);
  const int cols = tx_size_wide[tx_size];
  const int rows = tx_size_high[tx_size];
  // Once transposed, the block is `rows` wide and `cols` high.
  const int rot_cols = rows;
  const int rot_rows = cols;
  int32_t rot_input[8 * 4];
  uint16_t rot_output[8 * 4];
  int scratch[8 * 4 + 8 + 8];

  transpose_int32(rot_input, rot_cols, input, cols, cols, rows);
  // The current destination contents are copied in as well; presumably the
  // facade (note the "_add" in its name) accumulates onto them.
  transpose_uint16(rot_output, rot_cols, output, stride, cols, rows);
  inv_txfm2d_add_facade(rot_input, rot_output, rot_cols, scratch, rot_tx_type,
                        rot_tx_size, bd);
  transpose_uint16(output, stride, rot_output, rot_cols, rot_cols, rot_rows);
#else
  int scratch[8 * 4 + 4 + 4];
  inv_txfm2d_add_facade(input, output, stride, scratch, tx_type, TX_8X4, bd);
#endif
}
void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
......@@ -223,8 +240,25 @@ void av1_inv_txfm2d_add_8x16_c(const int32_t *input, uint16_t *output,
// Inverse 2D transform for a 16x8 block (C implementation).
//   input   - transform coefficients
//   output  - destination pixels; row r, column c lives at
//             output[r * stride + c]
//   stride  - distance, in elements, between consecutive rows of `output`
//   tx_type - transform type forwarded to inv_txfm2d_add_facade
//   bd      - bit depth forwarded to inv_txfm2d_add_facade
void av1_inv_txfm2d_add_16x8_c(const int32_t *input, uint16_t *output,
                               int stride, int tx_type, int bd) {
#if CONFIG_TXMG
  // Work on transposed copies of the coefficients and of the destination,
  // run the rotated transform, then transpose the result back in place.
  const int tx_size = TX_16X8;
  const int rot_tx_size = av1_rotate_tx_size(tx_size);
  const int rot_tx_type = av1_rotate_tx_type(tx_type);
  const int cols = tx_size_wide[tx_size];
  const int rows = tx_size_high[tx_size];
  // Once transposed, the block is `rows` wide and `cols` high.
  const int rot_cols = rows;
  const int rot_rows = cols;
  int32_t rot_input[16 * 8];
  uint16_t rot_output[16 * 8];
  int scratch[16 * 8 + 16 + 16];

  transpose_int32(rot_input, rot_cols, input, cols, cols, rows);
  // The current destination contents are copied in as well; presumably the
  // facade (note the "_add" in its name) accumulates onto them.
  transpose_uint16(rot_output, rot_cols, output, stride, cols, rows);
  inv_txfm2d_add_facade(rot_input, rot_output, rot_cols, scratch, rot_tx_type,
                        rot_tx_size, bd);
  transpose_uint16(output, stride, rot_output, rot_cols, rot_cols, rot_rows);
#else
  int scratch[16 * 8 + 8 + 8];
  inv_txfm2d_add_facade(input, output, stride, scratch, tx_type, TX_16X8, bd);
#endif
}
void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
......@@ -235,8 +269,25 @@ void av1_inv_txfm2d_add_16x32_c(const int32_t *input, uint16_t *output,
// Inverse 2D transform for a 32x16 block (C implementation).
//   input   - transform coefficients
//   output  - destination pixels; row r, column c lives at
//             output[r * stride + c]
//   stride  - distance, in elements, between consecutive rows of `output`
//   tx_type - transform type forwarded to inv_txfm2d_add_facade
//   bd      - bit depth forwarded to inv_txfm2d_add_facade
void av1_inv_txfm2d_add_32x16_c(const int32_t *input, uint16_t *output,
                                int stride, int tx_type, int bd) {
#if CONFIG_TXMG
  // Work on transposed copies of the coefficients and of the destination,
  // run the rotated transform, then transpose the result back in place.
  const int tx_size = TX_32X16;
  const int rot_tx_size = av1_rotate_tx_size(tx_size);
  const int rot_tx_type = av1_rotate_tx_type(tx_type);
  const int cols = tx_size_wide[tx_size];
  const int rows = tx_size_high[tx_size];
  // Once transposed, the block is `rows` wide and `cols` high.
  const int rot_cols = rows;
  const int rot_rows = cols;
  int32_t rot_input[32 * 16];
  uint16_t rot_output[32 * 16];
  int scratch[32 * 16 + 32 + 32];

  transpose_int32(rot_input, rot_cols, input, cols, cols, rows);
  // The current destination contents are copied in as well; presumably the
  // facade (note the "_add" in its name) accumulates onto them.
  transpose_uint16(rot_output, rot_cols, output, stride, cols, rows);
  inv_txfm2d_add_facade(rot_input, rot_output, rot_cols, scratch, rot_tx_type,
                        rot_tx_size, bd);
  transpose_uint16(output, stride, rot_output, rot_cols, rot_cols, rot_rows);
#else
  int scratch[32 * 16 + 16 + 16];
  inv_txfm2d_add_facade(input, output, stride, scratch, tx_type, TX_32X16, bd);
#endif
}
void av1_inv_txfm2d_add_4x4_c(const int32_t *input, uint16_t *output,
......
......@@ -17,6 +17,7 @@
#include <stdio.h>
#include "av1/common/enums.h"
#include "av1/common/blockd.h"
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
......@@ -209,6 +210,61 @@ static INLINE void set_flip_cfg(int tx_type, TXFM_2D_FLIP_CFG *cfg) {
}
}
#if CONFIG_TXMG
// Returns the transform size obtained by exchanging the two dimensions of
// `tx_size` (e.g. TX_4X8 <-> TX_8X4). Square sizes are returned unchanged.
// Any value not listed below trips assert(0) and yields TX_INVALID.
static INLINE int av1_rotate_tx_size(int tx_size) {
  switch (tx_size) {
    // Square sizes map onto themselves.
#if CONFIG_CHROMA_2X2
    case TX_2X2:
#endif
    case TX_4X4:
    case TX_8X8:
    case TX_16X16:
    case TX_32X32:
#if CONFIG_TX64X64
    case TX_64X64:
#endif
      return tx_size;

    // Rectangular sizes, listed as mirrored pairs.
    case TX_4X8: return TX_8X4;
    case TX_8X4: return TX_4X8;

    case TX_8X16: return TX_16X8;
    case TX_16X8: return TX_8X16;

    case TX_16X32: return TX_32X16;
    case TX_32X16: return TX_16X32;

    case TX_4X16: return TX_16X4;
    case TX_16X4: return TX_4X16;

    case TX_8X32: return TX_32X8;
    case TX_32X8: return TX_8X32;

    default:
      assert(0);
      return TX_INVALID;
  }
}
// Returns the transform type that pairs with a transposed block: the two
// components of the type name are exchanged (e.g. ADST_DCT <-> DCT_ADST,
// V_DCT <-> H_DCT). Types that are their own mirror are returned unchanged.
// Any value not listed below trips assert(0) and yields TX_TYPES.
static INLINE int av1_rotate_tx_type(int tx_type) {
  switch (tx_type) {
    // Types that map onto themselves.
    case DCT_DCT:
    case ADST_ADST:
#if CONFIG_EXT_TX
    case FLIPADST_FLIPADST:
    case IDTX:
#endif  // CONFIG_EXT_TX
#if CONFIG_MRC_TX
    case MRC_DCT:
#endif  // CONFIG_MRC_TX
      return tx_type;

    // Mirrored pairs.
    case ADST_DCT: return DCT_ADST;
    case DCT_ADST: return ADST_DCT;
#if CONFIG_EXT_TX
    case FLIPADST_DCT: return DCT_FLIPADST;
    case DCT_FLIPADST: return FLIPADST_DCT;

    case ADST_FLIPADST: return FLIPADST_ADST;
    case FLIPADST_ADST: return ADST_FLIPADST;

    case V_DCT: return H_DCT;
    case H_DCT: return V_DCT;

    case V_ADST: return H_ADST;
    case H_ADST: return V_ADST;

    case V_FLIPADST: return H_FLIPADST;
    case H_FLIPADST: return V_FLIPADST;
#endif  // CONFIG_EXT_TX

    default:
      assert(0);
      return TX_TYPES;
  }
}
#endif // CONFIG_TXMG
#if CONFIG_MRC_TX
static INLINE int get_mrc_mask(const uint8_t *pred, int pred_stride, int *mask,
int mask_stride, int width, int height) {
......
......@@ -1743,6 +1743,38 @@ static INLINE PLANE_TYPE get_plane_type(int plane) {
return (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
}
// Writes the transpose of a w-wide, h-high uint8_t block to `dst`.
//   dst, dst_stride - destination and its row pitch in elements; the result
//                     is h wide and w high
//   src, src_stride - source and its row pitch in elements
// The source is visited in raster order (row by row).
static INLINE void transpose_uint8(uint8_t *dst, int dst_stride,
                                   const uint8_t *src, int src_stride, int w,
                                   int h) {
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      // Source element (row, col) becomes destination element (col, row).
      const uint8_t value = src[row * src_stride + col];
      dst[col * dst_stride + row] = value;
    }
  }
}
// Writes the transpose of a w-wide, h-high uint16_t block to `dst`.
//   dst, dst_stride - destination and its row pitch in elements; the result
//                     is h wide and w high
//   src, src_stride - source and its row pitch in elements
// The source is visited in raster order (row by row).
static INLINE void transpose_uint16(uint16_t *dst, int dst_stride,
                                    const uint16_t *src, int src_stride, int w,
                                    int h) {
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      // Source element (row, col) becomes destination element (col, row).
      const uint16_t value = src[row * src_stride + col];
      dst[col * dst_stride + row] = value;
    }
  }
}
// Writes the transpose of a w-wide, h-high int16_t block to `dst`.
//   dst, dst_stride - destination and its row pitch in elements; the result
//                     is h wide and w high
//   src, src_stride - source and its row pitch in elements
// The source is visited in raster order (row by row).
static INLINE void transpose_int16(int16_t *dst, int dst_stride,
                                   const int16_t *src, int src_stride, int w,
                                   int h) {
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      // Source element (row, col) becomes destination element (col, row).
      const int16_t value = src[row * src_stride + col];
      dst[col * dst_stride + row] = value;
    }
  }
}
// Writes the transpose of a w-wide, h-high int32_t block to `dst`.
//   dst, dst_stride - destination and its row pitch in elements; the result
//                     is h wide and w high
//   src, src_stride - source and its row pitch in elements
// The source is visited in raster order (row by row).
static INLINE void transpose_int32(int32_t *dst, int dst_stride,
                                   const int32_t *src, int src_stride, int w,
                                   int h) {
  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      // Source element (row, col) becomes destination element (col, row).
      const int32_t value = src[row * src_stride + col];
      dst[col * dst_stride + row] = value;
    }
  }
}
#ifdef __cplusplus
} // extern "C"
#endif
......
......@@ -14,6 +14,7 @@
#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "av1/common/blockd.h"
#include "av1/common/convolve.h"
#include "av1/common/filter.h"
#include "av1/common/onyxc_int.h"
......@@ -422,24 +423,6 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
}
#endif
// Transposes a w-by-h (width by height) uint8_t block from `src` into `dst`.
// Both strides are row pitches measured in elements. Rows of the source are
// processed top to bottom, each one left to right.
static INLINE void transpose_uint8(uint8_t *dst, int dst_stride,
                                   const uint8_t *src, int src_stride, int w,
                                   int h) {
  int y = 0;
  while (y < h) {
    int x = 0;
    while (x < w) {
      // Source (y, x) is stored at destination (x, y).
      dst[x * dst_stride + y] = src[y * src_stride + x];
      ++x;
    }
    ++y;
  }
}
// Transposes a w-by-h (width by height) int32_t block from `src` into `dst`.
// Both strides are row pitches measured in elements. Rows of the source are
// processed top to bottom, each one left to right.
static INLINE void transpose_int32(int32_t *dst, int dst_stride,
                                   const int32_t *src, int src_stride, int w,
                                   int h) {
  int y = 0;
  while (y < h) {
    int x = 0;
    while (x < w) {
      // Source (y, x) is stored at destination (x, y).
      dst[x * dst_stride + y] = src[y * src_stride + x];
      ++x;
    }
    ++y;
  }
}
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilter *interp_filter,
......@@ -500,14 +483,6 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
}
#if CONFIG_HIGHBITDEPTH
// Transposes a w-by-h (width by height) uint16_t block from `src` into `dst`.
// Both strides are row pitches measured in elements. Rows of the source are
// processed top to bottom, each one left to right.
static INLINE void transpose_uint16(uint16_t *dst, int dst_stride,
                                    const uint16_t *src, int src_stride, int w,
                                    int h) {
  int y = 0;
  while (y < h) {
    int x = 0;
    while (x < w) {
      // Source (y, x) is stored at destination (x, y).
      dst[x * dst_stride + y] = src[y * src_stride + x];
      ++x;
    }
    ++y;
  }
}
void av1_highbd_convolve_rounding_c(const int32_t *src, int src_stride,
uint8_t *dst8, int dst_stride, int w, int h,
int bits, int bd) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment