Commit a3874850 authored by Ronald S. Bultje's avatar Ronald S. Bultje

Make SB coding size-independent.

Merge sb32x32 and sb64x64 functions; allow for rectangular sizes. Code
gives identical encoder results before and after. There are a few
macros for rectangular block sizes under the sbsegment experiment; this
experiment is not yet functional and should not yet be used.

Change-Id: I71f93b5d2a1596e99a6f01f29c3f0a456694d728
parent f42bee7e
...@@ -252,6 +252,7 @@ EXPERIMENT_LIST=" ...@@ -252,6 +252,7 @@ EXPERIMENT_LIST="
implicit_compoundinter_weight implicit_compoundinter_weight
scatterscan scatterscan
oneshotq oneshotq
sbsegment
" "
CONFIG_LIST=" CONFIG_LIST="
external_build external_build
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "vp9/common/vp9_treecoder.h" #include "vp9/common/vp9_treecoder.h"
#include "vpx_ports/mem.h" #include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
#define TRUE 1 #define TRUE 1
#define FALSE 0 #define FALSE 0
...@@ -198,11 +199,43 @@ typedef enum { ...@@ -198,11 +199,43 @@ typedef enum {
MAX_REF_FRAMES = 4 MAX_REF_FRAMES = 4
} MV_REFERENCE_FRAME; } MV_REFERENCE_FRAME;
typedef enum { static INLINE int mb_width_log2(BLOCK_SIZE_TYPE sb_type) {
BLOCK_SIZE_MB16X16 = 0, switch (sb_type) {
BLOCK_SIZE_SB32X32 = 1, #if CONFIG_SBSEGMENT
BLOCK_SIZE_SB64X64 = 2, case BLOCK_SIZE_SB16X32:
} BLOCK_SIZE_TYPE; #endif
case BLOCK_SIZE_MB16X16: return 0;
#if CONFIG_SBSEGMENT
case BLOCK_SIZE_SB32X16:
case BLOCK_SIZE_SB32X64:
#endif
case BLOCK_SIZE_SB32X32: return 1;
#if CONFIG_SBSEGMENT
case BLOCK_SIZE_SB64X32:
#endif
case BLOCK_SIZE_SB64X64: return 2;
default: assert(0);
}
}
static INLINE int mb_height_log2(BLOCK_SIZE_TYPE sb_type) {
switch (sb_type) {
#if CONFIG_SBSEGMENT
case BLOCK_SIZE_SB32X16:
#endif
case BLOCK_SIZE_MB16X16: return 0;
#if CONFIG_SBSEGMENT
case BLOCK_SIZE_SB16X32:
case BLOCK_SIZE_SB64X32:
#endif
case BLOCK_SIZE_SB32X32: return 1;
#if CONFIG_SBSEGMENT
case BLOCK_SIZE_SB32X64:
#endif
case BLOCK_SIZE_SB64X64: return 2;
default: assert(0);
}
}
typedef struct { typedef struct {
MB_PREDICTION_MODE mode, uv_mode; MB_PREDICTION_MODE mode, uv_mode;
...@@ -469,11 +502,12 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) { ...@@ -469,11 +502,12 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
// is smaller than the prediction size // is smaller than the prediction size
TX_TYPE tx_type = DCT_DCT; TX_TYPE tx_type = DCT_DCT;
const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
const int wb = mb_width_log2(sb_type), hb = mb_height_log2(sb_type);
#if !USE_ADST_FOR_SB #if !USE_ADST_FOR_SB
if (sb_type) if (sb_type > BLOCK_SIZE_MB16X16)
return tx_type; return tx_type;
#endif #endif
if (ib >= (16 << (2 * sb_type))) // no chroma adst if (ib >= (16 << (wb + hb))) // no chroma adst
return tx_type; return tx_type;
if (xd->lossless) if (xd->lossless)
return DCT_DCT; return DCT_DCT;
...@@ -524,7 +558,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) { ...@@ -524,7 +558,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
xd->q_index < ACTIVE_HT) { xd->q_index < ACTIVE_HT) {
#if USE_ADST_FOR_I16X16_4X4 #if USE_ADST_FOR_I16X16_4X4
#if USE_ADST_PERIPHERY_ONLY #if USE_ADST_PERIPHERY_ONLY
const int hmax = 4 << sb_type; const int hmax = 4 << wb;
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
#if USE_ADST_FOR_REMOTE_EDGE #if USE_ADST_FOR_REMOTE_EDGE
if ((ib & (hmax - 1)) != 0 && ib >= hmax) if ((ib & (hmax - 1)) != 0 && ib >= hmax)
...@@ -557,11 +591,12 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) { ...@@ -557,11 +591,12 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
// is smaller than the prediction size // is smaller than the prediction size
TX_TYPE tx_type = DCT_DCT; TX_TYPE tx_type = DCT_DCT;
const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
const int wb = mb_width_log2(sb_type), hb = mb_height_log2(sb_type);
#if !USE_ADST_FOR_SB #if !USE_ADST_FOR_SB
if (sb_type) if (sb_type > BLOCK_SIZE_MB16X16)
return tx_type; return tx_type;
#endif #endif
if (ib >= (16 << (2 * sb_type))) // no chroma adst if (ib >= (16 << (wb + hb))) // no chroma adst
return tx_type; return tx_type;
if (xd->mode_info_context->mbmi.mode == I8X8_PRED && if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
xd->q_index < ACTIVE_HT8) { xd->q_index < ACTIVE_HT8) {
...@@ -574,7 +609,7 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) { ...@@ -574,7 +609,7 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
xd->q_index < ACTIVE_HT8) { xd->q_index < ACTIVE_HT8) {
#if USE_ADST_FOR_I16X16_8X8 #if USE_ADST_FOR_I16X16_8X8
#if USE_ADST_PERIPHERY_ONLY #if USE_ADST_PERIPHERY_ONLY
const int hmax = 4 << sb_type; const int hmax = 4 << wb;
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
#if USE_ADST_FOR_REMOTE_EDGE #if USE_ADST_FOR_REMOTE_EDGE
if ((ib & (hmax - 1)) != 0 && ib >= hmax) if ((ib & (hmax - 1)) != 0 && ib >= hmax)
...@@ -605,18 +640,19 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) { ...@@ -605,18 +640,19 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) { static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) {
TX_TYPE tx_type = DCT_DCT; TX_TYPE tx_type = DCT_DCT;
const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
const int wb = mb_width_log2(sb_type), hb = mb_height_log2(sb_type);
#if !USE_ADST_FOR_SB #if !USE_ADST_FOR_SB
if (sb_type) if (sb_type > BLOCK_SIZE_MB16X16)
return tx_type; return tx_type;
#endif #endif
if (ib >= (16 << (2 * sb_type))) if (ib >= (16 << (wb + hb)))
return tx_type; return tx_type;
if (xd->mode_info_context->mbmi.mode < I8X8_PRED && if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
xd->q_index < ACTIVE_HT16) { xd->q_index < ACTIVE_HT16) {
tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode)); tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
#if USE_ADST_PERIPHERY_ONLY #if USE_ADST_PERIPHERY_ONLY
if (sb_type) { if (sb_type > BLOCK_SIZE_MB16X16) {
const int hmax = 4 << sb_type; const int hmax = 4 << wb;
#if USE_ADST_FOR_REMOTE_EDGE #if USE_ADST_FOR_REMOTE_EDGE
if ((ib & (hmax - 1)) != 0 && ib >= hmax) if ((ib & (hmax - 1)) != 0 && ib >= hmax)
tx_type = DCT_DCT; tx_type = DCT_DCT;
...@@ -658,6 +694,10 @@ static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) { ...@@ -658,6 +694,10 @@ static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) {
switch (mbmi->sb_type) { switch (mbmi->sb_type) {
case BLOCK_SIZE_SB64X64: case BLOCK_SIZE_SB64X64:
return size; return size;
#if CONFIG_SBSEGMENT
case BLOCK_SIZE_SB64X32:
case BLOCK_SIZE_SB32X64:
#endif
case BLOCK_SIZE_SB32X32: case BLOCK_SIZE_SB32X32:
if (size == TX_32X32) if (size == TX_32X32)
return TX_16X16; return TX_16X16;
......
...@@ -122,16 +122,12 @@ static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) { ...@@ -122,16 +122,12 @@ static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
} }
static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd) { static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd,
BLOCK_SIZE_TYPE bsize) {
/* Clear entropy contexts */ /* Clear entropy contexts */
vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); const int bw = 1 << mb_width_log2(bsize), bh = 1 << mb_height_log2(bsize);
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2); vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * bw);
} vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * bh);
static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) {
/* Clear entropy contexts */
vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
} }
extern const int vp9_coef_bands8x8[64]; extern const int vp9_coef_bands8x8[64];
......
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_COMMON_VP9_ENUMS_H_
#define VP9_COMMON_VP9_ENUMS_H_
#include "./vpx_config.h"
/* Prediction/coding block sizes, ordered from smallest (16x16 macroblock)
 * to largest (64x64 superblock).
 *
 * NOTE(review): code elsewhere compares these relationally (e.g.
 * `sb_type > BLOCK_SIZE_MB16X16`), so entries must stay in ascending
 * size order. Also note that the CONFIG_SBSEGMENT entries shift the
 * numeric values of the square sizes when that experiment is enabled;
 * nothing may rely on the concrete integer values across configs.
 *
 * The rectangular sizes (WxH with W != H) belong to the sbsegment
 * experiment, which is not yet functional (see commit message). */
typedef enum BLOCK_SIZE_TYPE {
BLOCK_SIZE_MB16X16,
#if CONFIG_SBSEGMENT
BLOCK_SIZE_SB16X32,  /* rectangular: 16 wide, 32 high */
BLOCK_SIZE_SB32X16,  /* rectangular: 32 wide, 16 high */
#endif
BLOCK_SIZE_SB32X32,
#if CONFIG_SBSEGMENT
BLOCK_SIZE_SB32X64,  /* rectangular: 32 wide, 64 high */
BLOCK_SIZE_SB64X32,  /* rectangular: 64 wide, 32 high */
#endif
BLOCK_SIZE_SB64X64,
} BLOCK_SIZE_TYPE;
#endif  // VP9_COMMON_VP9_ENUMS_H_
...@@ -111,210 +111,150 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { ...@@ -111,210 +111,150 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) {
vp9_inverse_transform_mbuv_8x8(xd); vp9_inverse_transform_mbuv_8x8(xd);
} }
void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) { void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), xd->diff, 64); const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl;
} const int bh = 1 << (mb_height_log2(bsize) - 1);
const int stride = 32 << bwl;
void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
int n; int n;
for (n = 0; n < 4; n++) { for (n = 0; n < bw * bh; n++) {
const int x_idx = n & 1, y_idx = n >> 1; const int x_idx = n & (bw - 1), y_idx = n >> bwl;
const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
xd->diff + x_idx * 16 + y_idx * 32 * 16,
64);
} else {
vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type);
}
}
}
void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
} else {
vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type);
}
}
}
void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 64; n++) {
const int x_idx = n & 7, y_idx = n >> 3;
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n],
BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
} else {
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type);
}
}
}
void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) {
vp9_inverse_transform_b_16x16(xd->plane[1].dqcoeff,
xd->diff + 1024, 32);
vp9_inverse_transform_b_16x16(xd->plane[2].dqcoeff,
xd->diff + 1280, 32);
}
void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8,
32);
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8,
32);
}
}
void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 16; n++) {
const int x_idx = n & 3, y_idx = n >> 2;
vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n],
BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4,
32);
vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n],
BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4,
32);
}
}
void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) {
int n;
for (n = 0; n < 4; n++) {
const int x_idx = n & 1, y_idx = n >> 1;
vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024), vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024),
xd->diff + x_idx * 32 + y_idx * 32 * 64, 128); xd->diff + x_idx * 32 + y_idx * 32 * stride,
stride * 2);
} }
} }
void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) { void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize), bw = 1 << bwl;
const int bh = 1 << mb_height_log2(bsize);
const int stride = 16 << bwl, bstride = 4 << bwl;
int n; int n;
for (n = 0; n < 16; n++) { for (n = 0; n < bw * bh; n++) {
const int x_idx = n & 3, y_idx = n >> 2; const int x_idx = n & (bw - 1), y_idx = n >> bwl;
const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4); const TX_TYPE tx_type = get_tx_type_16x16(xd,
(y_idx * bstride + x_idx) * 4);
if (tx_type == DCT_DCT) { if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
xd->diff + x_idx * 16 + y_idx * 64 * 16, xd->diff + x_idx * 16 + y_idx * stride * 16,
128); stride * 2);
} else { } else {
vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type); xd->diff + x_idx * 16 + y_idx * stride * 16,
stride, tx_type);
} }
} }
} }
void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) { void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl;
const int bh = 1 << (mb_height_log2(bsize) + 1);
const int stride = 8 << bwl, bstride = 2 << bwl;
int n; int n;
for (n = 0; n < 64; n++) { for (n = 0; n < bw * bh; n++) {
const int x_idx = n & 7, y_idx = n >> 3; const int x_idx = n & (bw - 1), y_idx = n >> bwl;
const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2); const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2);
if (tx_type == DCT_DCT) { if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
xd->diff + x_idx * 8 + y_idx * 64 * 8, 128); xd->diff + x_idx * 8 + y_idx * stride * 8,
stride * 2);
} else { } else {
vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type); xd->diff + x_idx * 8 + y_idx * stride * 8,
stride, tx_type);
} }
} }
} }
void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) { void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 2, bw = 1 << bwl;
const int bh = 1 << (mb_height_log2(bsize) + 2);
const int stride = 4 << bwl, bstride = 1 << bwl;
int n; int n;
for (n = 0; n < 256; n++) { for (n = 0; n < bw * bh; n++) {
const int x_idx = n & 15, y_idx = n >> 4; const int x_idx = n & (bw - 1), y_idx = n >> bwl;
const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx); const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * bstride + x_idx);
if (tx_type == DCT_DCT) { if (tx_type == DCT_DCT) {
vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n], vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n],
BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
xd->diff + x_idx * 4 + y_idx * 4 * 64, 128); xd->diff + x_idx * 4 + y_idx * 4 * stride,
stride * 2);
} else { } else {
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type); xd->diff + x_idx * 4 + y_idx * 4 * stride,
stride, tx_type);
} }
} }
} }
void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) { void vp9_inverse_transform_sbuv_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
assert(bsize == BLOCK_SIZE_SB64X64);
vp9_short_idct32x32(xd->plane[1].dqcoeff, vp9_short_idct32x32(xd->plane[1].dqcoeff,
xd->diff + 4096, 64); xd->diff + 4096, 64);
vp9_short_idct32x32(xd->plane[2].dqcoeff, vp9_short_idct32x32(xd->plane[2].dqcoeff,
xd->diff + 4096 + 1024, 64); xd->diff + 4096 + 1024, 64);
} }
void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) { void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2;
const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
const int stride = 16 << (bwl - 1);
int n; int n;
for (n = 0; n < 4; n++) { for (n = 0; n < bw * bh; n++) {
const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16; const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
const int off = x_idx * 16 + y_idx * stride * 16;
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256), vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256),
xd->diff + 4096 + off, 64); xd->diff + uoff + off, stride * 2);
vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256), vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256),
xd->diff + 4096 + 1024 + off, 64); xd->diff + voff + off, stride * 2);
} }
} }
void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) { void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1;
const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2;
const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
const int stride = 8 << (bwl - 1);
int n; int n;
for (n = 0; n < 16; n++) { for (n = 0; n < bw * bh; n++) {
const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8; const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
const int off = x_idx * 8 + y_idx * stride * 8;
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64), vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
xd->diff + 4096 + off, 64); xd->diff + uoff + off, stride * 2);
vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64), vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
xd->diff + 4096 + 1024 + off, 64); xd->diff + voff + off, stride * 2);
} }
} }
void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) { void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2;
const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2;
const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
const int stride = 4 << (bwl - 1);
int n; int n;
for (n = 0; n < 64; n++) { for (n = 0; n < bw * bh; n++) {
const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4; const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
const int off = x_idx * 4 + y_idx * stride * 4;
vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n], vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n],
BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16), BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
xd->diff + 4096 + off, 64); xd->diff + uoff + off, stride * 2);
vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n], vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n],
BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16), BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
xd->diff + 4096 + 1024 + off, 64); xd->diff + voff + off, stride * 2);
} }
} }
...@@ -41,21 +41,13 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd); ...@@ -41,21 +41,13 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd);
void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd); void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd); void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd); void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd); void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);