Commit 005fc697 authored by Dmitry Kovalev
Browse files

Finally removing "short" from transform names.

Change-Id: I5259b68dc1bcceb153e3ffe638a79a59a3019e9d
parent 4d8ebc9e
...@@ -273,7 +273,7 @@ void fdct16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { ...@@ -273,7 +273,7 @@ void fdct16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
} }
void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { void fht16x16_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fht16x16_c(in, out, stride, tx_type); vp9_fht16x16_c(in, out, stride, tx_type);
} }
class Trans16x16TestBase { class Trans16x16TestBase {
...@@ -507,10 +507,10 @@ INSTANTIATE_TEST_CASE_P( ...@@ -507,10 +507,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
C, Trans16x16HT, C, Trans16x16HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 0), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0),
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 1), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1),
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 2), make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2),
make_tuple(&vp9_short_fht16x16_c, &vp9_iht16x16_256_add_c, 3))); make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3)));
#if HAVE_SSE2 #if HAVE_SSE2
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
...@@ -521,9 +521,9 @@ INSTANTIATE_TEST_CASE_P( ...@@ -521,9 +521,9 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT, SSE2, Trans16x16HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2),
make_tuple(&vp9_short_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3))); make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3)));
#endif #endif
} // namespace } // namespace
...@@ -45,7 +45,7 @@ void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { ...@@ -45,7 +45,7 @@ void fdct4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
} }
void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { void fht4x4_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fht4x4_c(in, out, stride, tx_type); vp9_fht4x4_c(in, out, stride, tx_type);
} }
class Trans4x4TestBase { class Trans4x4TestBase {
...@@ -281,10 +281,10 @@ INSTANTIATE_TEST_CASE_P( ...@@ -281,10 +281,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
C, Trans4x4HT, C, Trans4x4HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 0), make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0),
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 1), make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1),
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 2), make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2),
make_tuple(&vp9_short_fht4x4_c, &vp9_iht4x4_16_add_c, 3))); make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3)));
#if HAVE_SSE2 #if HAVE_SSE2
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
...@@ -295,10 +295,10 @@ INSTANTIATE_TEST_CASE_P( ...@@ -295,10 +295,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT, SSE2, Trans4x4HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0), make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0),
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1), make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1),
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2), make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2),
make_tuple(&vp9_short_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3))); make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3)));
#endif #endif
} // namespace } // namespace
...@@ -44,7 +44,7 @@ void fdct8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { ...@@ -44,7 +44,7 @@ void fdct8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
} }
void fht8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) { void fht8x8_ref(const int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fht8x8_c(in, out, stride, tx_type); vp9_fht8x8_c(in, out, stride, tx_type);
} }
class FwdTrans8x8TestBase { class FwdTrans8x8TestBase {
...@@ -308,10 +308,10 @@ INSTANTIATE_TEST_CASE_P( ...@@ -308,10 +308,10 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8HT, C, FwdTrans8x8HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 0), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0),
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 1), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1),
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 2), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2),
make_tuple(&vp9_short_fht8x8_c, &vp9_iht8x8_64_add_c, 3))); make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3)));
#if HAVE_SSE2 #if HAVE_SSE2
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
...@@ -321,9 +321,9 @@ INSTANTIATE_TEST_CASE_P( ...@@ -321,9 +321,9 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT, SSE2, FwdTrans8x8HT,
::testing::Values( ::testing::Values(
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0),
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1),
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2),
make_tuple(&vp9_short_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3))); make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3)));
#endif #endif
} // namespace } // namespace
...@@ -707,14 +707,14 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then ...@@ -707,14 +707,14 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
fi fi
# fdct functions # fdct functions
prototype void vp9_short_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type" prototype void vp9_fht4x4 "const int16_t *input, int16_t *output, int stride, int tx_type"
specialize vp9_short_fht4x4 sse2 avx2 specialize vp9_fht4x4 sse2 avx2
prototype void vp9_short_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type" prototype void vp9_fht8x8 "const int16_t *input, int16_t *output, int stride, int tx_type"
specialize vp9_short_fht8x8 sse2 avx2 specialize vp9_fht8x8 sse2 avx2
prototype void vp9_short_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type" prototype void vp9_fht16x16 "const int16_t *input, int16_t *output, int stride, int tx_type"
specialize vp9_short_fht16x16 sse2 avx2 specialize vp9_fht16x16 sse2 avx2
prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride" prototype void vp9_fwht4x4 "const int16_t *input, int16_t *output, int stride"
specialize vp9_fwht4x4 specialize vp9_fwht4x4
......
...@@ -18,8 +18,6 @@ ...@@ -18,8 +18,6 @@
#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_dct.h"
static INLINE int fdct_round_shift(int input) { static INLINE int fdct_round_shift(int input) {
int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
assert(INT16_MIN <= rv && rv <= INT16_MAX); assert(INT16_MIN <= rv && rv <= INT16_MAX);
...@@ -157,32 +155,36 @@ static const transform_2d FHT_4[] = { ...@@ -157,32 +155,36 @@ static const transform_2d FHT_4[] = {
{ fadst4, fadst4 } // ADST_ADST = 3 { fadst4, fadst4 } // ADST_ADST = 3
}; };
void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, void vp9_fht4x4_c(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
int16_t out[4 * 4]; if (tx_type == DCT_DCT) {
int16_t *outptr = &out[0]; vp9_fdct4x4_c(input, output, stride);
int i, j; } else {
int16_t temp_in[4], temp_out[4]; int16_t out[4 * 4];
const transform_2d ht = FHT_4[tx_type]; int16_t *outptr = &out[0];
int i, j;
int16_t temp_in[4], temp_out[4];
const transform_2d ht = FHT_4[tx_type];
// Columns // Columns
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) for (j = 0; j < 4; ++j)
temp_in[j] = input[j * stride + i] * 16; temp_in[j] = input[j * stride + i] * 16;
if (i == 0 && temp_in[0]) if (i == 0 && temp_in[0])
temp_in[0] += 1; temp_in[0] += 1;
ht.cols(temp_in, temp_out); ht.cols(temp_in, temp_out);
for (j = 0; j < 4; ++j) for (j = 0; j < 4; ++j)
outptr[j * 4 + i] = temp_out[j]; outptr[j * 4 + i] = temp_out[j];
} }
// Rows // Rows
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) for (j = 0; j < 4; ++j)
temp_in[j] = out[j + i * 4]; temp_in[j] = out[j + i * 4];
ht.rows(temp_in, temp_out); ht.rows(temp_in, temp_out);
for (j = 0; j < 4; ++j) for (j = 0; j < 4; ++j)
output[j + i * 4] = (temp_out[j] + 1) >> 2; output[j + i * 4] = (temp_out[j] + 1) >> 2;
}
} }
} }
...@@ -565,30 +567,34 @@ static const transform_2d FHT_8[] = { ...@@ -565,30 +567,34 @@ static const transform_2d FHT_8[] = {
{ fadst8, fadst8 } // ADST_ADST = 3 { fadst8, fadst8 } // ADST_ADST = 3
}; };
void vp9_short_fht8x8_c(const int16_t *input, int16_t *output, void vp9_fht8x8_c(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
int16_t out[64]; if (tx_type == DCT_DCT) {
int16_t *outptr = &out[0]; vp9_fdct8x8_c(input, output, stride);
int i, j; } else {
int16_t temp_in[8], temp_out[8]; int16_t out[64];
const transform_2d ht = FHT_8[tx_type]; int16_t *outptr = &out[0];
int i, j;
// Columns int16_t temp_in[8], temp_out[8];
for (i = 0; i < 8; ++i) { const transform_2d ht = FHT_8[tx_type];
for (j = 0; j < 8; ++j)
temp_in[j] = input[j * stride + i] * 4; // Columns
ht.cols(temp_in, temp_out); for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
outptr[j * 8 + i] = temp_out[j]; temp_in[j] = input[j * stride + i] * 4;
} ht.cols(temp_in, temp_out);
for (j = 0; j < 8; ++j)
outptr[j * 8 + i] = temp_out[j];
}
// Rows // Rows
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
temp_in[j] = out[j + i * 8]; temp_in[j] = out[j + i * 8];
ht.rows(temp_in, temp_out); ht.rows(temp_in, temp_out);
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
}
} }
} }
...@@ -958,31 +964,34 @@ static const transform_2d FHT_16[] = { ...@@ -958,31 +964,34 @@ static const transform_2d FHT_16[] = {
{ fadst16, fadst16 } // ADST_ADST = 3 { fadst16, fadst16 } // ADST_ADST = 3
}; };
void vp9_short_fht16x16_c(const int16_t *input, int16_t *output, void vp9_fht16x16_c(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
int16_t out[256]; if (tx_type == DCT_DCT) {
int16_t *outptr = &out[0]; vp9_fdct16x16_c(input, output, stride);
int i, j; } else {
int16_t temp_in[16], temp_out[16]; int16_t out[256];
const transform_2d ht = FHT_16[tx_type]; int16_t *outptr = &out[0];
int i, j;
// Columns int16_t temp_in[16], temp_out[16];
for (i = 0; i < 16; ++i) { const transform_2d ht = FHT_16[tx_type];
for (j = 0; j < 16; ++j)
temp_in[j] = input[j * stride + i] * 4; // Columns
ht.cols(temp_in, temp_out); for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) for (j = 0; j < 16; ++j)
outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; temp_in[j] = input[j * stride + i] * 4;
// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; ht.cols(temp_in, temp_out);
} for (j = 0; j < 16; ++j)
outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
}
// Rows // Rows
for (i = 0; i < 16; ++i) { for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) for (j = 0; j < 16; ++j)
temp_in[j] = out[j + i * 16]; temp_in[j] = out[j + i * 16];
ht.rows(temp_in, temp_out); ht.rows(temp_in, temp_out);
for (j = 0; j < 16; ++j) for (j = 0; j < 16; ++j)
output[j + i * 16] = temp_out[j]; output[j + i * 16] = temp_out[j];
}
} }
} }
...@@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { ...@@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
out[j + i * 32] = temp_out[j]; out[j + i * 32] = temp_out[j];
} }
} }
/* Forward 4x4 transform entry point that selects the implementation from
 * tx_type: DCT_DCT is handled by vp9_fdct4x4(), every other value is passed
 * through to vp9_short_fht4x4() together with tx_type.
 *
 * tx_type  transform type selector
 * input    source samples, forwarded unchanged to the chosen transform
 * output   destination buffer, forwarded unchanged to the chosen transform
 * stride   forwarded unchanged to the chosen transform
 */
void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
                int stride) {
  /* Anything that is not the plain DCT goes through the hybrid transform. */
  if (tx_type != DCT_DCT) {
    vp9_short_fht4x4(input, output, stride, tx_type);
    return;
  }

  vp9_fdct4x4(input, output, stride);
}
/* Forward 8x8 transform entry point that selects the implementation from
 * tx_type: DCT_DCT is handled by vp9_fdct8x8(), every other value is passed
 * through to vp9_short_fht8x8() together with tx_type.
 *
 * tx_type  transform type selector
 * input    source samples, forwarded unchanged to the chosen transform
 * output   destination buffer, forwarded unchanged to the chosen transform
 * stride   forwarded unchanged to the chosen transform
 */
void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
                int stride) {
  /* Anything that is not the plain DCT goes through the hybrid transform. */
  if (tx_type != DCT_DCT) {
    vp9_short_fht8x8(input, output, stride, tx_type);
    return;
  }

  vp9_fdct8x8(input, output, stride);
}
/* Forward 16x16 transform entry point that selects the implementation from
 * tx_type: DCT_DCT is handled by vp9_fdct16x16(), every other value is passed
 * through to vp9_short_fht16x16() together with tx_type.
 *
 * tx_type  transform type selector
 * input    source samples, forwarded unchanged to the chosen transform
 * output   destination buffer, forwarded unchanged to the chosen transform
 * stride   forwarded unchanged to the chosen transform
 */
void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
                  int stride) {
  /* Anything that is not the plain DCT goes through the hybrid transform. */
  if (tx_type != DCT_DCT) {
    vp9_short_fht16x16(input, output, stride, tx_type);
    return;
  }

  vp9_fdct16x16(input, output, stride);
}
/*
* Copyright (c) 2013 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* Declarations of the tx_type-dispatching forward transform entry points for
 * 4x4, 8x8 and 16x16 blocks.
 *
 * NOTE(review): no #include is visible in this header, so TX_TYPE and int16_t
 * presumably have to be provided by whatever the including file pulled in
 * first -- confirm against the callers. */
#ifndef VP9_ENCODER_VP9_DCT_H_
#define VP9_ENCODER_VP9_DCT_H_
/* Give the declarations C linkage when this header is included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
/* 4x4 forward transform selected by tx_type; reads `input` using `stride`
 * and writes the result to `output`. */
void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride);
/* 8x8 forward transform selected by tx_type; same parameter layout as
 * vp9_fht4x4(). */
void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride);
/* 16x16 forward transform selected by tx_type; same parameter layout as
 * vp9_fht4x4(). */
void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // VP9_ENCODER_VP9_DCT_H_
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_rdopt.h"
...@@ -571,7 +570,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, ...@@ -571,7 +570,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
if (!x->skip_recode) { if (!x->skip_recode) {
vp9_subtract_block(16, 16, src_diff, diff_stride, vp9_subtract_block(16, 16, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride); src, p->src.stride, dst, pd->dst.stride);
vp9_fht16x16(tx_type, src_diff, coeff, diff_stride); vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff, p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan, pd->dequant, p->zbin_extra, eob, scan_order->scan,
...@@ -591,7 +590,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, ...@@ -591,7 +590,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
if (!x->skip_recode) { if (!x->skip_recode) {
vp9_subtract_block(8, 8, src_diff, diff_stride, vp9_subtract_block(8, 8, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride); src, p->src.stride, dst, pd->dst.stride);
vp9_fht8x8(tx_type, src_diff, coeff, diff_stride); vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan, pd->dequant, p->zbin_extra, eob, scan_order->scan,
...@@ -617,7 +616,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, ...@@ -617,7 +616,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_subtract_block(4, 4, src_diff, diff_stride, vp9_subtract_block(4, 4, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride); src, p->src.stride, dst, pd->dst.stride);
if (tx_type != DCT_DCT) if (tx_type != DCT_DCT)
vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type); vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
else else
x->fwd_txm4x4(src_diff, coeff, diff_stride); x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
......
...@@ -1064,7 +1064,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, ...@@ -1064,7 +1064,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
so = &vp9_scan_orders[TX_4X4][tx_type]; so = &vp9_scan_orders[TX_4X4][tx_type];
if (tx_type != DCT_DCT) if (tx_type != DCT_DCT)
vp9_short_fht4x4(src_diff, coeff, 8, tx_type); vp9_fht4x4(src_diff, coeff, 8, tx_type);
else else
x->fwd_txm4x4(src_diff, coeff, 8); x->fwd_txm4x4(src_diff, coeff, 8);
......
...@@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) { ...@@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) {
transpose_4x4_avx2(in); transpose_4x4_avx2(in);
} }
void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output, void vp9_fht4x4_avx2(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
__m128i in[4]; __m128i in[4];
load_buffer_4x4_avx2(input, in, stride);
switch (tx_type) { switch (tx_type) {
case 0: // DCT_DCT case DCT_DCT:
fdct4_avx2(in); vp9_fdct4x4_avx2(input, output, stride);
fdct4_avx2(in);
break; break;
case 1: // ADST_DCT case ADST_DCT:
load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in); fadst4_avx2(in);
fdct4_avx2(in); fdct4_avx2(in);
write_buffer_4x4_avx2(output, in);
break; break;
case 2: // DCT_ADST case DCT_ADST:
load_buffer_4x4_avx2(input, in, stride);
fdct4_avx2(in); fdct4_avx2(in);
fadst4_avx2(in); fadst4_avx2(in);
write_buffer_4x4_avx2(output, in);
break; break;
case 3: // ADST_ADST case ADST_ADST:
load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in); fadst4_avx2(in);
fadst4_avx2(in); fadst4_avx2(in);
write_buffer_4x4_avx2(output, in);
break; break;
default: default:
assert(0); assert(0);
break; break;
} }
write_buffer_4x4_avx2(output, in);
} }
void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) { void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
...@@ -1028,33 +1032,39 @@ void fadst8_avx2(__m128i *in) { ...@@ -1028,33 +1032,39 @@ void fadst8_avx2(__m128i *in) {
array_transpose_8x8_avx2(in, in); array_transpose_8x8_avx2(in, in);
} }
void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output, void vp9_fht8x8_avx2(const int16_t *input, int16_t *output,
int stride, int tx_type) { int stride, int tx_type) {
__m128i in[8]; __m128i in[8];
load_buffer_8x8_avx2(input, in, stride);
switch (tx_type) { switch (tx_type) {
case 0: // DCT_DCT case DCT_DCT:
fdct8_avx2(in); vp9_fdct8x8_avx2(input, output, stride);
fdct8_avx2(in);
break; break;
case 1: // ADST_DCT case ADST_DCT:
load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in); fadst8_avx2(in);
fdct8_avx2(in); fdct8_avx2(in);
right_shift_8x8_avx2(in, 1);
write_buffer_8x8_avx2(output, in, 8);
break; break;