Commit 2c08c281 authored by John Koleszar

Merge with upstream experimental changes

Include upstream changes (unit test fixes, in particular) into the
merged code base.

Change-Id: I096f8a9d09e2532fbec0c95d7a995ab22fa54b29
parents 7b8dfcb5 0cedaa36
......@@ -242,7 +242,6 @@ EXPERIMENT_LIST="
superblocks
pred_filter
lossless
newbestrefmv
subpelrefmv
new_mvref
implicit_segmentation
......
......@@ -17,7 +17,7 @@
extern "C" {
#include "vp9/common/entropy.h"
#include "vp9/common/idct.h"
#include "vp9/encoder/dct.h"
#include "vpx_rtcd.h"
}
#include "acm_random.h"
......@@ -256,7 +256,7 @@ void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) {
}
TEST(VP8Idct16x16Test, AccuracyCheck) {
TEST(VP9Idct16x16Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
......@@ -271,7 +271,7 @@ TEST(VP8Idct16x16Test, AccuracyCheck) {
reference_16x16_dct_2d(in, out_r);
for (int j = 0; j < 256; j++)
coeff[j] = round(out_r[j]);
vp8_short_idct16x16_c(coeff, out_c, 32);
vp9_short_idct16x16_c(coeff, out_c, 32);
for (int j = 0; j < 256; ++j) {
const int diff = out_c[j] - in[j];
const int error = diff * diff;
......@@ -280,7 +280,7 @@ TEST(VP8Idct16x16Test, AccuracyCheck) {
<< " at index " << j;
}
vp8_short_fdct16x16_c(in, out_c, 32);
vp9_short_fdct16x16_c(in, out_c, 32);
for (int j = 0; j < 256; ++j) {
const double diff = coeff[j] - out_c[j];
const double error = diff * diff;
......@@ -291,7 +291,7 @@ TEST(VP8Idct16x16Test, AccuracyCheck) {
}
}
TEST(VP8Fdct16x16Test, AccuracyCheck) {
TEST(VP9Fdct16x16Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
......@@ -306,8 +306,8 @@ TEST(VP8Fdct16x16Test, AccuracyCheck) {
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
const int pitch = 32;
vp8_short_fdct16x16_c(test_input_block, test_temp_block, pitch);
vp8_short_idct16x16_c(test_temp_block, test_output_block, pitch);
vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch);
vp9_short_idct16x16_c(test_temp_block, test_output_block, pitch);
for (int j = 0; j < 256; ++j) {
const int diff = test_input_block[j] - test_output_block[j];
......@@ -325,7 +325,7 @@ TEST(VP8Fdct16x16Test, AccuracyCheck) {
<< "Error: 16x16 FDCT/IDCT has average roundtrip error > 1/10 per block";
}
TEST(VP8Fdct16x16Test, CoeffSizeCheck) {
TEST(VP9Fdct16x16Test, CoeffSizeCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 1000;
for (int i = 0; i < count_test_block; ++i) {
......@@ -342,8 +342,8 @@ TEST(VP8Fdct16x16Test, CoeffSizeCheck) {
input_extreme_block[j] = 255;
const int pitch = 32;
vp8_short_fdct16x16_c(input_block, output_block, pitch);
vp8_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);
vp9_short_fdct16x16_c(input_block, output_block, pitch);
vp9_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);
// The minimum quant value is 4.
for (int j = 0; j < 256; ++j) {
......
......@@ -16,7 +16,7 @@
extern "C" {
#include "vp9/common/idct.h"
#include "vp9/encoder/dct.h"
#include "vpx_rtcd.h"
}
#include "acm_random.h"
......@@ -26,7 +26,7 @@ using libvpx_test::ACMRandom;
namespace {
TEST(Vp8FdctTest, SignBiasCheck) {
TEST(Vp9FdctTest, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int16_t test_input_block[16];
int16_t test_output_block[16];
......@@ -43,7 +43,7 @@ TEST(Vp8FdctTest, SignBiasCheck) {
// TODO(Yaowu): this should be converted to a parameterized test
// to test optimized versions of this function.
vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
vp9_short_fdct4x4_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 16; ++j) {
if (test_output_block[j] < 0)
......@@ -70,7 +70,7 @@ TEST(Vp8FdctTest, SignBiasCheck) {
// TODO(Yaowu): this should be converted to a parameterized test
// to test optimized versions of this function.
vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);
vp9_short_fdct4x4_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 16; ++j) {
if (test_output_block[j] < 0)
......@@ -89,7 +89,7 @@ TEST(Vp8FdctTest, SignBiasCheck) {
}
};
TEST(Vp8FdctTest, RoundTripErrorCheck) {
TEST(Vp9FdctTest, RoundTripErrorCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
......@@ -106,7 +106,7 @@ TEST(Vp8FdctTest, RoundTripErrorCheck) {
// TODO(Yaowu): this should be converted to a parameterized test
// to test optimized versions of this function.
const int pitch = 8;
vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
vp9_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
for (int j = 0; j < 16; ++j) {
if(test_temp_block[j] > 0) {
......@@ -121,7 +121,7 @@ TEST(Vp8FdctTest, RoundTripErrorCheck) {
}
// Because the bitstream is not frozen yet, use the idct in the codebase.
vp8_short_idct4x4llm_c(test_temp_block, test_output_block, pitch);
vp9_short_idct4x4llm_c(test_temp_block, test_output_block, pitch);
for (int j = 0; j < 16; ++j) {
const int diff = test_input_block[j] - test_output_block[j];
......
......@@ -15,8 +15,8 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "vp9/encoder/dct.h"
#include "vp9/common/idct.h"
#include "vpx_rtcd.h"
}
#include "acm_random.h"
......@@ -26,7 +26,7 @@ using libvpx_test::ACMRandom;
namespace {
TEST(VP8Fdct8x8Test, SignBiasCheck) {
TEST(VP9Fdct8x8Test, SignBiasCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int16_t test_input_block[64];
int16_t test_output_block[64];
......@@ -41,7 +41,7 @@ TEST(VP8Fdct8x8Test, SignBiasCheck) {
for (int j = 0; j < 64; ++j)
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
vp8_short_fdct8x8_c(test_input_block, test_output_block, pitch);
vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 64; ++j) {
if (test_output_block[j] < 0)
......@@ -66,7 +66,7 @@ TEST(VP8Fdct8x8Test, SignBiasCheck) {
for (int j = 0; j < 64; ++j)
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
vp8_short_fdct8x8_c(test_input_block, test_output_block, pitch);
vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
for (int j = 0; j < 64; ++j) {
if (test_output_block[j] < 0)
......@@ -85,7 +85,7 @@ TEST(VP8Fdct8x8Test, SignBiasCheck) {
}
};
TEST(VP8Fdct8x8Test, RoundTripErrorCheck) {
TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
......@@ -100,7 +100,7 @@ TEST(VP8Fdct8x8Test, RoundTripErrorCheck) {
test_input_block[j] = rnd.Rand8() - rnd.Rand8();
const int pitch = 16;
vp8_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
for (int j = 0; j < 64; ++j){
if(test_temp_block[j] > 0) {
test_temp_block[j] += 2;
......@@ -112,7 +112,7 @@ TEST(VP8Fdct8x8Test, RoundTripErrorCheck) {
test_temp_block[j] *= 4;
}
}
vp8_short_idct8x8_c(test_temp_block, test_output_block, pitch);
vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch);
for (int j = 0; j < 64; ++j) {
const int diff = test_input_block[j] - test_output_block[j];
......@@ -130,7 +130,7 @@ TEST(VP8Fdct8x8Test, RoundTripErrorCheck) {
<< "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block";
};
TEST(VP8Fdct8x8Test, ExtremalCheck) {
TEST(VP9Fdct8x8Test, ExtremalCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
int max_error = 0;
double total_error = 0;
......@@ -145,8 +145,8 @@ TEST(VP8Fdct8x8Test, ExtremalCheck) {
test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255;
const int pitch = 16;
vp8_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
vp8_short_idct8x8_c(test_temp_block, test_output_block, pitch);
vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
vp9_short_idct8x8_c(test_temp_block, test_output_block, pitch);
for (int j = 0; j < 64; ++j) {
const int diff = test_input_block[j] - test_output_block[j];
......
......@@ -15,8 +15,8 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "vp9/encoder/dct.h"
#include "vp9/common/idct.h"
#include "vpx_rtcd.h"
}
#include "acm_random.h"
......@@ -99,7 +99,7 @@ void reference_idct_2d(double input[64], int16_t output[64]) {
output[i] = round(out2[i]/32);
}
TEST(VP8Idct8x8Test, AccuracyCheck) {
TEST(VP9Idct8x8Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
for (int i = 0; i < count_test_block; ++i) {
......@@ -112,7 +112,7 @@ TEST(VP8Idct8x8Test, AccuracyCheck) {
input[j] = rnd.Rand8() - rnd.Rand8();
const int pitch = 16;
vp8_short_fdct8x8_c(input, output_c, pitch);
vp9_short_fdct8x8_c(input, output_c, pitch);
reference_dct_2d(input, output_r);
for (int j = 0; j < 64; ++j) {
......@@ -140,7 +140,7 @@ TEST(VP8Idct8x8Test, AccuracyCheck) {
reference_dct_2d(input, output_r);
for (int j = 0; j < 64; ++j)
coeff[j] = round(output_r[j]);
vp8_short_idct8x8_c(coeff, output_c, pitch);
vp9_short_idct8x8_c(coeff, output_c, pitch);
for (int j = 0; j < 64; ++j) {
const int diff = output_c[j] -input[j];
const int error = diff * diff;
......
......@@ -33,9 +33,12 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc
##
ifeq ($(CONFIG_SHARED),)
## VP8
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
# These tests require both the encoder and decoder to be built.
ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
LIBVPX_TEST_SRCS-yes += boolcoder_test.cc
LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc
endif
LIBVPX_TEST_SRCS-yes += idctllm_test.cc
......@@ -47,13 +50,22 @@ LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
# VP9 tests
endif # VP8
## VP9
ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
# These tests require both the encoder and decoder to be built.
ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),yesyes)
LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
endif
endif # VP9
endif
......
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
extern "C" {
#include "vp9/encoder/boolhuff.h"
#include "vp9/decoder/dboolhuff.h"
}
#include "acm_random.h"
#include "vpx/vpx_integer.h"
using libvpx_test::ACMRandom;
namespace {
// How many times the full sweep over probability and bit patterns is repeated.
const int num_tests = 10;
}  // namespace

// Writes 1000 bits through the boolean encoder and reads them back through
// the boolean decoder, for every combination of eight probability patterns
// ("method") and four bit patterns ("bit_method"), asserting that every
// decoded bit equals the bit that was encoded.
TEST(VP9, TestBitIO) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  for (int n = 0; n < num_tests; ++n) {
    for (int method = 0; method <= 7; ++method) {  // we generate various proba
      const int bits_to_test = 1000;
      uint8_t probas[bits_to_test];

      // Fill the per-bit probability table for this method.
      for (int i = 0; i < bits_to_test; ++i) {
        const int parity = i & 1;
        switch (method) {
          case 0:
            probas[i] = 0;
            break;
          case 1:
            probas[i] = 255;
            break;
          case 2:
            probas[i] = 128;
            break;
          case 3:
            probas[i] = rnd.Rand8();
            break;
          case 4:
            probas[i] = parity ? 0 : 255;
            break;
          default: {
            // alternate between low and high proba:
            // methods 5, 6 and 7 draw from ranges of 128, 64 and 32.
            const int span = 128 >> (method - 5);
            probas[i] = parity ? rnd(span) : 255 - rnd(span);
            break;
          }
        }
      }

      for (int bit_method = 0; bit_method <= 3; ++bit_method) {
        const int random_seed = 6432;
        const int buffer_size = 10000;
        ACMRandom bit_rnd(random_seed);
        uint8_t bw_buffer[buffer_size];

        // Encoding pass. bit_method 0 -> all zeros, 1 -> all ones,
        // 2 -> alternating 0/1, 3 -> pseudo-random bits.
        BOOL_CODER bw;
        vp9_start_encode(&bw, bw_buffer);
        int bit = (bit_method == 1) ? 1 : 0;
        for (int i = 0; i < bits_to_test; ++i) {
          switch (bit_method) {
            case 2:
              bit = (i & 1);
              break;
            case 3:
              bit = bit_rnd(2);
              break;
            default:
              break;
          }
          encode_bool(&bw, bit, static_cast<int>(probas[i]));
        }
        vp9_stop_encode(&bw);

        // Decoding pass. Re-seed the bit generator so that the expected
        // sequence for bit_method 3 is replayed exactly.
        BOOL_DECODER br;
        vp9_start_decode(&br, bw_buffer, buffer_size);
        bit_rnd.Reset(random_seed);
        for (int i = 0; i < bits_to_test; ++i) {
          switch (bit_method) {
            case 2:
              bit = (i & 1);
              break;
            case 3:
              bit = bit_rnd(2);
              break;
            default:
              break;
          }
          GTEST_ASSERT_EQ(decode_bool(&br, probas[i]), bit)
              << "pos: " << i << " / " << bits_to_test
              << " bit_method: " << bit_method
              << " method: " << method;
        }
      }
    }
  }
}
......@@ -44,9 +44,7 @@ void vpx_log(const char *format, ...);
/* Segment Feature Masks */
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF
#define MAX_MV_REFS 19
#endif
typedef struct {
int r, c;
......@@ -216,9 +214,7 @@ typedef struct {
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
TX_SIZE txfm_size;
int_mv mv[2]; // for each reference frame used
#if CONFIG_NEWBESTREFMV || CONFIG_NEW_MVREF
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REFS];
#endif
SPLITMV_PARTITIONING_TYPE partitioning;
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
......@@ -280,7 +276,7 @@ typedef struct macroblockd {
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
DECLARE_ALIGNED(16, unsigned short, eobs[25]);
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
BLOCKD block[25];
......@@ -467,7 +463,10 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) {
TX_TYPE tx_type = DCT_DCT;
if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
xd->q_index < ACTIVE_HT8) {
tx_type = txfm_map(pred_mode_conv(b->bmi.as_mode.first));
// TODO(rbultje): MB_PREDICTION_MODE / B_PREDICTION_MODE should be merged
// or the relationship otherwise modified to address this type conversion.
tx_type = txfm_map(pred_mode_conv(
(MB_PREDICTION_MODE)b->bmi.as_mode.first));
}
return tx_type;
}
......@@ -483,7 +482,7 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) {
static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) {
TX_TYPE tx_type = DCT_DCT;
int ib = (b - xd->block);
int ib = (int)(b - xd->block);
if (ib >= 16)
return tx_type;
if (xd->mode_info_context->mbmi.txfm_size == TX_16X16) {
......
......@@ -11,6 +11,7 @@
#include "findnearmv.h"
#include "vp9/common/sadmxn.h"
#include "vp9/common/subpelvar.h"
#include <limits.h>
const unsigned char vp9_mbsplit_offset[4][16] = {
......@@ -167,7 +168,6 @@ vp9_prob *vp9_mv_ref_probs(VP9_COMMON *pc,
return p;
}
#if CONFIG_NEWBESTREFMV
#define SP(x) (((x) & 7) << 1)
unsigned int vp9_sad3x16_c(
const unsigned char *src_ptr,
......@@ -186,6 +186,76 @@ unsigned int vp9_sad16x3_c(
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 3);
}
#if CONFIG_SUBPELREFMV
/* Variance-style metric for a 2-wide, 16-high block.
 *
 * Delegates to variance() with width 2 and height 16, stores the first value
 * it produces in *sse, and returns that value minus the squared second value
 * divided by 32 (the 2 * 16 sample count, hence the shift by 5).
 * NOTE(review): variance() presumably yields the sum of squared differences
 * and the sum of differences -- confirm against its definition. */
unsigned int vp9_variance2x16_c(const unsigned char *src_ptr,
                                const int source_stride,
                                const unsigned char *ref_ptr,
                                const int recon_stride,
                                unsigned int *sse) {
  int sum;
  unsigned int sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16,
           &sum_sq, &sum);
  *sse = sum_sq;

  return sum_sq - ((sum * sum) >> 5);
}
/* Variance-style metric for a 16-wide, 2-high block.
 *
 * Delegates to variance() with width 16 and height 2, stores the first value
 * it produces in *sse, and returns that value minus the squared second value
 * divided by 32 (the 16 * 2 sample count, hence the shift by 5).
 * NOTE(review): variance() presumably yields the sum of squared differences
 * and the sum of differences -- confirm against its definition. */
unsigned int vp9_variance16x2_c(const unsigned char *src_ptr,
                                const int source_stride,
                                const unsigned char *ref_ptr,
                                const int recon_stride,
                                unsigned int *sse) {
  int sum;
  unsigned int sum_sq;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2,
           &sum_sq, &sum);
  *sse = sum_sq;

  return sum_sq - ((sum * sum) >> 5);
}
/* Sub-pixel variance for a 16-wide, 2-high block.
 *
 * Runs the two-pass bilinear filter over the source, using the filter rows
 * selected by xoffset and yoffset, and then hands the filtered block to
 * vp9_variance16x2_c() together with the destination block.
 * NOTE(review): xoffset / yoffset index vp9_bilinear_filters directly with no
 * range check here; callers must keep them within the table -- confirm. */
unsigned int vp9_sub_pixel_variance16x2_c(const unsigned char *src_ptr,
                                          const int src_pixels_per_line,
                                          const int xoffset,
                                          const int yoffset,
                                          const unsigned char *dst_ptr,
                                          const int dst_pixels_per_line,
                                          unsigned int *sse) {
  const short *const horiz_filter = vp9_bilinear_filters[xoffset];
  const short *const vert_filter = vp9_bilinear_filters[yoffset];
  unsigned short first_pass[16 * 3];  /* Temp data buffer used in filtering */
  unsigned char filtered[20 * 16];    /* Output of the second pass */

  /* presumably: pixel step 1, 3 rows (height + 1), 16 columns -- confirm */
  var_filter_block2d_bil_first_pass(src_ptr, first_pass,
                                    src_pixels_per_line, 1, 3, 16,
                                    horiz_filter);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 2, 16,
                                     vert_filter);

  return vp9_variance16x2_c(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
/* Sub-pixel variance for a 2-wide, 16-high block.
 *
 * Runs the two-pass bilinear filter over the source, using the filter rows
 * selected by xoffset and yoffset, and then hands the filtered block to
 * vp9_variance2x16_c() together with the destination block.
 * NOTE(review): xoffset / yoffset index vp9_bilinear_filters directly with no
 * range check here; callers must keep them within the table -- confirm. */
unsigned int vp9_sub_pixel_variance2x16_c(const unsigned char *src_ptr,
                                          const int src_pixels_per_line,
                                          const int xoffset,
                                          const int yoffset,
                                          const unsigned char *dst_ptr,
                                          const int dst_pixels_per_line,
                                          unsigned int *sse) {
  const short *const horiz_filter = vp9_bilinear_filters[xoffset];
  const short *const vert_filter = vp9_bilinear_filters[yoffset];
  unsigned short first_pass[2 * 17];  /* Temp data buffer used in filtering */
  unsigned char filtered[2 * 16];     /* Output of the second pass */

  /* presumably: pixel step 1, 17 rows (height + 1), 2 columns -- confirm */
  var_filter_block2d_bil_first_pass(src_ptr, first_pass,
                                    src_pixels_per_line, 1, 17, 2,
                                    horiz_filter);
  var_filter_block2d_bil_second_pass(first_pass, filtered, 2, 2, 16, 2,
                                     vert_filter);

  return vp9_variance2x16_c(filtered, 2, dst_ptr, dst_pixels_per_line, sse);
}
#endif
/* Check a list of motion vectors by SAD score, using a number of rows of
 * pixels above and a number of columns of pixels to the left, to select the
 * one with the best score to use as the reference motion vector
......@@ -323,5 +393,3 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
// Copy back the re-ordered mv list
vpx_memcpy(mvlist, sorted_mvs, sizeof(sorted_mvs));
}
#endif // CONFIG_NEWBESTREFMV
......@@ -18,7 +18,6 @@
#include "treecoder.h"
#include "onyxc_int.h"
#if CONFIG_NEWBESTREFMV
/* Check a list of motion vectors by SAD score, using a number of rows of
 * pixels above and a number of columns of pixels to the left, to select the
 * one with the best score to use as the reference motion vector
......@@ -30,7 +29,6 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd,
int_mv *best_mv,
int_mv *nearest,
int_mv *near);
#endif
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias) {
MV xmv;
......
......@@ -1013,6 +1013,8 @@ void vp9_short_idct16x16_c(short *input, short *output, int pitch) {
}
#endif
#define TEST_INT_16x16_IDCT 1
#if !TEST_INT_16x16_IDCT
static const double C1 = 0.995184726672197;
static const double C2 = 0.98078528040323;
static const double C3 = 0.956940335732209;
......@@ -1273,3 +1275,235 @@ void vp9_short_idct16x16_c(short *input, short *output, int pitch) {
}
vp9_clear_system_state(); // Make it simd safe : __asm emms;
}
#else
/* Integer counterparts of the floating-point constants used by the other
 * branch of TEST_INT_16x16_IDCT: Ck == round(cos(k * pi / 32) * 2^14),
 * i.e. the cosines scaled by 1 << RIGHT_SHIFT. */
static const int16_t C1 = 16305;
static const int16_t C2 = 16069;
static const int16_t C3 = 15679;
static const int16_t C4 = 15137;
static const int16_t C5 = 14449;
static const int16_t C6 = 13623;
static const int16_t C7 = 12665;
static const int16_t C8 = 11585;
static const int16_t C9 = 10394;
static const int16_t C10 = 9102;
static const int16_t C11 = 7723;
static const int16_t C12 = 6270;
static const int16_t C13 = 4756;
static const int16_t C14 = 3196;
static const int16_t C15 = 1606;
/* Shift amounts and their matching half-unit biases (1 << (shift - 1)),
 * i.e. 2 for INITIAL_SHIFT and 8192 for RIGHT_SHIFT.
 * NOTE(review): presumably added before the right shift to round to nearest
 * rather than truncate -- confirm against the butterfly code that uses them. */
#define INITIAL_SHIFT 2
#define INITIAL_ROUNDING (1 << (INITIAL_SHIFT - 1))
/* Number of fractional bits carried by the C1..C15 constants above. */
#define RIGHT_SHIFT 14
#define RIGHT_ROUNDING (1 << (RIGHT_SHIFT - 1))
static void butterfly_16x16_idct_1d(int16_t input[16], int16_t output[16],
int last_shift_bits) {
int16_t step[16];
int intermediate[16];
int temp1, temp2;
int step1_shift = RIGHT_SHIFT + INITIAL_SHIFT;
int step1_rounding = 1 << (step1_shift - 1);
int last_rounding = 0;