Commit 8cacca73 authored by Yi Luo's avatar Yi Luo
Browse files

HBD convolution filtering (10/12 taps) SSE4.1 optimization

- For experiment EXT_INTERP under high bit depth.
- Add unit tests to verify bit-exactness against the C reference.
- Speed performance improvement:
  On Xeon E5-2680, park_joy_1080p_12.y4m, 50 frames, encoding time
  drops from 6682503 ms to 5390270 ms.

Change-Id: Iea4debf5414f3accf1eb5672abeab56a0539ac77
parent 1178f71d
......@@ -24,12 +24,25 @@ using libvpx_test::ACMRandom;
// Function-pointer type for the 8-bit 1-D convolve kernels under test.
// The tests call it as:
//   (src, src_stride, dst, dst_stride, w, h, filter_params,
//    subpel_q4, step_q4, avg)
typedef void (*conv_filter_t)(const uint8_t*, int, uint8_t*, int,
int, int, const InterpFilterParams,
const int, int, int);
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart: samples are uint16_t and a trailing int
// argument carries the bit depth.
typedef void (*hbd_conv_filter_t)(const uint16_t*, int, uint16_t*, int,
int, int, const InterpFilterParams,
const int, int, int, int);
#endif
// Test parameter list (8-bit):
// <convolve_horiz_func, convolve_vert_func,
// <width, height>, filter_params, subpel_x_q4, avg>
typedef tuple<int, int> BlockDimension;
typedef tuple<conv_filter_t, conv_filter_t, BlockDimension, INTERP_FILTER,
int, int> ConvParams;
#if CONFIG_VP9_HIGHBITDEPTH
// Test parameter list (high bitdepth); same layout as ConvParams with the
// bit depth appended:
// <convolve_horiz_func, convolve_vert_func,
// <width, height>, filter_params, subpel_x_q4, avg, bit_depth>
typedef tuple<hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension,
INTERP_FILTER, int, int, int> HbdConvParams;
#endif
// Note:
// src_ and src_ref_ have special boundary requirement
......@@ -75,11 +88,8 @@ class VP10ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
void RunVertFilterBitExactCheck();
private:
void PrepFilterBuffer(uint8_t *src, uint8_t *src_ref,
uint8_t *dst, uint8_t *dst_ref,
int w, int h);
void DiffFilterBuffer(const uint8_t *buf, const uint8_t *buf_ref,
int w, int h, int fgroup, int findex);
void PrepFilterBuffer(int w, int h);
void DiffFilterBuffer();
conv_filter_t conv_horiz_;
conv_filter_t conv_vert_;
uint8_t *alloc_;
......@@ -94,18 +104,16 @@ class VP10ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
int avg_;
};
void VP10ConvolveOptimzTest::PrepFilterBuffer(uint8_t *src, uint8_t *src_ref,
uint8_t *dst, uint8_t *dst_ref,
int w, int h) {
void VP10ConvolveOptimzTest::PrepFilterBuffer(int w, int h) {
int r, c;
ACMRandom rnd(ACMRandom::DeterministicSeed());
memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
uint8_t *src_ptr = src;
uint8_t *dst_ptr = dst;
uint8_t *src_ref_ptr = src_ref;
uint8_t *dst_ref_ptr = dst_ref;
uint8_t *src_ptr = src_;
uint8_t *dst_ptr = dst_;
uint8_t *src_ref_ptr = src_ref_;
uint8_t *dst_ref_ptr = dst_ref_;
for (r = 0; r < height_; ++r) {
for (c = 0; c < width_; ++c) {
......@@ -121,21 +129,17 @@ void VP10ConvolveOptimzTest::PrepFilterBuffer(uint8_t *src, uint8_t *src_ref,
}
}
void VP10ConvolveOptimzTest::DiffFilterBuffer(const uint8_t *buf,
const uint8_t *buf_ref,
int w, int h,
int filter_group,
int filter_index) {
void VP10ConvolveOptimzTest::DiffFilterBuffer() {
int r, c;
const uint8_t *dst_ptr = buf;
const uint8_t *dst_ref_ptr = buf_ref;
for (r = 0; r < h; ++r) {
for (c = 0; c < w; ++c) {
const uint8_t *dst_ptr = dst_;
const uint8_t *dst_ref_ptr = dst_ref_;
for (r = 0; r < height_; ++r) {
for (c = 0; c < width_; ++c) {
EXPECT_EQ((uint8_t)dst_ref_ptr[c], (uint8_t)dst_ptr[c])
<< "Error at row: " << r << " col: " << c << " "
<< "w = " << w << " " << "h = " << h << " "
<< "filter group index = " << filter_group << " "
<< "filter index = " << filter_index;
<< "w = " << width_ << " " << "h = " << height_ << " "
<< "filter group index = " << filter_ << " "
<< "filter index = " << subpel_;
}
dst_ptr += stride;
dst_ref_ptr += stride;
......@@ -143,7 +147,7 @@ void VP10ConvolveOptimzTest::DiffFilterBuffer(const uint8_t *buf,
}
void VP10ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
PrepFilterBuffer(src_, src_ref_, dst_, dst_ref_, testMaxBlk, testMaxBlk);
PrepFilterBuffer(testMaxBlk, testMaxBlk);
InterpFilterParams filter_params = vp10_get_interp_filter_params(filter_);
......@@ -153,14 +157,14 @@ void VP10ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
conv_horiz_(src_, stride, dst_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_);
DiffFilterBuffer(dst_, dst_ref_, width_, height_, filter_, subpel_);
DiffFilterBuffer();
// Note:
// Here we need calculate a height which is different from the specified one
// and test again.
int intermediate_height =
(((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
PrepFilterBuffer(src_, src_ref_, dst_, dst_ref_, testMaxBlk, testMaxBlk);
PrepFilterBuffer(testMaxBlk, testMaxBlk);
vp10_convolve_horiz_c(src_ref_, stride, dst_ref_, stride, width_,
intermediate_height, filter_params, subpel_, x_step_q4,
......@@ -170,12 +174,11 @@ void VP10ConvolveOptimzTest::RunHorizFilterBitExactCheck() {
intermediate_height, filter_params, subpel_, x_step_q4,
avg_);
DiffFilterBuffer(dst_, dst_ref_, width_, intermediate_height, filter_,
subpel_);
DiffFilterBuffer();
}
void VP10ConvolveOptimzTest::RunVertFilterBitExactCheck() {
PrepFilterBuffer(src_, src_ref_, dst_, dst_ref_, testMaxBlk, testMaxBlk);
PrepFilterBuffer(testMaxBlk, testMaxBlk);
InterpFilterParams filter_params = vp10_get_interp_filter_params(filter_);
......@@ -185,7 +188,7 @@ void VP10ConvolveOptimzTest::RunVertFilterBitExactCheck() {
conv_vert_(src_, stride, dst_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_);
DiffFilterBuffer(dst_, dst_ref_, width_, height_, filter_, subpel_);
DiffFilterBuffer();
}
TEST_P(VP10ConvolveOptimzTest, HorizBitExactCheck) {
......@@ -197,7 +200,7 @@ TEST_P(VP10ConvolveOptimzTest, VerticalBitExactCheck) {
using std::tr1::make_tuple;
#if HAVE_SSSE3 && CONFIG_EXT_INTERP
#if (HAVE_SSSE3 || HAVE_SSE4_1) && CONFIG_EXT_INTERP
const BlockDimension kBlockDim[] = {
make_tuple(2, 2),
make_tuple(2, 4),
......@@ -225,7 +228,9 @@ const INTERP_FILTER kFilter[] = {6, 4, 2};
const int kSubpelQ4[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
const int kAvg[] = {0, 1};
#endif
#if HAVE_SSSE3 && CONFIG_EXT_INTERP
INSTANTIATE_TEST_CASE_P(
SSSE3, VP10ConvolveOptimzTest,
::testing::Combine(
......@@ -236,4 +241,167 @@ INSTANTIATE_TEST_CASE_P(
::testing::ValuesIn(kSubpelQ4),
::testing::ValuesIn(kAvg)));
#endif // HAVE_SSSE3 && CONFIG_EXT_INTERP
#if CONFIG_VP9_HIGHBITDEPTH
typedef ::testing::TestWithParam<HbdConvParams> TestWithHbdConvParams;
class VP10HbdConvolveOptimzTest : public TestWithHbdConvParams {
public:
virtual ~VP10HbdConvolveOptimzTest() {}
virtual void SetUp() {
conv_horiz_ = GET_PARAM(0);
conv_vert_ = GET_PARAM(1);
BlockDimension block = GET_PARAM(2);
width_ = std::tr1::get<0>(block);
height_ = std::tr1::get<1>(block);
filter_ = GET_PARAM(3);
subpel_ = GET_PARAM(4);
avg_ = GET_PARAM(5);
bit_depth_ = GET_PARAM(6);
alloc_ = new uint16_t[maxBlockSize * 4];
src_ = alloc_ + (vertiOffset * maxWidth);
src_ += horizOffset;
src_ref_ = src_ + maxBlockSize;
dst_ = alloc_ + 2 * maxBlockSize;
dst_ref_ = alloc_ + 3 * maxBlockSize;
}
virtual void TearDown() {
delete[] alloc_;
libvpx_test::ClearSystemState();
}
protected:
void RunHorizFilterBitExactCheck();
void RunVertFilterBitExactCheck();
private:
void PrepFilterBuffer(int w, int h);
void DiffFilterBuffer();
hbd_conv_filter_t conv_horiz_;
hbd_conv_filter_t conv_vert_;
uint16_t *alloc_;
uint16_t *src_;
uint16_t *dst_;
uint16_t *src_ref_;
uint16_t *dst_ref_;
int width_;
int height_;
int filter_;
int subpel_;
int avg_;
int bit_depth_;
};
void VP10HbdConvolveOptimzTest::PrepFilterBuffer(int w, int h) {
int r, c;
ACMRandom rnd(ACMRandom::DeterministicSeed());
memset(alloc_, 0, 4 * maxBlockSize * sizeof(alloc_[0]));
uint16_t *src_ptr = src_;
uint16_t *dst_ptr = dst_;
uint16_t *dst_ref_ptr = dst_ref_;
uint16_t hbd_mask = (1 << bit_depth_) - 1;
for (r = 0; r < height_; ++r) {
for (c = 0; c < width_; ++c) {
src_ptr[c] = rnd.Rand16() & hbd_mask;
dst_ptr[c] = rnd.Rand16() & hbd_mask;
dst_ref_ptr[c] = dst_ptr[c];
}
src_ptr += stride;
dst_ptr += stride;
dst_ref_ptr += stride;
}
}
// Compares dst_ (kernel under test) against dst_ref_ (reference output)
// sample by sample over a width_ x height_ window and reports every mismatch
// together with the test parameters.
// NOTE(review): only height_ rows are compared, including after the second
// pass of RunHorizFilterBitExactCheck(), which filters intermediate_height
// rows; rows beyond height_ are not checked here.
void VP10HbdConvolveOptimzTest::DiffFilterBuffer() {
int r, c;
const uint16_t *dst_ptr = dst_;
const uint16_t *dst_ref_ptr = dst_ref_;
for (r = 0; r < height_; ++r) {
for (c = 0; c < width_; ++c) {
// Expected value (reference) first, actual value second.
EXPECT_EQ((uint16_t)dst_ref_ptr[c], (uint16_t)dst_ptr[c])
<< "Error at row: " << r << " col: " << c << " "
<< "w = " << width_ << " " << "h = " << height_ << " "
<< "filter group index = " << filter_ << " "
<< "filter index = " << subpel_ << " "
<< "bit depth = " << bit_depth_;
}
// Advance both buffers to the next row.
dst_ptr += stride;
dst_ref_ptr += stride;
}
}
void VP10HbdConvolveOptimzTest::RunHorizFilterBitExactCheck() {
PrepFilterBuffer(testMaxBlk, testMaxBlk);
InterpFilterParams filter_params = vp10_get_interp_filter_params(filter_);
vp10_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_,
height_, filter_params, subpel_, x_step_q4,
avg_, bit_depth_);
conv_horiz_(src_, stride, dst_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_, bit_depth_);
DiffFilterBuffer();
// Note:
// Here we need calculate a height which is different from the specified one
// and test again.
int intermediate_height =
(((height_ - 1) * 16 + subpel_) >> SUBPEL_BITS) + filter_params.taps;
PrepFilterBuffer(testMaxBlk, testMaxBlk);
vp10_highbd_convolve_horiz_c(src_, stride, dst_ref_, stride, width_,
intermediate_height, filter_params, subpel_,
x_step_q4, avg_, bit_depth_);
conv_horiz_(src_, stride, dst_, stride, width_, intermediate_height,
filter_params, subpel_, x_step_q4, avg_, bit_depth_);
DiffFilterBuffer();
}
void VP10HbdConvolveOptimzTest::RunVertFilterBitExactCheck() {
PrepFilterBuffer(testMaxBlk, testMaxBlk);
InterpFilterParams filter_params = vp10_get_interp_filter_params(filter_);
vp10_highbd_convolve_vert_c(src_, stride, dst_ref_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_,
bit_depth_);
conv_vert_(src_, stride, dst_, stride, width_, height_,
filter_params, subpel_, x_step_q4, avg_, bit_depth_);
DiffFilterBuffer();
}
// Horizontal kernel: output must match the C reference exactly.
TEST_P(VP10HbdConvolveOptimzTest, HorizBitExactCheck) {
RunHorizFilterBitExactCheck();
}
// Vertical kernel: output must match the C reference exactly.
TEST_P(VP10HbdConvolveOptimzTest, VertBitExactCheck) {
RunVertFilterBitExactCheck();
}
#if HAVE_SSE4_1 && CONFIG_EXT_INTERP
// Bit depths exercised by the high-bitdepth tests.
const int kBitdepth[] = {10, 12};
// Instantiates the fixture for the SSE4.1 kernels over every combination of
// block dimension, filter, subpel position, avg flag and bit depth.
// kBlockDim, kFilter, kSubpelQ4 and kAvg are the arrays also used by the
// 8-bit instantiation.
INSTANTIATE_TEST_CASE_P(
SSE4_1, VP10HbdConvolveOptimzTest,
::testing::Combine(
::testing::Values(vp10_highbd_convolve_horiz_sse4_1),
::testing::Values(vp10_highbd_convolve_vert_sse4_1),
::testing::ValuesIn(kBlockDim),
::testing::ValuesIn(kFilter),
::testing::ValuesIn(kSubpelQ4),
::testing::ValuesIn(kAvg),
::testing::ValuesIn(kBitdepth)));
#endif // HAVE_SSE4_1 && CONFIG_EXT_INTERP
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
......@@ -342,3 +342,25 @@ SubpelFilterCoeffs vp10_get_subpel_filter_ver_signal_dir(
(void)index;
return NULL;
}
#if CONFIG_VP9_HIGHBITDEPTH
// Looks up the high-bitdepth vertical coefficient table that corresponds to
// the filter kernel referenced by p.filter_ptr and returns the rows stored
// at position `index` in that table.
//
// Returns NULL when p.filter_ptr matches none of the kernels compiled in
// (which kernels are considered depends on CONFIG_EXT_INTERP,
// USE_TEMPORALFILTER_12TAP and HAVE_SSE4_1).
// `index` is used unchecked as the first subscript of a [15][6][8] table.
HbdSubpelFilterCoeffs vp10_hbd_get_subpel_filter_ver_signal_dir(
    const InterpFilterParams p, int index) {
#if CONFIG_EXT_INTERP && HAVE_SSE4_1
  if (p.filter_ptr == (const int16_t *)sub_pel_filters_12sharp)
    return sub_pel_filters_12sharp_highbd_ver_signal_dir[index];
  if (p.filter_ptr == (const int16_t *)sub_pel_filters_10sharp)
    return sub_pel_filters_10sharp_highbd_ver_signal_dir[index];
#endif
#if USE_TEMPORALFILTER_12TAP && HAVE_SSE4_1
  if (p.filter_ptr == (const int16_t *)sub_pel_filters_temporalfilter_12)
    return sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[index];
#endif
  // Keep both parameters "used" when every lookup above is compiled out.
  (void)p;
  (void)index;
  return NULL;
}
#endif
......@@ -95,6 +95,10 @@ static INLINE int vp10_is_interpolating_filter(
#if USE_TEMPORALFILTER_12TAP
extern const int8_t sub_pel_filters_temporalfilter_12_signal_dir[15][2][16];
extern const int8_t sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16];
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth vertical coefficient table for the 12-tap temporal filter:
// 16-bit coefficients, 15 entries of 6 rows x 8 values. The first dimension
// is selected by the `index` argument of
// vp10_hbd_get_subpel_filter_ver_signal_dir().
extern const
int16_t sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8];
#endif
#endif
#if CONFIG_EXT_INTERP
......@@ -102,15 +106,26 @@ extern const int8_t sub_pel_filters_12sharp_signal_dir[15][2][16];
extern const int8_t sub_pel_filters_10sharp_signal_dir[15][2][16];
extern const int8_t sub_pel_filters_12sharp_ver_signal_dir[15][6][16];
extern const int8_t sub_pel_filters_10sharp_ver_signal_dir[15][6][16];
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth vertical coefficient tables for the 12-tap and 10-tap sharp
// filters: 16-bit coefficients, 15 entries of 6 rows x 8 values. The first
// dimension is selected by the `index` argument of
// vp10_hbd_get_subpel_filter_ver_signal_dir().
extern const int16_t sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8];
extern const int16_t sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8];
#endif
#endif
// Pointer to rows of 16 int8_t coefficients (8-bit signal-direction tables).
typedef const int8_t (*SubpelFilterCoeffs)[16];
#if CONFIG_VP9_HIGHBITDEPTH
// Pointer to rows of 8 int16_t coefficients (high-bitdepth tables).
typedef const int16_t (*HbdSubpelFilterCoeffs)[8];
#endif
// Coefficient-table lookups keyed by the filter in `p` and a table `index`.
SubpelFilterCoeffs vp10_get_subpel_filter_signal_dir(
const InterpFilterParams p, int index);
SubpelFilterCoeffs vp10_get_subpel_filter_ver_signal_dir(
const InterpFilterParams p, int index);
#if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth vertical lookup; returns NULL when no table matches
// p.filter_ptr.
HbdSubpelFilterCoeffs vp10_hbd_get_subpel_filter_ver_signal_dir(
const InterpFilterParams p, int index);
#endif
#ifdef __cplusplus
} // extern "C"
......
......@@ -182,7 +182,7 @@ void vp10_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
}
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_convolve_horiz(const uint16_t *src, int src_stride,
void vp10_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, int avg,
......@@ -213,7 +213,7 @@ static void highbd_convolve_horiz(const uint16_t *src, int src_stride,
}
}
static void highbd_convolve_vert(const uint16_t *src, int src_stride,
void vp10_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
const InterpFilterParams filter_params,
const int subpel_y_q4, int y_step_q4, int avg,
......@@ -300,8 +300,9 @@ void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
#endif
highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, ref_idx, bd);
vp10_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, ref_idx,
bd);
} else if (ignore_horiz) {
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params =
......@@ -310,8 +311,9 @@ void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
InterpFilterParams filter_params =
vp10_get_interp_filter_params(interp_filter);
#endif
highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, ref_idx, bd);
vp10_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
filter_params, subpel_y_q4, y_step_q4, ref_idx,
bd);
} else {
// temp's size is set to (maximum possible intermediate_height) *
// MAX_BLOCK_WIDTH
......@@ -336,9 +338,10 @@ void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
int intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
temp, temp_stride, w, intermediate_height,
filter_params, subpel_x_q4, x_step_q4, 0, bd);
vp10_highbd_convolve_horiz(src - src_stride * (filter_size / 2 - 1),
src_stride, temp, temp_stride, w,
intermediate_height, filter_params, subpel_x_q4,
x_step_q4, 0, bd);
#if CONFIG_DUAL_FILTER
filter_params = filter_params_y;
......@@ -346,7 +349,7 @@ void vp10_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
filter_size = filter_params.taps;
assert(filter_params.taps <= MAX_FILTER_TAP);
highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
vp10_highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
temp_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, ref_idx, bd);
}
......
......@@ -93,6 +93,13 @@ specialize qw/vp10_convolve_horiz ssse3/;
add_proto qw/void vp10_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
specialize qw/vp10_convolve_vert ssse3/;
# High-bitdepth 1-D convolve prototypes, registered only when
# CONFIG_VP9_HIGHBITDEPTH is enabled. Both are specialized for sse4_1.
# The signatures are the 8-bit ones with uint16_t samples and a trailing
# "int bd" argument.
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_highbd_convolve_horiz/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
specialize qw/vp10_highbd_convolve_horiz sse4_1/;
add_proto qw/void vp10_highbd_convolve_vert/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
specialize qw/vp10_highbd_convolve_vert sse4_1/;
}
#
# dct
#
......
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vpx_config.h"
#include "vp10/common/filter.h"
#if CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_EXT_INTERP
DECLARE_ALIGNED(16, const int16_t,
sub_pel_filters_10sharp_highbd_ver_signal_dir[15][6][8]) = {
{
{ 0, 0, 0, 0, 0, 0, 0, 0, },
{ -1, 3, -1, 3, -1, 3, -1, 3, },
{ -6, 127, -6, 127, -6, 127, -6, 127, },
{ 8, -4, 8, -4, 8, -4, 8, -4, },
{ 2, -1, 2, -1, 2, -1, 2, -1, },
{ 0, 0, 0, 0, 0, 0, 0, 0, },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1, },
{ -2, 5, -2, 5, -2, 5, -2, 5, },
{-12, 124, -12, 124, -12, 124, -12, 124, },
{ 18, -7, 18, -7, 18, -7, 18, -7, },
{ 3, -2, 3, -2, 3, -2, 3, -2, },
{ 0, 0, 0, 0, 0, 0, 0, 0, },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1, },
{ -3, 7, -3, 7, -3, 7, -3, 7, },
{-17, 119, -17, 119, -17, 119, -17, 119, },
{ 28, -11, 28, -11, 28, -11, 28, -11, },
{ 5, -2, 5, -2, 5, -2, 5, -2, },
{ 1, 0, 1, 0, 1, 0, 1, 0, },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1, },
{ -4, 8, -4, 8, -4, 8, -4, 8, },
{-20, 114, -20, 114, -20, 114, -20, 114, },
{ 38, -14, 38, -14, 38, -14, 38, -14, },
{ 7, -3, 7, -3, 7, -3, 7, -3, },
{ 1, 0, 1, 0, 1, 0, 1, 0, },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1, },
{ -4, 9, -4, 9, -4, 9, -4, 9, },
{-22, 107, -22, 107, -22, 107, -22, 107, },
{ 49, -17, 49, -17, 49, -17, 49, -17, },
{ 8, -4, 8, -4, 8, -4, 8, -4, },
{ 1, 0, 1, 0, 1, 0, 1, 0, },
},
{
{ 0, 2, 0, 2, 0, 2, 0, 2, },
{ -5, 10, -5, 10, -5, 10, -5, 10, },
{-24, 99, -24, 99, -24, 99, -24, 99, },
{ 59, -20, 59, -20, 59, -20, 59, -20, },
{ 9, -4, 9, -4, 9, -4, 9, -4, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
},
{
{ 0, 2, 0, 2, 0, 2, 0, 2, },
{ -5, 10, -5, 10, -5, 10, -5, 10, },
{-24, 90, -24, 90, -24, 90, -24, 90, },
{ 70, -22, 70, -22, 70, -22, 70, -22, },
{ 10, -5, 10, -5, 10, -5, 10, -5, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
},
{
{ 0, 2, 0, 2, 0, 2, 0, 2, },
{ -5, 10, -5, 10, -5, 10, -5, 10, },
{-23, 80, -23, 80, -23, 80, -23, 80, },
{ 80, -23, 80, -23, 80, -23, 80, -23, },
{ 10, -5, 10, -5, 10, -5, 10, -5, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
},
{
{ 0, 2, 0, 2, 0, 2, 0, 2, },
{ -5, 10, -5, 10, -5, 10, -5, 10, },
{-22, 70, -22, 70, -22, 70, -22, 70, },
{ 90, -24, 90, -24, 90, -24, 90, -24, },
{ 10, -5, 10, -5, 10, -5, 10, -5, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
},
{
{ 0, 2, 0, 2, 0, 2, 0, 2, },
{ -4, 9, -4, 9, -4, 9, -4, 9, },
{-20, 59, -20, 59, -20, 59, -20, 59, },
{ 99, -24, 99, -24, 99, -24, 99, -24, },
{ 10, -5, 10, -5, 10, -5, 10, -5, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1, },
{ -4, 8, -4, 8, -4, 8, -4, 8, },
{-17, 49, -17, 49, -17, 49, -17, 49, },
{107, -22, 107, -22, 107, -22, 107, -22, },
{ 9, -4, 9, -4, 9, -4, 9, -4, },
{ 1, 0, 1, 0, 1, 0, 1, 0, },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1, },
{ -3, 7, -3, 7, -3, 7, -3, 7, },
{-14, 38, -14, 38, -14, 38, -14, 38, },
{114, -20, 114, -20, 114, -20, 114, -20, },
{ 8, -4, 8, -4, 8, -4, 8, -4, },
{ 1, 0, 1, 0, 1, 0, 1, 0, },
},
{