Commit a4e245a9, authored by Monty Montgomery, committed by Christopher Montgomery

Add CONFIG_DAALA_DCT64 experiment.

This experiment replaces the 64-point Type-II DCT and related
scaling vp9 transforms with the 64-point orthonormal
Daala transforms.

subset-1:

    monty-square-baseline-s1-F2@2017-07-28T03:35:45.962Z ->
      monty-square-dct64-s1-F2@2017-07-29T04:50:58.412Z

       PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
    -0.1930 | -0.2037 | -0.0643 |  -0.1917 | -0.2331 | -0.3510 |    -0.1810

objective-1-fast:

    monty-square-baseline-o1f-F2@2017-07-28T03:35:35.533Z ->
      monty-square-dct64-o1f-F2@2017-07-29T04:50:28.542Z

       PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
    -0.2557 | -0.1743 | -0.4900 |  -0.3028 | -0.4147 | -0.5764 |    -0.2864

Change-Id: I1f944df29e44d2e350c42555af274f2d75a62a92
parent ccfdfce1
......@@ -15,7 +15,7 @@
#include "./aom_dsp_rtcd.h"
#include "aom_dsp/inv_txfm.h"
#if CONFIG_DAALA_DCT4 || CONFIG_DAALA_DCT8 || CONFIG_DAALA_DCT16 || \
CONFIG_DAALA_DCT32
CONFIG_DAALA_DCT32 || CONFIG_DAALA_DCT64
#include "av1/common/daala_tx.h"
#endif
......@@ -1469,6 +1469,17 @@ void aom_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
}
}
#if CONFIG_TX64X64 && CONFIG_DAALA_DCT64
/* 64-point inverse DCT built on od_bin_idct64().
 *
 * input:  64 coefficients, read in full before the transform runs.
 * output: 64 results, written only after the transform has finished.
 *
 * Both directions go through local od_coeff buffers, with an explicit cast
 * on every element. */
void aom_idct64_c(const tran_low_t *input, tran_low_t *output) {
  od_coeff coeffs[64];
  od_coeff samples[64];
  int k;

  /* Stage the coefficients in the transform's own element type. */
  for (k = 0; k < 64; ++k) {
    coeffs[k] = (od_coeff)input[k];
  }

  /* The middle argument is 1; presumably the stride of the destination
   * buffer -- TODO confirm against the od_bin_idct64() declaration. */
  od_bin_idct64(samples, 1, coeffs);

  /* Hand the results back in the caller's element type. */
  for (k = 0; k < 64; ++k) {
    output[k] = (tran_low_t)samples[k];
  }
}
#endif
void aom_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
......
......@@ -68,6 +68,9 @@ void aom_idct4_c(const tran_low_t *input, tran_low_t *output);
void aom_idct8_c(const tran_low_t *input, tran_low_t *output);
void aom_idct16_c(const tran_low_t *input, tran_low_t *output);
void aom_idct32_c(const tran_low_t *input, tran_low_t *output);
#if CONFIG_TX64X64 && CONFIG_DAALA_DCT64
/* 64-point inverse DCT: reads 64 values from input and writes 64 values to
 * output. Declared only when both CONFIG_TX64X64 and CONFIG_DAALA_DCT64 are
 * enabled. */
void aom_idct64_c(const tran_low_t *input, tran_low_t *output);
#endif
void aom_iadst4_c(const tran_low_t *input, tran_low_t *output);
void aom_iadst8_c(const tran_low_t *input, tran_low_t *output);
void aom_iadst16_c(const tran_low_t *input, tran_low_t *output);
......
......@@ -1788,6 +1788,1336 @@
} \
while (0)
#if CONFIG_TX64X64
#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \
t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \
t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \
t7, tn, tnh, tf, tv, tvh) \
/* Embedded 32-point asymmetric Type-II fDCT. */ \
do { \
t0 += tvh; \
tv = t0 - tv; \
t1 = tuh - t1; \
tu -= t1; \
t2 += tth; \
tt = t2 - tt; \
t3 = tsh - t3; \
ts -= t3; \
t4 += trh; \
tr = t4 - tr; \
t5 = tqh - t5; \
tq -= t5; \
t6 += tph; \
tp = t6 - tp; \
t7 = toh - t7; \
to -= t7; \
t8 += tnh; \
tn = t8 - tn; \
t9 = tmh - t9; \
tm -= t9; \
ta += tlh; \
tl = ta - tl; \
tb = tkh - tb; \
tk -= tb; \
tc += tjh; \
tj = tc - tj; \
td = tih - td; \
ti -= td; \
te += thh; \
th = te - th; \
tf = tgh - tf; \
tg -= tf; \
OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \
t2, ti, ta, tq, t6, tm, te, tu); \
OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \
tt, td, tl, t5, tp, t9, th, t1); \
} \
while (0)
#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \
t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \
td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \
tf, tfh, tv, tvh) \
/* Embedded 32-point asymmetric Type-II iDCT. */ \
do { \
OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \
tu, tm, tq, ti, ts, tk, to, tg); \
OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \
t1, t9, t5, td, t3, tb, t7, tf); \
tv = t0 - tv; \
tvh = OD_DCT_RSHIFT(tv, 1); \
t0 -= tvh; \
t1 += tu; \
t1h = OD_DCT_RSHIFT(t1, 1); \
tu = t1h - tu; \
tt = t2 - tt; \
tth = OD_DCT_RSHIFT(tt, 1); \
t2 -= tth; \
t3 += ts; \
t3h = OD_DCT_RSHIFT(t3, 1); \
ts = t3h - ts; \
tr = t4 - tr; \
trh = OD_DCT_RSHIFT(tr, 1); \
t4 -= trh; \
t5 += tq; \
t5h = OD_DCT_RSHIFT(t5, 1); \
tq = t5h - tq; \
tp = t6 - tp; \
tph = OD_DCT_RSHIFT(tp, 1); \
t6 -= tph; \
t7 += to; \
t7h = OD_DCT_RSHIFT(t7, 1); \
to = t7h - to; \
tn = t8 - tn; \
tnh = OD_DCT_RSHIFT(tn, 1); \
t8 -= tnh; \
t9 += tm; \
t9h = OD_DCT_RSHIFT(t9, 1); \
tm = t9h - tm; \
tl = ta - tl; \
tlh = OD_DCT_RSHIFT(tl, 1); \
ta -= tlh; \
tb += tk; \
tbh = OD_DCT_RSHIFT(tb, 1); \
tk = tbh - tk; \
tj = tc - tj; \
tjh = OD_DCT_RSHIFT(tj, 1); \
tc -= tjh; \
td += ti; \
tdh = OD_DCT_RSHIFT(td, 1); \
ti = tdh - ti; \
th = te - th; \
thh = OD_DCT_RSHIFT(th, 1); \
te -= thh; \
tf += tg; \
tfh = OD_DCT_RSHIFT(tf, 1); \
tg = tfh - tg; \
} \
while (0)
#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \
tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \
/* Embedded 32-point asymmetric Type-IV fDST. */ \
do { \
int t0h; \
int t1h; \
int t4h; \
int t5h; \
int tqh; \
int trh; \
int tuh; \
int tvh; \
\
tu = -tu; \
\
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \
t5 -= (tq*13573 + 8192) >> 14; \
/* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \
OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \
tq += (t5*11585 + 16384) >> 15; \
/* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \
OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \
t5 -= (tq*13573 + 8192) >> 14; \
/* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \
tp += (t6*29957 + 16384) >> 15; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \
t6 -= (tp*11585 + 8192) >> 14; \
/* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \
tp -= (t6*19195 + 16384) >> 15; \
/* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \
tu += (t1*29957 + 16384) >> 15; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \
t1 -= (tu*11585 + 8192) >> 14; \
/* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \
tu -= (t1*19195 + 16384) >> 15; \
/* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \
tt += (t2*28681 + 16384) >> 15; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \
t2 -= (tt*15137 + 8192) >> 14; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \
tt += (t2*4161 + 8192) >> 14; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \
t3 += (ts*4161 + 8192) >> 14; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \
ts -= (t3*15137 + 8192) >> 14; \
/* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \
t3 += (ts*14341 + 8192) >> 14; \
/* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \
t9 -= (tm*19195 + 16384) >> 15; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \
tm -= (t9*11585 + 8192) >> 14; \
/* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \
t9 += (tm*7489 + 4096) >> 13; \
/* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \
ta += (tl*3259 + 4096) >> 13; \
/* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \
OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \
tl -= (ta*3135 + 8192) >> 14; \
/* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \
OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \
ta += (tl*3259 + 4096) >> 13; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \
tb += (tk*4161 + 8192) >> 14; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \
tk -= (tb*15137 + 8192) >> 14; \
/* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \
tb += (tk*14341 + 8192) >> 14; \
/* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \
OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \
th += (te*29957 + 16384) >> 15; \
/* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \
OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \
te -= (th*11585 + 8192) >> 14; \
/* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \
OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \
th -= (te*19195 + 16384) >> 15; \
/* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \
tj += (tc*28681 + 16384) >> 15; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \
tc -= (tj*15137 + 8192) >> 14; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \
tj += (tc*4161 + 8192) >> 14; \
/* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \
OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \
td += (ti*4161 + 8192) >> 14; \
/* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \
OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \
ti -= (td*15137 + 8192) >> 14; \
/* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \
OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \
td += (ti*14341 + 8192) >> 14; \
\
t1 = -t1; \
t2 = -t2; \
t3 = -t3; \
td = -td; \
tg = -tg; \
to = -to; \
ts = -ts; \
\
tr -= OD_DCT_RSHIFT(t5, 1); \
t5 += tr; \
tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \
t4 += tq; \
t6 -= OD_DCT_RSHIFT(t7, 1); \
t7 += t6; \
to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \
tp += to; \
t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \
t0 -= t1; \
tv -= OD_DCT_RSHIFT(tu, 1); \
tu += tv; \
t3 -= OD_DCT_RSHIFT(tt, 1); \
tt += t3; \
t2 += OD_DCT_RSHIFT(ts, 1); \
ts -= t2; \
t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \
t8 += t9; \
tn += OD_DCT_RSHIFT(tm, 1); \
tm -= tn; \
tb += OD_DCT_RSHIFT(ta, 1); \
ta -= tb; \
tl -= OD_DCT_RSHIFT(tk, 1); \
tk += tl; \
te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \
tf += te; \
tg -= OD_DCT_RSHIFT(th, 1); \
th += tg; \
tc -= OD_DCT_RSHIFT(ti, 1); \
ti += tc; \
td += OD_DCT_RSHIFT(tj, 1); \
tj -= td; \
\
t4 = -t4; \
\
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \
t4 += (tr*6723 + 4096) >> 13; \
/* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \
OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \
tr -= (t4*16069 + 8192) >> 14; \
/* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \
OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \
t4 += (tr*6723 + 4096) >> 13; \
/* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \
t5 += (tq*17515 + 16384) >> 15; \
/* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \
OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \
tq -= (t5*13623 + 8192) >> 14; \
/* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \
OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \
t5 += (tq*17515 + 16384) >> 15; \
/* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \
t7 += (to*3227 + 16384) >> 15; \
/* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \
OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \
to -= (t7*6393 + 16384) >> 15; \
/* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \
OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \
t7 += (to*3227 + 16384) >> 15; \
/* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \
t6 += (tp*2485 + 4096) >> 13; \
/* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \
OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \
tp -= (t6*18205 + 16384) >> 15; \
/* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \
OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \
t6 += (tp*2485 + 4096) >> 13; \
\
t5 = -t5; \
\
tr += to; \
trh = OD_DCT_RSHIFT(tr, 1); \
to -= trh; \
t4 += t7; \
t4h = OD_DCT_RSHIFT(t4, 1); \
t7 -= t4h; \
t5 += tp; \
t5h = OD_DCT_RSHIFT(t5, 1); \
tp -= t5h; \
tq += t6; \
tqh = OD_DCT_RSHIFT(tq, 1); \
t6 -= tqh; \
t0 -= t3; \
t0h = OD_DCT_RSHIFT(t0, 1); \
t3 += t0h; \
tv -= ts; \
tvh = OD_DCT_RSHIFT(tv, 1); \
ts += tvh; \
tu += tt; \
tuh = OD_DCT_RSHIFT(tu, 1); \
tt -= tuh; \
t1 -= t2; \
t1h = OD_DCT_RSHIFT(t1, 1); \
t2 += t1h; \
t8 += tb; \
tb -= OD_DCT_RSHIFT(t8, 1); \
tn += tk; \
tk -= OD_DCT_RSHIFT(tn, 1); \
t9 += tl; \
tl -= OD_DCT_RSHIFT(t9, 1); \
tm -= ta; \
ta += OD_DCT_RSHIFT(tm, 1); \
tc -= tf; \
tf += OD_DCT_RSHIFT(tc, 1); \
tj += tg; \
tg -= OD_DCT_RSHIFT(tj, 1); \
td -= te; \
te += OD_DCT_RSHIFT(td, 1); \
ti += th; \
th -= OD_DCT_RSHIFT(ti, 1); \
\
t9 = -t9; \
tl = -tl; \
\
/* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \
t8 += (tn*805 + 8192) >> 14; \
/* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \
tn -= (t8*803 + 4096) >> 13; \
/* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \
t8 += (tn*805 + 8192) >> 14; \
/* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \
tk += (tb*11725 + 16384) >> 15; \
/* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \
tb -= (tk*5197 + 4096) >> 13; \
/* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \
tk += (tb*11725 + 16384) >> 15; \
/* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \
ta += (tl*2455 + 2048) >> 12; \
/* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \
OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \
tl -= (ta*14449 + 8192) >> 14; \
/* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \
OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \
ta += (tl*2455 + 2048) >> 12; \
/* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \
t9 += (tm*4861 + 16384) >> 15; \
/* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \
tm -= (t9*1189 + 2048) >> 12; \
/* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \
t9 += (tm*4861 + 16384) >> 15; \
/* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \
tf += (tg*805 + 8192) >> 14; \
/* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \
OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \
tg -= (tf*803 + 4096) >> 13; \
/* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \
OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \
tf += (tg*805 + 8192) >> 14; \
/* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \
tc += (tj*2931 + 4096) >> 13; \
/* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \
OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \
tj -= (tc*5197 + 4096) >> 13; \
/* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \
OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \
tc += (tj*2931 + 4096) >> 13; \
/* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \
td += (ti*513 + 1024) >> 11; \
/* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \
OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \
ti -= (td*7723 + 8192) >> 14; \
/* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \
OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \
td += (ti*513 + 1024) >> 11; \
/* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \
te += (th*4861 + 16384) >> 15; \
/* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \
OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \
th -= (te*1189 + 2048) >> 12; \
/* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \
OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \
te += (th*4861 + 16384) >> 15; \
\
ta = -ta; \
tb = -tb; \
\
tt += t5h; \
t5 -= tt; \
t2 -= tqh; \
tq += t2; \
tp += t1h; \
t1 -= tp; \
t6 -= tuh; \
tu += t6; \
t7 += tvh; \
tv -= t7; \
to += t0h; \
t0 -= to; \
t3 -= t4h; \
t4 += t3; \
ts += trh; \
tr -= ts; \
tf -= OD_DCT_RSHIFT(tn, 1); \
tn += tf; \
tg -= OD_DCT_RSHIFT(t8, 1); \
t8 += tg; \
tk += OD_DCT_RSHIFT(tc, 1); \
tc -= tk; \
tb += OD_DCT_RSHIFT(tj, 1); \
tj -= tb; \
ta += OD_DCT_RSHIFT(ti, 1); \
ti -= ta; \
tl += OD_DCT_RSHIFT(td, 1); \
td -= tl; \
te -= OD_DCT_RSHIFT(tm, 1); \
tm += te; \
th -= OD_DCT_RSHIFT(t9, 1); \
t9 += th; \
ta -= t5; \
t5 += OD_DCT_RSHIFT(ta, 1); \
tq -= tl; \
tl += OD_DCT_RSHIFT(tq, 1); \
t2 -= ti; \
ti += OD_DCT_RSHIFT(t2, 1); \
td -= tt; \
tt += OD_DCT_RSHIFT(td, 1); \
tm += tp; \
tp -= OD_DCT_RSHIFT(tm, 1); \
t6 += t9; \
t9 -= OD_DCT_RSHIFT(t6, 1); \
te -= tu; \
tu += OD_DCT_RSHIFT(te, 1); \
t1 -= th; \
th += OD_DCT_RSHIFT(t1, 1); \
t0 -= tg; \
tg += OD_DCT_RSHIFT(t0, 1); \
tf += tv; \
tv -= OD_DCT_RSHIFT(tf, 1); \
t8 -= t7; \
t7 += OD_DCT_RSHIFT(t8, 1); \
to -= tn; \
tn += OD_DCT_RSHIFT(to, 1); \
t4 -= tk; \
tk += OD_DCT_RSHIFT(t4, 1); \
tb -= tr; \
tr += OD_DCT_RSHIFT(tb, 1); \
t3 -= tj; \
tj += OD_DCT_RSHIFT(t3, 1); \
tc -= ts; \
ts += OD_DCT_RSHIFT(tc, 1); \
\
tr = -tr; \
ts = -ts; \
tt = -tt; \
tu = -tu; \
\
/* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \
tv += (t0*2847 + 2048) >> 12; \
/* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \
t0 -= (tv*5791 + 2048) >> 12; \
/* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \
tv += (t0*5593 + 4096) >> 13; \
/* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \
tg -= (tf*4099 + 4096) >> 13; \
/* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \
tf += (tg*1997 + 1024) >> 11; \
/* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \
tg += (tf*815 + 16384) >> 15; \
/* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \
tn -= (t8*2527 + 2048) >> 12; \
/* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \
t8 += (tn*4695 + 4096) >> 13; \
/* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \
tn += (t8*4187 + 4096) >> 13; \
/* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \
t7 += (to*5477 + 4096) >> 13; \
/* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \
to -= (t7*4169 + 4096) >> 13; \
/* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \
t7 -= (to*2571 + 2048) >> 12; \
/* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \
tt += (t2*5331 + 4096) >> 13; \
/* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \
t2 -= (tt*5749 + 2048) >> 12; \
/* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \
tt += (t2*2413 + 2048) >> 12; \
/* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \
ti -= (td*4167 + 4096) >> 13; \
/* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \
td += (ti*891 + 512) >> 10; \
/* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \
ti += (td*4327 + 16384) >> 15; \
/* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \
tl -= (ta*2261 + 2048) >> 12; \
/* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \
ta += (tl*2855 + 2048) >> 12; \
/* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \
tl += (ta*5417 + 8192) >> 14; \
/* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \
t5 += (tq*3459 + 2048) >> 12; \
/* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \
tq -= (t5*1545 + 2048) >> 12; \
/* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \
t5 -= (tq*1971 + 1024) >> 11; \
/* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \
ts += (t3*323 + 256) >> 9; \
/* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \
t3 -= (ts*5707 + 2048) >> 12; \
/* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \
ts += (t3*2229 + 2048) >> 12; \
/* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \
tj -= (tc*1061 + 1024) >> 11; \
/* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \
tc += (tj*6671 + 4096) >> 13; \
/* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \
tj += (tc*6287 + 16384) >> 15; \
/* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \
tk -= (tb*4359 + 4096) >> 13; \
/* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \
OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \
tb += (tk*3099 + 2048) >> 12; \