Commit b2f82ebd authored by Nathan E. Egge's avatar Nathan E. Egge Committed by Nathan Egge
Browse files

daala_tx: Unify the asym and ortho DST designs.

This patch refactors the DST transforms so that the orthonormal and
 asymmetric transforms are now nearly identical (up to multiplication
 constants and an extra set of shifts).
This means that the DST designs are now embeddable for every level
 and should address hardware concerns about gate area.

In addition, minor changes were made to improve transform accuracy:

 - all of the transforms now have perfect reconstruction for those
    computations outside the rotations, i.e., all +/- butterfly steps
    are exactly invertible
 - two multiplication constants were reduced below 1.0 (better for
    SIMD and gives slightly improved accuracy)
 - the averaging bias is removed which saves an extra addition for each
    of the averaging steps

Additional averaging steps can be removed from the 8-point Type-IV DST,
 giving a 68% reduction in MSE for the 32-point DCT, but this has not been
 done in case we use it in place of the 8-point Type-VII DST.

subset-1:

master-daala_tx@2017-12-10T22:38:19.651Z ->
 new-daala_tx@2017-12-10T22:37:50.844Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
0.0057 | -0.0210 | -0.1821 |   0.0085 | -0.0002 |  0.0147 |    -0.0674

Change-Id: Ib124eebf6f2e4b3c51c078d4e8f229fc5ec26171
parent e6579113
......@@ -31,6 +31,7 @@ set(AOM_AV1_COMMON_SOURCES
"${AOM_ROOT}/av1/common/convolve.h"
"${AOM_ROOT}/av1/common/daala_tx.c"
"${AOM_ROOT}/av1/common/daala_tx.h"
"${AOM_ROOT}/av1/common/daala_tx_kernels.h"
"${AOM_ROOT}/av1/common/debugmodes.c"
"${AOM_ROOT}/av1/common/entropy.c"
"${AOM_ROOT}/av1/common/entropy.h"
......
......@@ -26,6 +26,7 @@ AV1_COMMON_SRCS-yes += common/blockd.h
AV1_COMMON_SRCS-yes += common/common.h
AV1_COMMON_SRCS-yes += common/daala_tx.c
AV1_COMMON_SRCS-yes += common/daala_tx.h
AV1_COMMON_SRCS-yes += common/daala_tx_kernels.h
AV1_COMMON_SRCS-yes += common/daala_inv_txfm.c
AV1_COMMON_SRCS-yes += common/daala_inv_txfm.h
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/daala_tx_kernels.h
......
#include "av1/common/daala_tx.h"
#include "av1/common/odintrin.h"
#include "av1/common/daala_tx_kernels.h"
/* clang-format off */
......@@ -39,32 +40,6 @@
} \
while (0)
#define OD_FDCT_2_FLAT(p0, p1) \
  /* Embedded 2-point orthonormal Type-II fDCT with flattened rotations. */ \
  /* p0 and p1 must be modifiable lvalues; both are overwritten in place. */ \
  do { \
    /* Half of the input difference; the +1 rounds before the shift. */ \
    int od_half_diff_ = (p0 - p1 + 1) >> 1; \
    /* 46341/32768 ~= 2*Sin[Pi/4] = 1.4142135623730951 */ \
    p0 = (46341*p1 + 16384) >> 15; \
    /* 46341/32768 ~= 2*Cos[Pi/4] = 1.4142135623730951 */ \
    p1 = (46341*od_half_diff_ + 16384) >> 15; \
    /* p0 ends up as the sum of the two scaled terms. */ \
    p0 += p1; \
  } while (0)
#define OD_IDCT_2_FLAT(p0, p1) \
  /* Embedded 2-point orthonormal Type-II iDCT with flattened rotations. */ \
  /* p0 and p1 must be modifiable lvalues; both are overwritten in place. */ \
  do { \
    /* Sum of the inputs, captured before either one is overwritten. */ \
    int od_in_sum_ = p0 + p1; \
    /* 11585/8192 ~= 2*Sin[Pi/4] = 1.4142135623730951 */ \
    p1 = (11585*p0 + 4096) >> 13; \
    /* 11585/16384 ~= Cos[Pi/4] = 0.7071067811865475 */ \
    p0 = (11585*od_in_sum_ + 8192) >> 14; \
    /* p1 is the scaled p0 input minus the new p0. */ \
    p1 -= p0; \
  } while (0)
#define OD_FDCT_2_ASYM_PR(p0, p1, p1h) \
/* Embedded 2-point asymmetric Type-II fDCT. */ \
do { \
......@@ -82,9 +57,6 @@
} \
while (0)
/* The _FLAT names for the asymmetric 2-point DCT are plain aliases of the
   corresponding _PR macros (forward and inverse respectively). */
#define OD_FDCT_2_ASYM_FLAT OD_FDCT_2_ASYM_PR
#define OD_IDCT_2_ASYM_FLAT OD_IDCT_2_ASYM_PR
#define OD_FDST_2_PR(p0, p1) \
/* Embedded 2-point orthonormal Type-IV fDST. */ \
do { \
......@@ -112,24 +84,6 @@
} \
while (0)
#define OD_FDST_2_FLAT(p0, p1) \
  /* 2-point DST step with flattened rotations (3*Pi/8 constants). */ \
  /* p0 and p1 must be modifiable lvalues; both are overwritten in place. */ \
  do { \
    /* Half of the input sum; the +1 rounds before the shift. */ \
    int od_shared_ = (p0 + p1 + 1) >> 1; \
    /* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
    int od_scaled_p0_ = (21407*p0 + 8192) >> 14; \
    /* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.541196100146197 */ \
    p0 = (8867*p1 + 8192) >> 14; \
    /* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
    od_shared_ = (3135*od_shared_ + 2048) >> 12; \
    /* The shared term is added to one output and subtracted from the other. */ \
    p0 += od_shared_; \
    p1 = od_scaled_p0_ - od_shared_; \
  } while (0)
/* The inverse 2-point flattened DST reuses the forward macro unchanged. */
#define OD_IDST_2_FLAT OD_FDST_2_FLAT
#define OD_FDST_2_ASYM_PR(p0, p1) \
/* Embedded 2-point asymmetric Type-IV fDST. */ \
do { \
......@@ -157,40 +111,6 @@
} \
while (0)
#define OD_FDST_2_ASYM_FLAT(p0, p0h, p1) \
  /* Embedded 2-point asymmetric Type-IV fDST with flattened rotations. */ \
  /* p0 and p1 are overwritten in place; p0h is only read. */ \
  do { \
    /* Shared term built from the caller-supplied p0h and p1. */ \
    int od_shared_ = p0h + p1; \
    /* 15137/16384 ~= (Sin[3*Pi/8] + Cos[3*Pi/8])/Sqrt[2] = 0.9238795325112867 */ \
    int od_scaled_p0_ = (15137*p0 + 8192) >> 14; \
    /* 3135/4096 ~= (Sin[3*Pi/8] - Cos[3*Pi/8])*Sqrt[2] = 0.7653668647301795 */ \
    p0 = (3135*p1 + 2048) >> 12; \
    /* 8867/16384 ~= Cos[3*Pi/8]*Sqrt[2] = 0.5411961001461971 */ \
    od_shared_ = (8867*od_shared_ + 8192) >> 14; \
    /* The shared term is added to one output and subtracted from the other. */ \
    p0 += od_shared_; \
    p1 = od_scaled_p0_ - od_shared_; \
  } while (0)
#define OD_IDST_2_ASYM_FLAT(p0, p1) \
  /* Embedded 2-point asymmetric Type-IV iDST with flattened rotations. */ \
  /* p0 and p1 must be modifiable lvalues; both are overwritten in place. */ \
  /* OD_RSHIFT1 is defined outside this macro. */ \
  do { \
    /* Half of the input sum; the +1 rounds before the shift. */ \
    int od_shared_ = (p0 + p1 + 1) >> 1; \
    /* 3135/4096 ~= (Cos[Pi/8] - Sin[Pi/8])*Sqrt[2] = 0.7653668647301795 */ \
    int od_scaled_p1_ = (3135*p1 + 2048) >> 12; \
    /* 15137/16384 ~= (Cos[Pi/8] + Sin[Pi/8])/Sqrt[2] = 0.9238795325112867 */ \
    p1 = (15137*p0 + 8192) >> 14; \
    /* 8867/8192 ~= 2*Cos[3*Pi/8]*Sqrt[2] = 1.082392200292394 */ \
    od_shared_ = (8867*od_shared_ + 4096) >> 13; \
    /* p0 takes the full shared term, p1 gives up OD_RSHIFT1 of it. */ \
    p0 = od_scaled_p1_ + od_shared_; \
    p1 -= OD_RSHIFT1(od_shared_); \
  } while (0)
#define OD_FDCT_4_PR(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-II fDCT. */ \
do { \
......@@ -248,61 +168,6 @@
} \
while (0)
#define OD_FDCT_4_FLAT(q0, q1, q2, q3) \
  /* Embedded 4-point orthonormal Type-II fDCT with flattened rotations. */ \
  /* All four arguments are updated in place. OD_RSHIFT1 and the 2-point \
     macros invoked at the end are defined outside this macro. */ \
  do { \
    int od_q1_half_; \
    int od_q3_half_; \
    /* Inner pair: q1 accumulates q2, then q2 gives up OD_RSHIFT1(q1). */ \
    q1 += q2; \
    od_q1_half_ = OD_RSHIFT1(q1); \
    q2 -= od_q1_half_; \
    /* Outer pair: q3 becomes q0 - q3, then q0 gives up OD_RSHIFT1(q3). */ \
    q3 = q0 - q3; \
    od_q3_half_ = OD_RSHIFT1(q3); \
    q0 -= od_q3_half_; \
    /* (q0, q1) feed the 2-point asymmetric DCT; (q3, q2) feed the DST. */ \
    OD_FDCT_2_ASYM_FLAT(q0, q1, od_q1_half_); \
    OD_FDST_2_ASYM_FLAT(q3, od_q3_half_, q2); \
  } while (0)
#define OD_IDCT_4_FLAT(q0, q2, q1, q3) \
  /* Embedded 4-point orthonormal Type-II iDCT with flattened rotations. */ \
  /* All four arguments are updated in place. OD_RSHIFT1 and the 2-point \
     macros invoked first are defined outside this macro. */ \
  do { \
    /* Written by OD_IDCT_2_ASYM_FLAT through its third argument. */ \
    int od_q1_half_; \
    /* The two 2-point inverses touch disjoint arguments. */ \
    OD_IDCT_2_ASYM_FLAT(q0, q1, od_q1_half_); \
    OD_IDST_2_ASYM_FLAT(q3, q2); \
    /* Inner pair (q1, q2). */ \
    q2 += od_q1_half_; \
    q1 -= q2; \
    /* Outer pair (q0, q3). */ \
    q0 += OD_RSHIFT1(q3); \
    q3 = q0 - q3; \
  } while (0)
#define OD_FDCT_4_ASYM_FLAT(q0, q1, q1h, q2, q3, q3h) \
  /* Embedded 4-point asymmetric Type-II fDCT with flattened rotations. */ \
  /* q0..q3 are updated in place; q1h and q3h are only read here. */ \
  do { \
    /* Inner pair (q1, q2), using the caller-supplied q1h. */ \
    q2 -= q1h; \
    q1 += q2; \
    /* Outer pair (q0, q3), using the caller-supplied q3h. */ \
    q0 += q3h; \
    q3 = q0 - q3; \
    /* (q0, q1) feed the 2-point DCT; (q3, q2) feed the 2-point DST. */ \
    OD_FDCT_2_FLAT(q0, q1); \
    OD_FDST_2_FLAT(q3, q2); \
  } while (0)
#define OD_IDCT_4_ASYM_FLAT(q0, q2, q1, q1h, q3, q3h) \
  /* Embedded 4-point asymmetric Type-II iDCT with flattened rotations. */ \
  /* q0..q3 are updated in place; q1h and q3h are written as outputs. */ \
  /* OD_RSHIFT1 and the 2-point macros are defined outside this macro. */ \
  do { \
    /* The two 2-point inverses touch disjoint arguments. */ \
    OD_IDCT_2_FLAT(q0, q1); \
    OD_IDST_2_FLAT(q3, q2); \
    /* Outer pair (q0, q3); q3h receives OD_RSHIFT1 of the new q3. */ \
    q3 = q0 - q3; \
    q3h = OD_RSHIFT1(q3); \
    q0 -= q3h; \
    /* Inner pair (q1, q2); q1h receives OD_RSHIFT1 of the new q1. */ \
    q1 -= q2; \
    q1h = OD_RSHIFT1(q1); \
    q2 += q1h; \
  } while (0)
#define OD_FDST_4_PR(q0, q2, q1, q3) \
/* Embedded 4-point orthonormal Type-IV fDST. */ \
do { \
......@@ -384,90 +249,6 @@
} \
while (0)
#define OD_FDST_4_FLAT(q0, q1, q2, q3) \
/* Embedded 4-point orthonormal Type-IV fDST with flattened rotations. */ \
do { \
int t_; \
int u_; \
t_ = q0 - q3; \
/* 13623/16384 ~= (Sin[7*Pi/16] + Cos[7*Pi/16])/Sqrt[2] ~=
0.8314696123025451 */ \
u_ = (13623*q3 + 8192) >> 14; \
/* 18205/16384 ~= (Sin[7*Pi/16] - Cos[7*Pi/16])*Sqrt[2] ~=
1.1111404660392046 */ \
q3 = (18205*q0 + 8192) >> 14; \
/* 9041/32768 ~= Cos[7*Pi/16]*Sqrt[2] ~= 0.275899379282943 */ \
t_ = (9041*t_ + 16384) >> 15; \
q0 = u_ + OD_RSHIFT1(t_); \
q3 += t_; \
t_ = q1 + q2; \
/* 16069/16384 ~= (Sin[5*Pi/16] + Cos[5*Pi/16])/Sqrt[2] ~=
0.9807852804032304 */ \
u_ = (16069*q1 + 8192) >> 14; \
/* 12785/32768 ~= (Sin[5*Pi/16] - Cos[5*Pi/16])*Sqrt[2] ~=
0.3901806440322566 */ \
q1 = (12785*q2 + 16384) >> 15; \
/* 12873/16384 ~= Cos[5*Pi/16]*Sqrt[2] ~= 0.7856949583871021 */ \
t_ = (12873*t_ + 8192) >> 14; \
q2 = u_ - OD_RSHIFT1(t_); \
q1 += t_; \
q2 += OD_RSHIFT1(q3); \
q3 -= q2; \
q0 += OD_RSHIFT1(q1); \
q1 -= q0; \
t_ = (q1 + q2 + 1) >> 1; \
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */ \
q1 = (11585*q2 + 4096) >> 13; \
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
q2 = (11585*t_ + 4096) >> 13; \
q1 -= q2; \
} \
while (0)
#define OD_IDST_4_FLAT(q0, q1, q2, q3) \
/* Embedded 4-point orthonormal Type-IV fDST with flattened rotations. */ \
do { \
int t_; \
int u_; \
int q2h; \
int q3h; \
t_ = (q1 + q2 + 1) >> 1; \
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */ \
q2 = (11585*q1 + 4096) >> 13; \
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
q1 = (11585*t_ + 4096) >> 13; \
q2 -= q1; \
q2 += q0; \
q2h = OD_RSHIFT1(q2); \
q0 -= q2h; \
q3 += q1; \
q3h = OD_RSHIFT1(q3); \
q1 -= q3h; \
t_ = q1 + q2h; \
/* 16069/16384 ~= (Sin[5*Pi/16] + Cos[5*Pi/16])/Sqrt[2] ~=
0.9807852804032304 */ \
u_ = (16069*q2 + 8192) >> 14; \
/* 12785/32768 ~= (Sin[5*Pi/16] - Cos[5*Pi/16])*Sqrt[2] ~=
0.3901806440322566 */ \
q2 = (12785*q1 + 16384) >> 15; \
/* 12873/16384 ~= Cos[5*Pi/16]*Sqrt[2] ~= 0.7856949583871021 */ \
t_ = (12873*t_ + 8192) >> 14; \
q1 = u_ - t_; \
q2 += t_; \
t_ = q0 - q3h; \
/* 13623/16384 ~= (Sin[7*Pi/16] + Cos[7*Pi/16])/Sqrt[2] ~=
0.8314696123025451 */ \
u_ = (13623*q3 + 8192) >> 14; \
/* 18205/16384 ~= (Sin[7*Pi/16] - Cos[7*Pi/16])*Sqrt[2] ~=
1.1111404660392046 */ \
q3 = (18205*q0 + 8192) >> 14; \
/* 9041/32768 ~= Cos[7*Pi/16]*Sqrt[2] = 0.275899379282943 */ \
t_ = (9041*t_ + 16384) >> 15; \
q0 = u_ + t_; \
q3 += t_; \
} \
while (0)
#define OD_FDST_4_ASYM_PR(t0, t0h, t2, t1, t3) \
/* Embedded 4-point asymmetric Type-IV fDST. */ \
do { \
......@@ -534,81 +315,6 @@
} \
while (0)
#define OD_FDST_4_ASYM_FLAT(q0, q0h, q1, q2, q2h, q3) \
/* Embedded 4-point asymmetric Type-IV fDST with flattened rotations. */ \
do { \
int t_; \
int u_; \
t_ = q0h - q3; \
/* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] ~= 1.1758756024193586 */ \
u_ = (q3*38531 + 16384) >> 15; \
/* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] ~= 0.7856949583871022 */ \
q3 = (q0*12873 + 8192) >> 14; \
/* 12785/32768 ~= 2*Cos[7*Pi/16] ~= 0.3901806440322565 */ \
t_ = (t_*12785 + 16384) >> 15; \
q0 = u_ + OD_RSHIFT1(t_); \
q3 += t_; \
t_ = q1 + q2h; \
/* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */ \
u_ = (q1*45451 + 16384) >> 15; \
/* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */ \
q1 = (q2*9041 + 16384) >> 15; \
/* 18205/16384 ~= 2*Cos[5*Pi/16] ~= 1.1111404660392044 */ \
t_ = (t_*18205 + 8192) >> 14; \
q1 += t_; \
q2 = u_ - OD_RSHIFT1(t_); \
q2 += OD_RSHIFT1(q3); \
q3 -= q2; \
q0 += OD_RSHIFT1(q1); \
q1 -= q0; \
t_ = (q1 + q2 + 1) >> 1; \
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */ \
q1 = (q2*11585 + 4096) >> 13; \
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
q2 = (t_*11585 + 4096) >> 13; \
q1 -= q2; \
} \
while (0)
#define OD_IDST_4_ASYM_FLAT(q0, q2, q1, q3) \
do { \
int t_; \
int u_; \
int q1h; \
int q3h; \
t_ = (q1 + q2 + 1) >> 1; \
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */ \
q1 = (q2*11585 + 4096) >> 13; \
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
q2 = (t_*11585 + 4096) >> 13; \
q1 -= q2; \
q1 += q0; \
q1h = OD_RSHIFT1(q1); \
q0 -= q1h; \
q3 += q2; \
q3h = OD_RSHIFT1(q3); \
q2 -= q3h; \
t_ = q1h + q2; \
/* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */ \
u_ = (q1*45451 + 16384) >> 15; \
/* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */ \
q1 = (q2*9041 + 16384) >> 15; \
/* 18205/16384 ~= 2*Cos[5*Pi/16] ~= 1.1111404660392044 */ \
t_ = (t_*18205 + 8192) >> 14; \
q1 += OD_RSHIFT1(t_); \
q2 = u_ - t_; \
t_ = q0 - q3h; \
/* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 */ \
u_ = (q3*38531 + 16384) >> 15; \
/* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 */ \
q3 = (q0*12873 + 8192) >> 14; \
/* 12785/32768 ~= 2*Cos[7*Pi/16] = 0.3901806440322565 */ \
t_ = (t_*12785 + 16384) >> 15; \
q3 += OD_RSHIFT1(t_); \
q0 = u_ + t_; \
} \
while (0)
#define OD_FDCT_8_PR(r0, r4, r2, r6, r1, r5, r3, r7) \
/* Embedded 8-point orthonormal Type-II fDCT. */ \
do { \
......@@ -654,48 +360,6 @@
} \
while (0)
#define OD_FDCT_8_FLAT(r0, r1, r2, r3, r4, r5, r6, r7) \
  /* Embedded 8-point orthonormal Type-II fDCT with flattened rotations. */ \
  /* All eight arguments are updated in place. OD_RSHIFT1 and the 4-point \
     macros invoked at the end are defined outside this macro. */ \
  do { \
    int od_r1_half_; \
    int od_r3_half_; \
    int od_r5_half_; \
    int od_r7_half_; \
    /* Summing pairs: (r1, r6) and (r3, r4). */ \
    r1 += r6; \
    od_r1_half_ = OD_RSHIFT1(r1); \
    r6 -= od_r1_half_; \
    r3 += r4; \
    od_r3_half_ = OD_RSHIFT1(r3); \
    r4 -= od_r3_half_; \
    /* Differencing pairs: (r0, r7) and (r2, r5). */ \
    r7 = r0 - r7; \
    od_r7_half_ = OD_RSHIFT1(r7); \
    r0 -= od_r7_half_; \
    r5 = r2 - r5; \
    od_r5_half_ = OD_RSHIFT1(r5); \
    r2 -= od_r5_half_; \
    /* r0..r3 feed the 4-point asymmetric DCT; r7, r6, r5, r4 feed the DST. */ \
    OD_FDCT_4_ASYM_FLAT(r0, r1, od_r1_half_, r2, r3, od_r3_half_); \
    OD_FDST_4_ASYM_FLAT(r7, od_r7_half_, r6, r5, od_r5_half_, r4); \
  } while (0)
#define OD_IDCT_8_FLAT(r0, r4, r2, r6, r1, r5, r3, r7) \
  /* Embedded 8-point orthonormal Type-II iDCT with flattened rotations. */ \
  /* All eight arguments are updated in place. OD_RSHIFT1 and the 4-point \
     macros invoked first are defined outside this macro. */ \
  do { \
    /* Both are written by OD_IDCT_4_ASYM_FLAT through its h arguments. */ \
    int od_r1_half_; \
    int od_r3_half_; \
    /* The two 4-point inverses touch disjoint arguments. */ \
    OD_IDCT_4_ASYM_FLAT(r0, r2, r1, od_r1_half_, r3, od_r3_half_); \
    OD_IDST_4_ASYM_FLAT(r7, r5, r6, r4); \
    /* Pairs driven by the stored halves: (r1, r6) and (r3, r4). */ \
    r6 += od_r1_half_; \
    r1 -= r6; \
    r4 += od_r3_half_; \
    r3 -= r4; \
    /* Pairs driven by a fresh OD_RSHIFT1: (r0, r7) and (r2, r5). */ \
    r0 += OD_RSHIFT1(r7); \
    r7 = r0 - r7; \
    r2 += OD_RSHIFT1(r5); \
    r5 = r2 - r5; \
  } while (0)
#define OD_FDCT_8_ASYM_PR(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \
/* Embedded 8-point asymmetric Type-II fDCT. */ \
do { \
......@@ -732,42 +396,6 @@
} \
while (0)
#define OD_FDCT_8_ASYM_FLAT(r0, r1, r1h, r2, r3, r3h, r4, r5, r5h, r6, r7, r7h) \
  /* Embedded 8-point asymmetric Type-II fDCT with flattened rotations. */ \
  /* r0..r7 are updated in place; r1h, r3h, r5h and r7h are only read. */ \
  do { \
    /* Pairs that subtract a supplied half: (r1, r6) and (r3, r4). */ \
    r6 -= r1h; \
    r1 += r6; \
    r4 -= r3h; \
    r3 += r4; \
    /* Pairs that add a supplied half: (r0, r7) and (r2, r5). */ \
    r0 += r7h; \
    r7 = r0 - r7; \
    r2 += r5h; \
    r5 = r2 - r5; \
    /* r0..r3 feed the 4-point DCT; r7, r6, r5, r4 feed the 4-point DST. */ \
    OD_FDCT_4_FLAT(r0, r1, r2, r3); \
    OD_FDST_4_FLAT(r7, r6, r5, r4); \
  } while (0)
#define OD_IDCT_8_ASYM_FLAT(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \
  /* Embedded 8-point asymmetric Type-II iDCT with flattened rotations. */ \
  /* r0..r7 are updated in place; r1h, r3h, r5h, r7h are written as outputs. */ \
  /* OD_RSHIFT1 and the 4-point macros are defined outside this macro. */ \
  do { \
    /* The two 4-point inverses touch disjoint arguments. */ \
    OD_IDCT_4_FLAT(r0, r2, r1, r3); \
    OD_IDST_4_FLAT(r7, r5, r6, r4); \
    /* Pairs (r1, r6) and (r3, r4); each h output is OD_RSHIFT1 of the \
       freshly updated odd-indexed value. */ \
    r1 -= r6; \
    r1h = OD_RSHIFT1(r1); \
    r6 += r1h; \
    r3 -= r4; \
    r3h = OD_RSHIFT1(r3); \
    r4 += r3h; \
    /* Pairs (r0, r7) and (r2, r5). */ \
    r7 = r0 - r7; \
    r7h = OD_RSHIFT1(r7); \
    r0 -= r7h; \
    r5 = r2 - r5; \
    r5h = OD_RSHIFT1(r5); \
    r2 -= r5h; \
  } while (0)
#define OD_FDST_8_PR(t0, t4, t2, t6, t1, t5, t3, t7) \
/* Embedded 8-point orthonormal Type-IV fDST. */ \
do { \
......@@ -937,190 +565,6 @@
} \
while (0)
#define OD_FDST_8_FLAT(r0, r1, r2, r3, r4, r5, r6, r7) \
/* Embedded 8-point Type-IV fDST with flattened rotations. */ \
do { \
int t_; \
int u_; \
int r0h; \
int r2h; \
int r5h; \
int r7h; \
t_ = r3 - r4; \
/* 23059/16384 ~= Sin[9*Pi/32] + Cos[9*Pi/32] ~= 1.4074037375263826 */ \
u_ = (23059*r4 + 8192) >> 14; \
/* 2271/16384 ~= Sin[9*Pi/32] - Cos[9*Pi/32] ~= 0.1386171691990915 */ \
r4 = (2271*r3 + 8192) >> 14; \
/* 5197/8192 ~= Cos[9*Pi/32] ~= 0.6343932841636455 */ \
t_ = (5197*t_ + 4096) >> 13; \
r3 = u_ + t_; \
r4 += t_; \
t_ = r2 + r5; \
/* 22173/16384 ~= Sin[11*Pi/32] + Cos[11*Pi/32] ~= 1.3533180011743526 */ \
u_ = (22173*r2 + 8192) >> 14; \
/* 3363/8192 ~= Sin[11*Pi/32] - Cos[11*Pi/32] ~= 0.4105245275223574 */ \
r2 = (3363*r5 + 4096) >> 13; \
/* 15447/32768 ~= Cos[11*Pi/32] ~= 0.47139673682599764 */ \
t_ = (15447*t_ + 16384) >> 15; \
r2 += t_; \
r5 = u_ - t_; \
t_ = r1 - r6; \
/* 40869/32768 ~= Sin[13*Pi/32] + Cos[13*Pi/32] ~= 1.247225012986671 */ \
u_ = (40869*r6 + 16384) >> 15; \
/* 21845/32768 ~= Sin[13*Pi/32] - Cos[13*Pi/32] ~= 0.6666556584777465 */ \
r6 = (21845*r1 + 16384) >> 15; \
/* 1189/4096 ~= Cos[13*Pi/32] ~= 0.29028467725446233 */ \
t_ = (1189*t_ + 2048) >> 12; \
r1 = u_ + t_; \
r6 += t_; \
t_ = r0 + r7; \
/* 17911/16384 ~= Sin[15*Pi/32] + Cos[15*Pi/32] ~= 1.0932018670017576 */ \
u_ = (17911*r0 + 8192) >> 14; \
/* 14699/16384 ~= Sin[15*Pi/32] - Cos[15*Pi/32] ~= 0.8971675863426363 */ \
r0 = (14699*r7 + 8192) >> 14; \
/* 803/8192 ~= Cos[15*Pi/32] ~= 0.0980171403295606 */ \
t_ = (803*t_ + 4096) >> 13; \
r0 += t_; \
r7 = u_ - t_; \
r2 -= r1; \
r2h = OD_RSHIFT1(r2); \
r1 += r2h; \
r5 += r6; \
r5h = OD_RSHIFT1(r5); \
r6 -= r5h; \
r0 += r3; \
r0h = OD_RSHIFT1(r0); \
r3 -= r0h; \
r7 -= r4; \
r7h = OD_RSHIFT1(r7); \
r4 += r7h; \
r3 += r5h; \
r5 -= r3; \
r1 -= r0h; \
r0 += r1; \
r4 += r2h; \
r2 -= r4; \
r6 += r7h; \
r7 -= r6; \
t_ = (r4 - r3 + 1) >> 1; \
/* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
u_ = (21407*r3 + 8192) >> 14; \
/* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.5411961001461969 */ \
r3 = (8867*r4 + 8192) >> 14; \
/* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
t_ = (3135*t_ + 2048) >> 12; \
r3 += t_; \
r4 = u_ + t_; \
t_ = (r2 - r5 + 1) >> 1; \
/* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
u_ = (21407*r2 + 8192) >> 14; \
/* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.5411961001461969 */ \
r2 = (8867*r5 + 8192) >> 14; \
/* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
t_ = (3135*t_ + 2048) >> 12; \
r5 = t_ - u_; \
r2 -= t_; \
t_ = (r6 - r1 + 1) >> 1; \
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */ \
r6 = (11585*r1 + 4096) >> 13; \
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
r1 = (11585*t_ + 4096) >> 13; \
r6 += r1; \
} \
while (0)
#define OD_IDST_8_FLAT(r0, r4, r2, r6, r1, r5, r3, r7) \
/* Embedded 8-point Type-IV iDST with flattened rotations. */ \
do { \
int t_; \
int u_; \
int r0h; \
int r2h; \
int r5h; \
int r7h; \
t_ = (r1 + r6 + 1) >> 1; \
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */ \
r1 = (11585*r6 + 4096) >> 13; \
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
r6 = (11585*t_ + 4096) >> 13; \
r1 -= r6; \
t_ = (r5 - r2 + 1) >> 1; \
/* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
u_ = (21407*r5 + 8192) >> 14; \
/* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.5411961001461969 */ \
r5 = (8867*r2 + 8192) >> 14; \
/* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
t_ = (3135*t_ + 2048) >> 12; \
r5 -= t_; \
r2 = t_ - u_; \
t_ = (r3 + r4 + 1) >> 1; \
/* 21407/16384 ~= Sin[3*Pi/8] + Cos[3*Pi/8] ~= 1.3065629648763766 */ \
u_ = (21407*r4 + 8192) >> 14; \
/* 8867/16384 ~= Sin[3*Pi/8] - Cos[3*Pi/8] ~= 0.5411961001461969 */ \
r4 = (8867*r3 + 8192) >> 14; \
/* 3135/4096 ~= 2*Cos[3*Pi/8] ~= 0.7653668647301796 */ \
t_ = (3135*t_ + 2048) >> 12; \
r3 = u_ - t_; \
r4 += t_; \
r7 += r6; \
r7h = OD_RSHIFT1(r7); \
r6 -= r7h; \
r2 += r4; \
r2h = OD_RSHIFT1(r2); \
r4 -= r2h; \
r0 -= r1; \
r0h = OD_RSHIFT1(r0); \
r1 += r0h; \
r5 += r3; \
r5h = OD_RSHIFT1(r5); \
r3 -= r5h; \
r4 -= r7h; \
r7 += r4; \
r6 += r5h; \
r5 -= r6; \
r3 += r0h; \
r0 -= r3; \
r1 -= r2h; \
r2 += r1; \
t_ = r0 + r7; \
/* 17911/16384 ~= Sin[15*Pi/32] + Cos[15*Pi/32] ~= 1.0932018670017576 */ \
u_ = (17911*r0 + 8192) >> 14; \
/* 14699/16384 ~= Sin[15*Pi/32] - Cos[15*Pi/32] ~= 0.8971675863426363 */ \
r0 = (14699*r7 + 8192) >> 14; \
/* 803/8192 ~= Cos[15*Pi/32] ~= 0.0980171403295606 */ \
t_ = (803*t_ + 4096) >> 13; \
r7 = u_ - t_; \
r0 += t_; \
t_ = r1 - r6; \
/* 40869/32768 ~= Sin[13*Pi/32] + Cos[13*Pi/32] ~= 1.247225012986671 */ \
u_ = (40869*r6 + 16384) >> 15; \
/* 21845/32768 ~= Sin[13*Pi/32] - Cos[13*Pi/32] ~= 0.6666556584777465 */ \
r6 = (21845*r1 + 16384) >> 15; \
/* 1189/4096 ~= Cos[13*Pi/32] ~= 0.29028467725446233 */ \
t_ = (1189*t_ + 2048) >> 12; \
r1 = u_ + t_; \
r6 += t_; \
t_ = r2 + r5; \
/* 22173/16384 ~= Sin[11*Pi/32] + Cos[11*Pi/32] ~= 1.3533180011743526 */ \
u_ = (22173*r2 + 8192) >> 14; \
/* 3363/8192 ~= Sin[11*Pi/32] - Cos[11*Pi/32] ~= 0.4105245275223574 */ \
r2 = (3363*r5 + 4096) >> 13; \
/* 15447/32768 ~= Cos[11*Pi/32] ~= 0.47139673682599764 */ \
t_ = (15447*t_ + 16384) >> 15; \
r5 = u_ - t_; \
r2 += t_; \
t_ = r3 - r4; \
/* 23059/16384 ~= Sin[9*Pi/32] + Cos[9*Pi/32] ~= 1.4074037375263826 */ \
u_ = (23059*r4 + 8192) >> 14; \
/* 2271/16384 ~= Sin[9*Pi/32] - Cos[9*Pi/32] ~= 0.1386171691990915 */ \
r4 = (2271*r3 + 8192) >> 14; \
/* 5197/8192 ~= Cos[9*Pi/32] ~= 0.6343932841636455 */ \
t_ = (5197*t_ + 4096) >> 13; \
r3 = u_ + t_; \
r4 += t_; \
} \
while (0)
/* Rewrite this so that t0h can be passed in. */
#define OD_FDST_8_ASYM_PR(t0, t4, t2, t6, t1, t5, t3, t7) \
/* Embedded 8-point asymmetric Type-IV fDST. */ \
......@@ -1287,205 +731,6 @@
} \