Commit dc857d1b authored by Nathan E. Egge, committed by Tim Terriberry

daala_tx: New flattened 4-point Type-IV asym DST.

This 4-point Type-IV asymmetric DST uses the same computation graph as
the 4-point Type-IV DST.
This change improves the accuracy of the 8-point Type-II DCT:

Old MSE: 1.8927096972341813413041010372151e-06
New MSE: 1.7946367518072710517065436117146e-06

subset-1:

new_dst4@2017-12-04T06:31:41.096Z -> new_dst4a@2017-12-04T06:32:22.698Z

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0143 |  0.0410 | -0.2166 |  -0.0556 | -0.0379 | -0.0461 |    -0.0002

Change-Id: Ifde11fca987220130c1657306b0df34ec2f3fe25
parent 4644a7d0
......@@ -539,34 +539,34 @@
do { \
int t_; \
int u_; \
t_ = q0h + q3; \
/* 38531/32768 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 */ \
u_ = (q0*38531 + 16384) >> 15; \
/* 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 */ \
q0 = (q3*12873 + 8192) >> 14; \
/* 12785/32768 = 2*Cos[7*Pi/16] = 0.3901806440322565 */ \
t_ = q0h - q3; \
/* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] ~= 1.1758756024193586 */ \
u_ = (q3*38531 + 16384) >> 15; \
/* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] ~= 0.7856949583871022 */ \
q3 = (q0*12873 + 8192) >> 14; \
/* 12785/32768 ~= 2*Cos[7*Pi/16] ~= 0.3901806440322565 */ \
t_ = (t_*12785 + 16384) >> 15; \
q0 += OD_RSHIFT1(t_); \
q3 = u_ - t_; \
q0 = u_ + OD_RSHIFT1(t_); \
q3 += t_; \
t_ = q1 + q2h; \
/* 45451/32768 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 */ \
/* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */ \
u_ = (q1*45451 + 16384) >> 15; \
/* 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.27589937928294306 */ \
/* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */ \
q1 = (q2*9041 + 16384) >> 15; \
/* 18205/16384 = 2*Cos[5*Pi/16] = 1.1111404660392044 */ \
/* 18205/16384 ~= 2*Cos[5*Pi/16] ~= 1.1111404660392044 */ \
t_ = (t_*18205 + 8192) >> 14; \
q1 += t_; \
q2 = OD_RSHIFT1(t_) - u_; \
q2 -= OD_RSHIFT1(q3); \
q3 += q2; \
q2 = u_ - OD_RSHIFT1(t_); \
q2 += OD_RSHIFT1(q3); \
q3 -= q2; \
q0 += OD_RSHIFT1(q1); \
q1 -= q0; \
t_ = (q1 + q2 + 1) >> 1; \
/* 11585/8192 2*Sin[Pi/4] = 1.4142135623730951 */ \
q2 = (q1*11585 + 4096) >> 13; \
/* -46341/32768 = -2*Cos[Pi/4] = -1.4142135623730951 */ \
q1 = (t_*-46341 + 16384) >> 15; \
q2 += q1; \
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */ \
q1 = (q2*11585 + 4096) >> 13; \
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
q2 = (t_*11585 + 4096) >> 13; \
q1 -= q2; \
} \
while (0)
......@@ -577,35 +577,35 @@
int q1h; \
int q3h; \
t_ = (q1 + q2 + 1) >> 1; \
/* 11585/8192 2*Sin[Pi/4] = 1.4142135623730951 */ \
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */ \
q1 = (q2*11585 + 4096) >> 13; \
/* -46341/32768 = -2*Cos[Pi/4] = -1.4142135623730951 */ \
q2 = (t_*-46341 + 16384) >> 15; \
q1 += q2; \
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */ \
q2 = (t_*11585 + 4096) >> 13; \
q1 -= q2; \
q1 += q0; \
q1h = OD_RSHIFT1(q1); \
q0 -= q1h; \
q3 -= q2; \
q3 += q2; \
q3h = OD_RSHIFT1(q3); \
q2 += q3h; \
q2 -= q3h; \
t_ = q1h + q2; \
/* 45451/32768 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 */ \
u_ = (q2*45451 + 16384) >> 15; \
/* 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.27589937928294306 */ \
q2 = (q1*9041 + 16384) >> 15; \
/* 18205/16384 = 2*Cos[5*Pi/16] = 1.1111404660392044 */ \
/* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */ \
u_ = (q1*45451 + 16384) >> 15; \
/* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */ \
q1 = (q2*9041 + 16384) >> 15; \
/* 18205/16384 ~= 2*Cos[5*Pi/16] ~= 1.1111404660392044 */ \
t_ = (t_*18205 + 8192) >> 14; \
q1 = OD_RSHIFT1(t_) - u_; \
q2 += t_; \
t_ = q0 + q3h; \
/* 38531/32768 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 */ \
u_ = (q0*38531 + 16384) >> 15; \
/* 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 */ \
q0 = (q3*12873 + 8192) >> 14; \
/* 12785/32768 = 2*Cos[7*Pi/16] = 0.3901806440322565 */ \
q1 += OD_RSHIFT1(t_); \
q2 = u_ - t_; \
t_ = q0 - q3h; \
/* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] ~= 1.1758756024193586 */ \
u_ = (q3*38531 + 16384) >> 15; \
/* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] ~= 0.7856949583871022 */ \
q3 = (q0*12873 + 8192) >> 14; \
/* 12785/32768 ~= 2*Cos[7*Pi/16] ~= 0.3901806440322565 */ \
t_ = (t_*12785 + 16384) >> 15; \
q3 = u_ - OD_RSHIFT1(t_); \
q0 += t_; \
q3 += OD_RSHIFT1(t_); \
q0 = u_ + t_; \
} \
while (0)
......
......@@ -151,35 +151,35 @@ static INLINE void OD_KERNEL_FUNC(od_idst4_asym)(OD_REG *q0, OD_REG *q2,
OD_REG q1h;
OD_REG q3h;
t_ = OD_AVG(*q1, *q2);
/* 11585/8192 2*Sin[Pi/4] = 1.4142135623730951 */
/* 11585/8192 ~= 2*Sin[Pi/4] ~= 1.4142135623730951 */
*q1 = OD_MUL(*q2, 11585, 13);
/* -46341/32768 = -2*Cos[Pi/4] = -1.4142135623730951 */
*q2 = OD_MUL(t_, -46341, 15);
*q1 = OD_ADD(*q1, *q2);
/* 11585/8192 ~= 2*Cos[Pi/4] ~= 1.4142135623730951 */
*q2 = OD_MUL(t_, 11585, 13);
*q1 = OD_SUB(*q1, *q2);
*q1 = OD_ADD(*q1, *q0);
q1h = OD_RSHIFT1(*q1);
*q0 = OD_SUB(*q0, q1h);
*q3 = OD_SUB(*q3, *q2);
*q3 = OD_ADD(*q3, *q2);
q3h = OD_RSHIFT1(*q3);
*q2 = OD_ADD(*q2, q3h);
*q2 = OD_SUB(*q2, q3h);
t_ = OD_ADD(q1h, *q2);
/* 45451/32768 = Sin[5*Pi/16] + Cos[5*Pi/16] = 1.3870398453221475 */
u_ = OD_MUL(*q2, 45451, 15);
/* 9041/32768 = Sin[5*Pi/16] - Cos[5*Pi/16] = 0.27589937928294306 */
*q2 = OD_MUL(*q1, 9041, 15);
/* 18205/16384 = 2*Cos[5*Pi/16] = 1.1111404660392044 */
/* 45451/32768 ~= Sin[5*Pi/16] + Cos[5*Pi/16] ~= 1.3870398453221475 */
u_ = OD_MUL(*q1, 45451, 15);
/* 9041/32768 ~= Sin[5*Pi/16] - Cos[5*Pi/16] ~= 0.27589937928294306 */
*q1 = OD_MUL(*q2, 9041, 15);
/* 18205/16384 ~= 2*Cos[5*Pi/16] ~= 1.1111404660392044 */
t_ = OD_MUL(t_, 18205, 14);
*q1 = OD_SUB(OD_RSHIFT1(t_), u_);
*q2 = OD_ADD(*q2, t_);
t_ = OD_ADD(*q0, q3h);
/* 38531/32768 = Sin[7*Pi/16] + Cos[7*Pi/16] = 1.1758756024193586 */
u_ = OD_MUL(*q0, 38531, 15);
/* 12873/16384 = Sin[7*Pi/16] - Cos[7*Pi/16] = 0.7856949583871022 */
*q0 = OD_MUL(*q3, 12873, 14);
/* 12785/32768 = 2*Cos[7*Pi/16] = 0.3901806440322565 */
*q1 = OD_ADD(*q1, OD_RSHIFT1(t_));
*q2 = OD_SUB(u_, t_);
t_ = OD_SUB(*q0, q3h);
/* 38531/32768 ~= Sin[7*Pi/16] + Cos[7*Pi/16] ~= 1.1758756024193586 */
u_ = OD_MUL(*q3, 38531, 15);
/* 12873/16384 ~= Sin[7*Pi/16] - Cos[7*Pi/16] ~= 0.7856949583871022 */
*q3 = OD_MUL(*q0, 12873, 14);
/* 12785/32768 ~= 2*Cos[7*Pi/16] ~= 0.3901806440322565 */
t_ = OD_MUL(t_, 12785, 15);
*q3 = OD_SUB(u_, OD_RSHIFT1(t_));
*q0 = OD_ADD(*q0, t_);
*q3 = OD_ADD(*q3, OD_RSHIFT1(t_));
*q0 = OD_ADD(u_, t_);
}
static INLINE void OD_KERNEL_FUNC(od_idct8)(OD_REG *r0, OD_REG *r4, OD_REG *r2,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment