Commit e9d9f1ad authored by Debargha Mukherjee, committed by Gerrit Code Review
Browse files

Merge "Refactored idct routines and headers"

parents 3726d45d 3a8c43a4
...@@ -11,39 +11,9 @@ ...@@ -11,39 +11,9 @@
#include <math.h> #include <math.h>
#include "./vp9_rtcd.h" #include "./vp9_rtcd.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"
#if CONFIG_EMULATE_HARDWARE
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
// non-normative method to handle overflows. A stream that causes
// overflows in the inverse transform is considered invalid in VP9,
// and a hardware implementer is free to choose any reasonable
// method to handle overflows. However, to aid in hardware
// verification they can use a specific implementation of the
// WRAPLOW() macro below that is identical to their intended
// hardware implementation (and also use configure options to trigger
// the C-implementation of the transform).
//
// The particular WRAPLOW implementation below performs strict
// overflow wrapping to match common hardware implementations:
//   bd of 8  uses trans_low with 16 bits, need to remove 16 bits
//   bd of 10 uses trans_low with 18 bits, need to remove 14 bits
//   bd of 12 uses trans_low with 20 bits, need to remove 12 bits
//   bd of x  uses trans_low with 8+x bits, need to remove 24-x bits
//
// The left shift is carried out on uint32_t because left-shifting a
// negative signed value is undefined behavior in C. The result is then
// converted back to int32_t and shifted right, which (as before) relies on
// the compiler sign-extending on a signed right shift. `bd` is
// parenthesized so that an expression argument expands correctly.
#define WRAPLOW(x, bd) \
  (((int32_t)((uint32_t)(x) << (24 - (bd)))) >> (24 - (bd)))
#else
// Without hardware emulation no wrapping is applied; the value is only
// converted to int32_t and `bd` is ignored.
#define WRAPLOW(x, bd) ((int32_t)(x))
#endif  // CONFIG_EMULATE_HARDWARE
#if CONFIG_VP9_HIGHBITDEPTH
// Adds the residual `trans` to the pixel value `dest` for bit depth `bd`.
// The residual and the sum are each passed through WRAPLOW() before the sum
// is handed to clip_pixel_highbd().
// NOTE(review): clip_pixel_highbd() is defined elsewhere; presumably it
// clamps to the valid pixel range for `bd` -- confirm.
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                             int bd) {
  const tran_high_t residual = WRAPLOW(trans, bd);
  return clip_pixel_highbd(WRAPLOW(dest + residual, bd), bd);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
trans = WRAPLOW(trans, 8); trans = WRAPLOW(trans, 8);
...@@ -1540,7 +1510,7 @@ void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, ...@@ -1540,7 +1510,7 @@ void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
} }
} }
static void highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step[4]; tran_low_t step[4];
tran_high_t temp1, temp2; tran_high_t temp1, temp2;
(void) bd; (void) bd;
...@@ -1571,7 +1541,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -1571,7 +1541,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
// Rows // Rows
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
highbd_idct4(input, outptr, bd); vp9_highbd_idct4(input, outptr, bd);
input += 4; input += 4;
outptr += 4; outptr += 4;
} }
...@@ -1580,7 +1550,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -1580,7 +1550,7 @@ void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) for (j = 0; j < 4; ++j)
temp_in[j] = out[j * 4 + i]; temp_in[j] = out[j * 4 + i];
highbd_idct4(temp_in, temp_out, bd); vp9_highbd_idct4(temp_in, temp_out, bd);
for (j = 0; j < 4; ++j) { for (j = 0; j < 4; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
...@@ -1607,7 +1577,7 @@ void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -1607,7 +1577,7 @@ void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
} }
} }
static void highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step1[8], step2[8]; tran_low_t step1[8], step2[8];
tran_high_t temp1, temp2; tran_high_t temp1, temp2;
// stage 1 // stage 1
...@@ -1625,7 +1595,7 @@ static void highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { ...@@ -1625,7 +1595,7 @@ static void highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) {
step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
// stage 2 & stage 3 - even half // stage 2 & stage 3 - even half
highbd_idct4(step1, step1, bd); vp9_highbd_idct4(step1, step1, bd);
// stage 2 - odd half // stage 2 - odd half
step2[4] = WRAPLOW(step1[4] + step1[5], bd); step2[4] = WRAPLOW(step1[4] + step1[5], bd);
...@@ -1662,7 +1632,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -1662,7 +1632,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
// First transform rows. // First transform rows.
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
highbd_idct8(input, outptr, bd); vp9_highbd_idct8(input, outptr, bd);
input += 8; input += 8;
outptr += 8; outptr += 8;
} }
...@@ -1671,7 +1641,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -1671,7 +1641,7 @@ void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i]; temp_in[j] = out[j * 8 + i];
highbd_idct8(temp_in, temp_out, bd); vp9_highbd_idct8(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) { for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
...@@ -1735,9 +1705,9 @@ static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { ...@@ -1735,9 +1705,9 @@ static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) {
void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int tx_type, int bd) { int stride, int tx_type, int bd) {
const highbd_transform_2d IHT_4[] = { const highbd_transform_2d IHT_4[] = {
{ highbd_idct4, highbd_idct4 }, // DCT_DCT = 0 { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0
{ highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1 { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1
{ highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2
{ highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3
}; };
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
...@@ -1844,9 +1814,9 @@ static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { ...@@ -1844,9 +1814,9 @@ static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) {
} }
static const highbd_transform_2d HIGH_IHT_8[] = { static const highbd_transform_2d HIGH_IHT_8[] = {
{ highbd_idct8, highbd_idct8 }, // DCT_DCT = 0 { vp9_highbd_idct8, vp9_highbd_idct8 }, // DCT_DCT = 0
{ highbd_iadst8, highbd_idct8 }, // ADST_DCT = 1 { highbd_iadst8, vp9_highbd_idct8 }, // ADST_DCT = 1
{ highbd_idct8, highbd_iadst8 }, // DCT_ADST = 2 { vp9_highbd_idct8, highbd_iadst8 }, // DCT_ADST = 2
{ highbd_iadst8, highbd_iadst8 } // ADST_ADST = 3 { highbd_iadst8, highbd_iadst8 } // ADST_ADST = 3
}; };
...@@ -1889,7 +1859,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -1889,7 +1859,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
// First transform rows. // First transform rows.
// Only first 4 row has non-zero coefs. // Only first 4 row has non-zero coefs.
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
highbd_idct8(input, outptr, bd); vp9_highbd_idct8(input, outptr, bd);
input += 8; input += 8;
outptr += 8; outptr += 8;
} }
...@@ -1897,7 +1867,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -1897,7 +1867,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
temp_in[j] = out[j * 8 + i]; temp_in[j] = out[j * 8 + i];
highbd_idct8(temp_in, temp_out, bd); vp9_highbd_idct8(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) { for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
...@@ -1905,7 +1875,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -1905,7 +1875,7 @@ void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
} }
} }
static void highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd) { void vp9_highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step1[16], step2[16]; tran_low_t step1[16], step2[16];
tran_high_t temp1, temp2; tran_high_t temp1, temp2;
(void) bd; (void) bd;
...@@ -2081,7 +2051,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -2081,7 +2051,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
// First transform rows. // First transform rows.
for (i = 0; i < 16; ++i) { for (i = 0; i < 16; ++i) {
highbd_idct16(input, outptr, bd); vp9_highbd_idct16(input, outptr, bd);
input += 16; input += 16;
outptr += 16; outptr += 16;
} }
...@@ -2090,7 +2060,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -2090,7 +2060,7 @@ void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 16; ++i) { for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) for (j = 0; j < 16; ++j)
temp_in[j] = out[j * 16 + i]; temp_in[j] = out[j * 16 + i];
highbd_idct16(temp_in, temp_out, bd); vp9_highbd_idct16(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) { for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
...@@ -2270,9 +2240,9 @@ static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, ...@@ -2270,9 +2240,9 @@ static void highbd_iadst16(const tran_low_t *input, tran_low_t *output,
} }
static const highbd_transform_2d HIGH_IHT_16[] = { static const highbd_transform_2d HIGH_IHT_16[] = {
{ highbd_idct16, highbd_idct16 }, // DCT_DCT = 0 { vp9_highbd_idct16, vp9_highbd_idct16 }, // DCT_DCT = 0
{ highbd_iadst16, highbd_idct16 }, // ADST_DCT = 1 { highbd_iadst16, vp9_highbd_idct16 }, // ADST_DCT = 1
{ highbd_idct16, highbd_iadst16 }, // DCT_ADST = 2 { vp9_highbd_idct16, highbd_iadst16 }, // DCT_ADST = 2
{ highbd_iadst16, highbd_iadst16 } // ADST_ADST = 3 { highbd_iadst16, highbd_iadst16 } // ADST_ADST = 3
}; };
...@@ -2315,7 +2285,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -2315,7 +2285,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
// First transform rows. Since all non-zero dct coefficients are in // First transform rows. Since all non-zero dct coefficients are in
// upper-left 4x4 area, we only need to calculate first 4 rows here. // upper-left 4x4 area, we only need to calculate first 4 rows here.
for (i = 0; i < 4; ++i) { for (i = 0; i < 4; ++i) {
highbd_idct16(input, outptr, bd); vp9_highbd_idct16(input, outptr, bd);
input += 16; input += 16;
outptr += 16; outptr += 16;
} }
...@@ -2324,7 +2294,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, ...@@ -2324,7 +2294,7 @@ void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
for (i = 0; i < 16; ++i) { for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) for (j = 0; j < 16; ++j)
temp_in[j] = out[j*16 + i]; temp_in[j] = out[j*16 + i];
highbd_idct16(temp_in, temp_out, bd); vp9_highbd_idct16(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) { for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
......
...@@ -118,6 +118,28 @@ typedef struct { ...@@ -118,6 +118,28 @@ typedef struct {
} highbd_transform_2d; } highbd_transform_2d;
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_EMULATE_HARDWARE
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
// non-normative method to handle overflows. A stream that causes
// overflows in the inverse transform is considered invalid in VP9,
// and a hardware implementer is free to choose any reasonable
// method to handle overflows. However, to aid in hardware
// verification they can use a specific implementation of the
// WRAPLOW() macro below that is identical to their intended
// hardware implementation (and also use configure options to trigger
// the C-implementation of the transform).
//
// The particular WRAPLOW implementation below performs strict
// overflow wrapping to match common hardware implementations:
//   bd of 8  uses trans_low with 16 bits, need to remove 16 bits
//   bd of 10 uses trans_low with 18 bits, need to remove 14 bits
//   bd of 12 uses trans_low with 20 bits, need to remove 12 bits
//   bd of x  uses trans_low with 8+x bits, need to remove 24-x bits
//
// The left shift is carried out on uint32_t because left-shifting a
// negative signed value is undefined behavior in C. The result is then
// converted back to int32_t and shifted right, which (as before) relies on
// the compiler sign-extending on a signed right shift. `bd` is
// parenthesized so that an expression argument expands correctly.
#define WRAPLOW(x, bd) \
  (((int32_t)((uint32_t)(x) << (24 - (bd)))) >> (24 - (bd)))
#else
// Without hardware emulation the value is passed through unchanged and
// `bd` is ignored.
#define WRAPLOW(x, bd) (x)
#endif  // CONFIG_EMULATE_HARDWARE
void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob); int eob);
void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
...@@ -137,6 +159,9 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, ...@@ -137,6 +159,9 @@ void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
int stride, int eob); int stride, int eob);
#if CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd);
void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd);
void vp9_highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd);
void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd); int eob, int bd);
void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
...@@ -153,6 +178,11 @@ void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, ...@@ -153,6 +178,11 @@ void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
uint8_t *dest, int stride, int eob, int bd); uint8_t *dest, int stride, int eob, int bd);
void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
uint8_t *dest, int stride, int eob, int bd); uint8_t *dest, int stride, int eob, int bd);
// Adds the residual `trans` to the pixel value `dest` for bit depth `bd`.
// The residual and the sum are each passed through WRAPLOW() before the sum
// is handed to clip_pixel_highbd().
// NOTE(review): clip_pixel_highbd() is defined elsewhere; presumably it
// clamps to the valid pixel range for `bd` -- confirm.
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
                                             int bd) {
  const tran_high_t residual = WRAPLOW(trans, bd);
  return clip_pixel_highbd(WRAPLOW(dest + residual, bd), bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h" #include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_dct.h"
static INLINE tran_high_t fdct_round_shift(tran_high_t input) { static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
...@@ -26,7 +27,7 @@ static INLINE tran_high_t fdct_round_shift(tran_high_t input) { ...@@ -26,7 +27,7 @@ static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
return rv; return rv;
} }
static void fdct4(const tran_low_t *input, tran_low_t *output) { void vp9_fdct4(const tran_low_t *input, tran_low_t *output) {
tran_high_t step[4]; tran_high_t step[4];
tran_high_t temp1, temp2; tran_high_t temp1, temp2;
...@@ -123,7 +124,7 @@ void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) { ...@@ -123,7 +124,7 @@ void vp9_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
} }
} }
static void fadst4(const tran_low_t *input, tran_low_t *output) { void vp9_fadst4(const tran_low_t *input, tran_low_t *output) {
tran_high_t x0, x1, x2, x3; tran_high_t x0, x1, x2, x3;
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
...@@ -163,13 +164,6 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) { ...@@ -163,13 +164,6 @@ static void fadst4(const tran_low_t *input, tran_low_t *output) {
output[3] = (tran_low_t)fdct_round_shift(s3); output[3] = (tran_low_t)fdct_round_shift(s3);
} }
// Table of 1-D transform pairs built from fdct4 and fadst4, one entry per
// transform type in the order DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST
// (values 0..3, as noted on each row).
// NOTE(review): presumably indexed by tx_type; which member of each pair is
// applied to columns and which to rows is defined by transform_2d, which is
// not visible here -- confirm.
static const transform_2d FHT_4[] = {
{ fdct4, fdct4 }, // DCT_DCT = 0
{ fadst4, fdct4 }, // ADST_DCT = 1
{ fdct4, fadst4 }, // DCT_ADST = 2
{ fadst4, fadst4 } // ADST_ADST = 3
};
void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, void vp9_fht4x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) { int stride, int tx_type) {
if (tx_type == DCT_DCT) { if (tx_type == DCT_DCT) {
...@@ -203,7 +197,7 @@ void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, ...@@ -203,7 +197,7 @@ void vp9_fht4x4_c(const int16_t *input, tran_low_t *output,
} }
} }
static void fdct8(const tran_low_t *input, tran_low_t *output) { void vp9_fdct8(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16
tran_high_t t0, t1, t2, t3; // needs32 tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16 tran_high_t x0, x1, x2, x3; // canbe16
...@@ -331,7 +325,7 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) { ...@@ -331,7 +325,7 @@ void vp9_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
// Rows // Rows
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
fdct8(&intermediate[i * 8], &final_output[i * 8]); vp9_fdct8(&intermediate[i * 8], &final_output[i * 8]);
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
final_output[j + i * 8] /= 2; final_output[j + i * 8] /= 2;
} }
...@@ -413,7 +407,7 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride, ...@@ -413,7 +407,7 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride,
// Rows // Rows
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]); vp9_fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]);
for (j = 0; j < 8; ++j) for (j = 0; j < 8; ++j)
coeff_ptr[j + i * 8] /= 2; coeff_ptr[j + i * 8] /= 2;
} }
...@@ -641,7 +635,7 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { ...@@ -641,7 +635,7 @@ void vp9_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
} }
} }
static void fadst8(const tran_low_t *input, tran_low_t *output) { void vp9_fadst8(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
tran_high_t x0 = input[7]; tran_high_t x0 = input[7];
...@@ -712,13 +706,6 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) { ...@@ -712,13 +706,6 @@ static void fadst8(const tran_low_t *input, tran_low_t *output) {
output[7] = (tran_low_t)-x1; output[7] = (tran_low_t)-x1;
} }
// Table of 1-D transform pairs built from fdct8 and fadst8, one entry per
// transform type in the order DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST
// (values 0..3, as noted on each row).
// NOTE(review): presumably indexed by tx_type; which member of each pair is
// applied to columns and which to rows is defined by transform_2d, which is
// not visible here -- confirm.
static const transform_2d FHT_8[] = {
{ fdct8, fdct8 }, // DCT_DCT = 0
{ fadst8, fdct8 }, // ADST_DCT = 1
{ fdct8, fadst8 }, // DCT_ADST = 2
{ fadst8, fadst8 } // ADST_ADST = 3
};
void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, void vp9_fht8x8_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) { int stride, int tx_type) {
if (tx_type == DCT_DCT) { if (tx_type == DCT_DCT) {
...@@ -807,7 +794,7 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { ...@@ -807,7 +794,7 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) {
} }
// Rewrote to use same algorithm as others. // Rewrote to use same algorithm as others.
static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { void vp9_fdct16(const tran_low_t in[16], tran_low_t out[16]) {
tran_high_t step1[8]; // canbe16 tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16 tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16 tran_high_t step3[8]; // canbe16
...@@ -948,7 +935,7 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { ...@@ -948,7 +935,7 @@ static void fdct16(const tran_low_t in[16], tran_low_t out[16]) {
out[15] = (tran_low_t)fdct_round_shift(temp2); out[15] = (tran_low_t)fdct_round_shift(temp2);
} }
static void fadst16(const tran_low_t *input, tran_low_t *output) { void vp9_fadst16(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15; tran_high_t s9, s10, s11, s12, s13, s14, s15;
...@@ -1111,13 +1098,6 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) { ...@@ -1111,13 +1098,6 @@ static void fadst16(const tran_low_t *input, tran_low_t *output) {
output[15] = (tran_low_t)-x1; output[15] = (tran_low_t)-x1;
} }
// Table of 1-D transform pairs built from fdct16 and fadst16, one entry per
// transform type in the order DCT_DCT, ADST_DCT, DCT_ADST, ADST_ADST
// (values 0..3, as noted on each row).
// NOTE(review): presumably indexed by tx_type; which member of each pair is
// applied to columns and which to rows is defined by transform_2d, which is
// not visible here -- confirm.
static const transform_2d FHT_16[] = {
{ fdct16, fdct16 }, // DCT_DCT = 0
{ fadst16, fdct16 }, // ADST_DCT = 1
{ fdct16, fadst16 }, // DCT_ADST = 2
{ fadst16, fadst16 } // ADST_ADST = 3
};
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, void vp9_fht16x16_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) { int stride, int tx_type) {
if (tx_type == DCT_DCT) { if (tx_type == DCT_DCT) {
...@@ -1162,7 +1142,7 @@ static INLINE tran_high_t half_round_shift(tran_high_t input) { ...@@ -1162,7 +1142,7 @@ static INLINE tran_high_t half_round_shift(tran_high_t input) {
return rv; return rv;
} }
static void fdct32(const tran_high_t *input, tran_high_t *output, int round) { void vp9_fdct32(const tran_high_t *input, tran_high_t *output, int round) {
tran_high_t step[32]; tran_high_t step[32];
// Stage 1 // Stage 1
step[0] = input[0] + input[(32 - 1)]; step[0] = input[0] + input[(32 - 1)];
...@@ -1505,7 +1485,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { ...@@ -1505,7 +1485,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
tran_high_t temp_in[32], temp_out[32]; tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j) for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4; temp_in[j] = input[j * stride + i] * 4;
fdct32(temp_in, temp_out, 0); vp9_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j) for (j = 0; j < 32; ++j)
output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
} }
...@@ -1515,7 +1495,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { ...@@ -1515,7 +1495,7 @@ void vp9_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) {
tran_high_t temp_in[32], temp_out[32]; tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j) for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32]; temp_in[j] = output[j + i * 32];
fdct32(temp_in, temp_out, 0); vp9_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j) for (j = 0; j < 32; ++j)
out[j + i * 32] = out[j + i * 32] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
...@@ -1534,7 +1514,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { ...@@ -1534,7 +1514,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
tran_high_t temp_in[32], temp_out[32]; tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j) for (j = 0; j < 32; ++j)
temp_in[j] = input[j * stride + i] * 4; temp_in[j] = input[j * stride + i] * 4;
fdct32(temp_in, temp_out, 0); vp9_fdct32(temp_in, temp_out, 0);
for (j = 0; j < 32; ++j) for (j = 0; j < 32; ++j)
// TODO(cd): see quality impact of only doing // TODO(cd): see quality impact of only doing
// output[j * 32 + i] = (temp_out[j] + 1) >> 2; // output[j * 32 + i] = (temp_out[j] + 1) >> 2;
...@@ -1547,7 +1527,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { ...@@ -1547,7 +1527,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) {
tran_high_t temp_in[32], temp_out[32]; tran_high_t temp_in[32], temp_out[32];
for (j = 0; j < 32; ++j) for (j = 0; j < 32; ++j)
temp_in[j] = output[j + i * 32]; temp_in[j] = output[j + i * 32];
fdct32(temp_in, temp_out, 1); vp9_fdct32(temp_in, temp_out, 1);
for (j = 0; j < 32; ++j) for (j = 0; j < 32; ++j)
out[j + i * 32] = (tran_low_t)temp_out[j]; out[j + i * 32] = (tran_low_t)temp_out[j];
} }
......
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_ENCODER_VP9_DCT_H_
#define VP9_ENCODER_VP9_DCT_H_