Commit 65f118d7 authored by Dmitry Kovalev
Browse files

Making input pointer of any inverse transform constant.

Also renaming dest_stride to stride in some places.

Change-Id: I75f602b623a5a7071d4922b747c45fa0b7d7a940
parent ac468dde
......@@ -21,7 +21,7 @@
extern "C" {
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *output, int pitch);
void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *output, int pitch);
}
#include "vpx/vpx_integer.h"
......@@ -258,9 +258,10 @@ void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
}
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
typedef void (*idct_t)(const int16_t *in, uint8_t *dst, int stride);
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
typedef void (*iht_t) (const int16_t *in, uint8_t *dst, int stride,
int tx_type);
void fdct16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fdct16x16_c(in, out, stride);
......
......@@ -75,7 +75,7 @@ void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
}
typedef void (*fwd_txfm_t)(int16_t *in, int16_t *out, int stride);
typedef void (*inv_txfm_t)(int16_t *in, uint8_t *dst, int stride);
typedef void (*inv_txfm_t)(const int16_t *in, uint8_t *dst, int stride);
class Trans32x32Test : public PARAMS(fwd_txfm_t, inv_txfm_t, int) {
public:
......
......@@ -21,7 +21,7 @@
extern "C" {
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *output, int pitch);
void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *output, int pitch);
}
#include "vpx/vpx_integer.h"
......@@ -29,9 +29,10 @@ using libvpx_test::ACMRandom;
namespace {
typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
typedef void (*idct_t)(const int16_t *in, uint8_t *dst, int stride);
typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);
typedef void (*iht_t) (const int16_t *in, uint8_t *dst, int stride,
int tx_type);
void fdct8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
vp9_short_fdct8x8_c(in, out, stride);
......
......@@ -11,19 +11,19 @@
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
extern void vp9_idct16x16_256_add_neon_pass1(int16_t *input,
void vp9_idct16x16_256_add_neon_pass1(const int16_t *input,
int16_t *output,
int output_stride);
extern void vp9_idct16x16_256_add_neon_pass2(int16_t *src,
void vp9_idct16x16_256_add_neon_pass2(const int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
uint8_t *dest,
int dest_stride);
extern void vp9_idct16x16_10_add_neon_pass1(int16_t *input,
void vp9_idct16x16_10_add_neon_pass1(const int16_t *input,
int16_t *output,
int output_stride);
extern void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
void vp9_idct16x16_10_add_neon_pass2(const int16_t *src,
int16_t *output,
int16_t *pass1Output,
int16_t skip_adding,
......@@ -34,7 +34,7 @@ extern void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
extern void vp9_push_neon(int64_t *store);
extern void vp9_pop_neon(int64_t *store);
void vp9_idct16x16_256_add_neon(int16_t *input,
void vp9_idct16x16_256_add_neon(const int16_t *input,
uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
......@@ -109,7 +109,7 @@ void vp9_idct16x16_256_add_neon(int16_t *input,
return;
}
void vp9_idct16x16_10_add_neon(int16_t *input,
void vp9_idct16x16_10_add_neon(const int16_t *input,
uint8_t *dest, int dest_stride) {
int64_t store_reg[8];
int16_t pass1_output[16*16] = {0};
......
......@@ -221,7 +221,7 @@ typedef struct macroblockd {
int lossless;
/* Inverse transform function pointers. */
void (*itxm_add)(int16_t *input, uint8_t *dest, int stride, int eob);
void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob);
struct subpix_fn_table subpix;
......
......@@ -18,13 +18,13 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_idct.h"
void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
0.5 shifts per pixel. */
int i;
int16_t output[16];
int a1, b1, c1, d1, e1;
int16_t *ip = input;
const int16_t *ip = input;
int16_t *op = output;
for (i = 0; i < 4; i++) {
......@@ -60,21 +60,21 @@ void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
c1 = e1 - c1;
a1 -= b1;
d1 += c1;
dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);
dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1);
dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1);
dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1);
dest[stride * 0] = clip_pixel(dest[stride * 0] + a1);
dest[stride * 1] = clip_pixel(dest[stride * 1] + b1);
dest[stride * 2] = clip_pixel(dest[stride * 2] + c1);
dest[stride * 3] = clip_pixel(dest[stride * 3] + d1);
ip++;
dest++;
}
}
void vp9_iwht4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {
int i;
int a1, e1;
int16_t tmp[4];
int16_t *ip = in;
const int16_t *ip = in;
int16_t *op = tmp;
a1 = ip[0] >> UNIT_QUANT_SHIFT;
......@@ -116,7 +116,7 @@ static void idct4_1d(const int16_t *input, int16_t *output) {
output[3] = step[0] - step[3];
}
void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[4 * 4];
int16_t *outptr = out;
int i, j;
......@@ -135,12 +135,12 @@ void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
temp_in[j] = out[j * 4 + i];
idct4_1d(temp_in, temp_out);
for (j = 0; j < 4; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ dest[j * dest_stride + i]);
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ dest[j * stride + i]);
}
}
void vp9_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) {
int i;
int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
......@@ -201,7 +201,7 @@ static void idct8_1d(const int16_t *input, int16_t *output) {
output[7] = step1[0] - step1[7];
}
void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[8 * 8];
int16_t *outptr = out;
int i, j;
......@@ -220,12 +220,12 @@ void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
temp_in[j] = out[j * 8 + i];
idct8_1d(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * dest_stride + i]);
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
}
}
void vp9_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
int i, j;
int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
......@@ -234,7 +234,7 @@ void vp9_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i)
dest[i] = clip_pixel(dest[i] + a1);
dest += dest_stride;
dest += stride;
}
}
......@@ -280,7 +280,7 @@ static void iadst4_1d(const int16_t *input, int16_t *output) {
output[3] = dct_const_round_shift(s3);
}
void vp9_iht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
const transform_2d IHT_4[] = {
{ idct4_1d, idct4_1d }, // DCT_DCT = 0
......@@ -307,8 +307,8 @@ void vp9_iht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
temp_in[j] = out[j * 4 + i];
IHT_4[tx_type].cols(temp_in, temp_out);
for (j = 0; j < 4; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ dest[j * dest_stride + i]);
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
+ dest[j * stride + i]);
}
}
static void iadst8_1d(const int16_t *input, int16_t *output) {
......@@ -395,7 +395,7 @@ static const transform_2d IHT_8[] = {
{ iadst8_1d, iadst8_1d } // ADST_ADST = 3
};
void vp9_iht8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride,
void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
int i, j;
int16_t out[8 * 8];
......@@ -416,12 +416,12 @@ void vp9_iht8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride,
temp_in[j] = out[j * 8 + i];
ht.cols(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * dest_stride + i]); }
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
}
}
void vp9_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[8 * 8] = { 0 };
int16_t *outptr = out;
int i, j;
......@@ -441,8 +441,8 @@ void vp9_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
temp_in[j] = out[j * 8 + i];
idct8_1d(temp_in, temp_out);
for (j = 0; j < 8; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * dest_stride + i]);
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
+ dest[j * stride + i]);
}
}
......@@ -611,7 +611,7 @@ static void idct16_1d(const int16_t *input, int16_t *output) {
output[15] = step2[0] - step2[15];
}
void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[16 * 16];
int16_t *outptr = out;
int i, j;
......@@ -630,8 +630,8 @@ void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
temp_in[j] = out[j * 16 + i];
idct16_1d(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * dest_stride + i]);
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
}
}
......@@ -813,7 +813,7 @@ static const transform_2d IHT_16[] = {
{ iadst16_1d, iadst16_1d } // ADST_ADST = 3
};
void vp9_iht16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride,
void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
int i, j;
int16_t out[16 * 16];
......@@ -834,12 +834,11 @@ void vp9_iht16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride,
temp_in[j] = out[j * 16 + i];
ht.cols(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * dest_stride + i]); }
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]); }
}
void vp9_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[16 * 16] = { 0 };
int16_t *outptr = out;
int i, j;
......@@ -859,13 +858,12 @@ void vp9_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
temp_in[j] = out[j*16 + i];
idct16_1d(temp_in, temp_out);
for (j = 0; j < 16; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * dest_stride + i]);
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
}
}
void vp9_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
int i, j;
int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
......@@ -874,7 +872,7 @@ void vp9_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i)
dest[i] = clip_pixel(dest[i] + a1);
dest += dest_stride;
dest += stride;
}
}
......@@ -1245,7 +1243,7 @@ static void idct32_1d(const int16_t *input, int16_t *output) {
output[31] = step1[0] - step1[31];
}
void vp9_idct32x32_1024_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
int16_t out[32 * 32];
int16_t *outptr = out;
int i, j;
......@@ -1277,13 +1275,12 @@ void vp9_idct32x32_1024_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
temp_in[j] = out[j * 32 + i];
idct32_1d(temp_in, temp_out);
for (j = 0; j < 32; ++j)
dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * dest_stride + i]);
dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
+ dest[j * stride + i]);
}
}
void vp9_idct32x32_1_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
int i, j;
int a1;
......@@ -1294,12 +1291,12 @@ void vp9_idct32x32_1_add_c(int16_t *input, uint8_t *dest,
for (j = 0; j < 32; ++j) {
for (i = 0; i < 32; ++i)
dest[i] = clip_pixel(dest[i] + a1);
dest += dest_stride;
dest += stride;
}
}
// idct
void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
if (eob > 1)
vp9_idct4x4_16_add(input, dest, stride);
else
......@@ -1307,14 +1304,14 @@ void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
// Inverse 4x4 Walsh-Hadamard transform + add, selected by eob:
// eob <= 1 takes the vp9_iwht4x4_1_add path, anything larger takes the
// vp9_iwht4x4_16_add path. The input coefficients are read-only.
void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
  if (eob <= 1) {
    vp9_iwht4x4_1_add(input, dest, stride);
    return;
  }
  vp9_iwht4x4_16_add(input, dest, stride);
}
void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) {
void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
// If dc is 1, then input[0] is the reconstructed value and does not need
// dequantization. Also, when dc is 1, dc is counted in eobs, i.e. eobs >= 1.
......@@ -1333,7 +1330,8 @@ void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
}
void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob) {
void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride,
int eob) {
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
if (eob) {
......@@ -1347,7 +1345,8 @@ void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
}
void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob) {
void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
int eob) {
if (eob) {
if (eob == 1)
vp9_idct32x32_1_add(input, dest, stride);
......@@ -1357,15 +1356,15 @@ void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob) {
}
// iht
void vp9_iht4x4_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
int eob) {
// 4x4 inverse hybrid transform + add.
// For DCT_DCT the work is forwarded to vp9_idct4x4_add (which receives eob);
// every other tx_type goes to vp9_iht4x4_16_add, which takes the transform
// type instead of eob.
void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
                    int stride, int eob) {
  if (tx_type != DCT_DCT) {
    vp9_iht4x4_16_add(input, dest, stride, tx_type);
    return;
  }
  vp9_idct4x4_add(input, dest, stride, eob);
}
void vp9_iht8x8_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
int stride, int eob) {
if (tx_type == DCT_DCT) {
vp9_idct8x8_add(input, dest, stride, eob);
......@@ -1376,7 +1375,7 @@ void vp9_iht8x8_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
}
}
void vp9_iht16x16_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
int stride, int eob) {
if (tx_type == DCT_DCT) {
vp9_idct16x16_add(input, dest, stride, eob);
......
......@@ -87,18 +87,20 @@ typedef struct {
transform_1d cols, rows; // vertical and horizontal
} transform_2d;
void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct32x32_add(int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob);
// 16x16 inverse DCT + add. The definition uses eob to choose between
// simplified and full code paths; input coefficients are read-only.
void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride,
                       int eob);
void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
int eob);
void vp9_iht4x4_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
int stride, int eob);
void vp9_iht8x8_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
int stride, int eob);
void vp9_iht16x16_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
int stride, int eob);
......
......@@ -267,51 +267,51 @@ specialize vp9_convolve8_avg_vert sse2 ssse3 neon dspr2
#
# dct
#
prototype void vp9_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct4x4_1_add sse2 neon
prototype void vp9_idct4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct4x4_16_add sse2 neon
prototype void vp9_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct8x8_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct8x8_1_add sse2 neon
prototype void vp9_idct8x8_64_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct8x8_64_add sse2 neon
prototype void vp9_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct8x8_10_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct8x8_10_add sse2 neon
prototype void vp9_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct16x16_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_1_add sse2 neon
prototype void vp9_idct16x16_256_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct16x16_256_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_256_add sse2 neon
prototype void vp9_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct16x16_10_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct16x16_10_add sse2 neon
prototype void vp9_idct32x32_1024_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1024_add sse2 neon
prototype void vp9_idct32x32_1_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_idct32x32_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1_add sse2
prototype void vp9_iht4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
prototype void vp9_iht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
specialize vp9_iht4x4_16_add sse2 neon
prototype void vp9_iht8x8_64_add "int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
prototype void vp9_iht8x8_64_add "const int16_t *input, uint8_t *dest, int dest_stride, int tx_type"
specialize vp9_iht8x8_64_add sse2 neon
prototype void vp9_iht16x16_256_add "int16_t *input, uint8_t *output, int pitch, int tx_type"
prototype void vp9_iht16x16_256_add "const int16_t *input, uint8_t *output, int pitch, int tx_type"
specialize vp9_iht16x16_256_add sse2
# dct and add
prototype void vp9_iwht4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_iwht4x4_1_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_iwht4x4_1_add
prototype void vp9_iwht4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
prototype void vp9_iwht4x4_16_add "const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_iwht4x4_16_add
#
......
......@@ -15,7 +15,7 @@
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_idct.h"
void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
void vp9_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
......@@ -26,10 +26,10 @@ void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
__m128i input0, input1, input2, input3;
// Rows
input0 = _mm_loadl_epi64((__m128i *)input);
input1 = _mm_loadl_epi64((__m128i *)(input + 4));
input2 = _mm_loadl_epi64((__m128i *)(input + 8));
input3 = _mm_loadl_epi64((__m128i *)(input + 12));
input0 = _mm_loadl_epi64((const __m128i *)input);
input1 = _mm_loadl_epi64((const __m128i *)(input + 4));
input2 = _mm_loadl_epi64((const __m128i *)(input + 8));
input3 = _mm_loadl_epi64((const __m128i *)(input + 12));
// Construct i3, i1, i3, i1, i2, i0, i2, i0
input0 = _mm_shufflelo_epi16(input0, 0xd8);
......@@ -148,7 +148,7 @@ void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
RECON_AND_STORE4X4(dest, input3);
}
void vp9_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
void vp9_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a;
......@@ -264,16 +264,16 @@ static void iadst4_1d_sse2(__m128i *in) {
in[3] = _mm_unpackhi_epi64(in[1], in[1]);
}
void vp9_iht4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride,
void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
int tx_type) {
__m128i in[4];
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
in[0] = _mm_loadl_epi64((__m128i *)input);
in[1] = _mm_loadl_epi64((__m128i *)(input + 4));
in[2] = _mm_loadl_epi64((__m128i *)(input + 8));
in[3] = _mm_loadl_epi64((__m128i *)(input + 12));
in[0] = _mm_loadl_epi64((const __m128i *)input);
in[1] = _mm_loadl_epi64((const __m128i *)(input + 4));
in[2] = _mm_loadl_epi64((const __m128i *)(input + 8));
in[3] = _mm_loadl_epi64((const __m128i *)(input + 12));
switch (tx_type) {
case 0: // DCT_DCT
......@@ -494,7 +494,7 @@ void vp9_iht4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride,
dest += stride; \
}
void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) {
void vp9_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<4);
......@@ -514,14 +514,14 @@ void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) {
int i;
// Load input data.
in0 = _mm_load_si128((__m128i *)input);
in1 = _mm_load_si128((__m128i *)(input + 8 * 1));
in2 = _mm_load_si128((__m128i *)(input + 8 * 2));
in3 = _mm_load_si128((__m128i *)(input + 8 * 3));