vp9_idct.c 97.7 KB
Newer Older
John Koleszar's avatar
John Koleszar committed
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
John Koleszar's avatar
John Koleszar committed
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5 6
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
John Koleszar's avatar
John Koleszar committed
9 10
 */

Yunqing Wang's avatar
Yunqing Wang committed
11
#include <math.h>
Dmitry Kovalev's avatar
Dmitry Kovalev committed
12

Yunqing Wang's avatar
Yunqing Wang committed
13
#include "./vp9_rtcd.h"
14
#include "vp9/common/vp9_blockd.h"
15
#include "vp9/common/vp9_idct.h"
16
#include "vp9/common/vp9_systemdependent.h"
17

18
// Adds the residual `trans` to the 8-bit pixel `dest` and returns the result
// after it has been passed through clip_pixel(). Both the residual and the
// sum go through WRAPLOW(..., 8) before being used.
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
  const tran_high_t residual = WRAPLOW(trans, 8);
  const tran_high_t sum = WRAPLOW(dest + residual, 8);
  return clip_pixel(sum);
}

23
// Inverse 4x4 Walsh-Hadamard transform of the 16 coefficients in `input`
// (read as four rows of four); the result is added into the 4x4 pixel block
// at `dest`, whose rows are `stride` bytes apart.
void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
  /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
     0.5 shifts per pixel. */
  tran_low_t rows[16];
  tran_high_t a, b, c, d, e;
  int n;

  // Pass 1: transform each row of coefficients into `rows`.
  for (n = 0; n < 4; ++n) {
    const tran_low_t *const src = input + 4 * n;
    tran_low_t *const dst = rows + 4 * n;

    // Note the load order: a, c, d, b map to elements 0, 1, 2, 3.
    a = src[0] >> UNIT_QUANT_SHIFT;
    c = src[1] >> UNIT_QUANT_SHIFT;
    d = src[2] >> UNIT_QUANT_SHIFT;
    b = src[3] >> UNIT_QUANT_SHIFT;

    a += c;
    d -= b;
    e = (a - d) >> 1;
    b = e - b;
    c = e - c;
    a -= b;
    d += c;

    dst[0] = WRAPLOW(a, 8);
    dst[1] = WRAPLOW(b, 8);
    dst[2] = WRAPLOW(c, 8);
    dst[3] = WRAPLOW(d, 8);
  }

  // Pass 2: transform each column of `rows` and accumulate into dest.
  for (n = 0; n < 4; ++n) {
    const tran_low_t *const col = rows + n;
    uint8_t *const px = dest + n;

    a = col[4 * 0];
    c = col[4 * 1];
    d = col[4 * 2];
    b = col[4 * 3];

    a += c;
    d -= b;
    e = (a - d) >> 1;
    b = e - b;
    c = e - c;
    a -= b;
    d += c;

    px[stride * 0] = clip_pixel_add(px[stride * 0], a);
    px[stride * 1] = clip_pixel_add(px[stride * 1], b);
    px[stride * 2] = clip_pixel_add(px[stride * 2], c);
    px[stride * 3] = clip_pixel_add(px[stride * 3], d);
  }
}
Hui Su's avatar
Hui Su committed
74

75
// Inverse 4x4 Walsh-Hadamard transform that reads only in[0]; the result is
// added into the 4x4 pixel block at `dest`, whose rows are `dest_stride`
// bytes apart.
void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
  tran_low_t col_dc[4];
  tran_high_t a, e;
  int col;

  // Row pass on the single coefficient: element 0 gets a, the rest get e.
  a = in[0] >> UNIT_QUANT_SHIFT;
  e = a >> 1;
  a -= e;
  col_dc[0] = WRAPLOW(a, 8);
  col_dc[3] = WRAPLOW(e, 8);
  col_dc[2] = col_dc[3];
  col_dc[1] = col_dc[2];

  // Column pass: split each column's value the same way and accumulate.
  for (col = 0; col < 4; ++col) {
    uint8_t *const px = dest + col;

    e = col_dc[col] >> 1;
    a = col_dc[col] - e;

    px[dest_stride * 0] = clip_pixel_add(px[dest_stride * 0], a);
    px[dest_stride * 1] = clip_pixel_add(px[dest_stride * 1], e);
    px[dest_stride * 2] = clip_pixel_add(px[dest_stride * 2], e);
    px[dest_stride * 3] = clip_pixel_add(px[dest_stride * 3], e);
  }
}
Hui Su's avatar
Hui Su committed
100

101 102 103
// 4-point 1-D inverse DCT: reads input[0..3] and writes output[0..3].
// Every input element is consumed before the first output element is
// written, so `output` may be the same array as `input` (idct8 in this file
// calls it that way).
static void idct4(const tran_low_t *input, tran_low_t *output) {
  tran_low_t even_sum, even_diff, odd_lo, odd_hi;
  tran_high_t prod;

  // stage 1: even part (inputs 0 and 2)
  prod = (input[0] + input[2]) * cospi_16_64;
  even_sum = WRAPLOW(dct_const_round_shift(prod), 8);
  prod = (input[0] - input[2]) * cospi_16_64;
  even_diff = WRAPLOW(dct_const_round_shift(prod), 8);

  // stage 1: odd part (inputs 1 and 3)
  prod = input[1] * cospi_24_64 - input[3] * cospi_8_64;
  odd_lo = WRAPLOW(dct_const_round_shift(prod), 8);
  prod = input[1] * cospi_8_64 + input[3] * cospi_24_64;
  odd_hi = WRAPLOW(dct_const_round_shift(prod), 8);

  // stage 2: butterflies
  output[0] = WRAPLOW(even_sum + odd_hi, 8);
  output[1] = WRAPLOW(even_diff + odd_lo, 8);
  output[2] = WRAPLOW(even_diff - odd_lo, 8);
  output[3] = WRAPLOW(even_sum - odd_hi, 8);
}

121 122 123
// Full 4x4 inverse DCT: idct4 over each of the four rows of `input`, then
// over each column of that result. Each output is scaled with
// ROUND_POWER_OF_TWO(..., 4) and added into `dest` (rows `stride` bytes
// apart).
void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
  tran_low_t rows[4 * 4];
  tran_low_t col_in[4], col_out[4];
  int r, c;

  // Rows
  for (r = 0; r < 4; ++r)
    idct4(input + 4 * r, rows + 4 * r);

  // Columns
  for (c = 0; c < 4; ++c) {
    for (r = 0; r < 4; ++r)
      col_in[r] = rows[r * 4 + c];

    idct4(col_in, col_out);

    for (r = 0; r < 4; ++r) {
      uint8_t *const px = &dest[r * stride + c];
      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 4));
    }
  }
}

146 147
// 4x4 inverse DCT that reads only input[0]: the coefficient is scaled by
// cospi_16_64 twice (once per 1-D pass), rounded with
// ROUND_POWER_OF_TWO(..., 4), and the same value is added to all 16 pixels
// of `dest` (rows `dest_stride` bytes apart).
void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
                         int dest_stride) {
  tran_low_t dc;
  tran_high_t delta;
  int r, c;

  dc = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
  dc = WRAPLOW(dct_const_round_shift(dc * cospi_16_64), 8);
  delta = ROUND_POWER_OF_TWO(dc, 4);

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c)
      dest[c] = clip_pixel_add(dest[c], delta);
    dest += dest_stride;
  }
}

163 164 165
// 8-point 1-D inverse DCT: reads input[0..7] and writes output[0..7].
// The even-indexed inputs go through idct4; the odd-indexed inputs go
// through the rotations and butterflies below.
static void idct8(const tran_low_t *input, tran_low_t *output) {
  tran_low_t even[4];
  tran_low_t o4, o5, o6, o7;  // odd half, current stage
  tran_low_t n4, n5, n6, n7;  // odd half, next stage
  tran_high_t t0, t1;

  // stage 1
  even[0] = input[0];
  even[1] = input[2];
  even[2] = input[4];
  even[3] = input[6];

  t0 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
  t1 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
  o4 = WRAPLOW(dct_const_round_shift(t0), 8);
  o7 = WRAPLOW(dct_const_round_shift(t1), 8);

  t0 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
  t1 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
  o5 = WRAPLOW(dct_const_round_shift(t0), 8);
  o6 = WRAPLOW(dct_const_round_shift(t1), 8);

  // stage 2 & stage 3 - even half (transformed in place)
  idct4(even, even);

  // stage 2 - odd half
  n4 = WRAPLOW(o4 + o5, 8);
  n5 = WRAPLOW(o4 - o5, 8);
  n6 = WRAPLOW(-o6 + o7, 8);
  n7 = WRAPLOW(o6 + o7, 8);

  // stage 3 - odd half
  o4 = n4;
  t0 = (n6 - n5) * cospi_16_64;
  t1 = (n5 + n6) * cospi_16_64;
  o5 = WRAPLOW(dct_const_round_shift(t0), 8);
  o6 = WRAPLOW(dct_const_round_shift(t1), 8);
  o7 = n7;

  // stage 4
  output[0] = WRAPLOW(even[0] + o7, 8);
  output[1] = WRAPLOW(even[1] + o6, 8);
  output[2] = WRAPLOW(even[2] + o5, 8);
  output[3] = WRAPLOW(even[3] + o4, 8);
  output[4] = WRAPLOW(even[3] - o4, 8);
  output[5] = WRAPLOW(even[2] - o5, 8);
  output[6] = WRAPLOW(even[1] - o6, 8);
  output[7] = WRAPLOW(even[0] - o7, 8);
}

208 209 210
// Full 8x8 inverse DCT: idct8 over each of the eight rows of `input`, then
// over each column of that result. Each output is scaled with
// ROUND_POWER_OF_TWO(..., 5) and added into `dest` (rows `stride` bytes
// apart).
void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
  tran_low_t rows[8 * 8];
  tran_low_t col_in[8], col_out[8];
  int r, c;

  // First transform rows
  for (r = 0; r < 8; ++r)
    idct8(input + 8 * r, rows + 8 * r);

  // Then transform columns
  for (c = 0; c < 8; ++c) {
    for (r = 0; r < 8; ++r)
      col_in[r] = rows[r * 8 + c];

    idct8(col_in, col_out);

    for (r = 0; r < 8; ++r) {
      uint8_t *const px = &dest[r * stride + c];
      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 5));
    }
  }
}

233
// 8x8 inverse DCT that reads only input[0]: the coefficient is scaled by
// cospi_16_64 twice, rounded with ROUND_POWER_OF_TWO(..., 5), and the same
// value is added to all 64 pixels of `dest` (rows `stride` bytes apart).
void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
  tran_low_t dc;
  tran_high_t delta;
  int r, c;

  dc = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
  dc = WRAPLOW(dct_const_round_shift(dc * cospi_16_64), 8);
  delta = ROUND_POWER_OF_TWO(dc, 5);

  for (r = 0; r < 8; ++r) {
    uint8_t *const row = dest + r * stride;
    for (c = 0; c < 8; ++c)
      row[c] = clip_pixel_add(row[c], delta);
  }
}

246 247
// 4-point 1-D inverse ADST: reads input[0..3] and writes output[0..3].
// An all-zero input produces an all-zero output without any multiplies.
static void iadst4(const tran_low_t *input, tran_low_t *output) {
  const tran_low_t x0 = input[0];
  const tran_low_t x1 = input[1];
  const tran_low_t x2 = input[2];
  const tran_low_t x3 = input[3];
  tran_high_t acc0, acc1, mid, tail;

  if ((x0 | x1 | x2 | x3) == 0) {
    output[0] = 0;
    output[1] = 0;
    output[2] = 0;
    output[3] = 0;
    return;
  }

  acc0 = sinpi_1_9 * x0 + sinpi_4_9 * x2 + sinpi_2_9 * x3;
  acc1 = sinpi_2_9 * x0 - sinpi_1_9 * x2 - sinpi_4_9 * x3;
  mid = sinpi_3_9 * x1;
  // The sum is formed at tran_low_t's promoted width before widening.
  tail = x0 - x2 + x3;
  tail = sinpi_3_9 * tail;

  // 1-D transform scaling factor is sqrt(2).
  // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
  // + 1b (addition) = 29b.
  // Hence the output bit depth is 15b.
  output[0] = WRAPLOW(dct_const_round_shift(acc0 + mid), 8);
  output[1] = WRAPLOW(dct_const_round_shift(acc1 + mid), 8);
  output[2] = WRAPLOW(dct_const_round_shift(tail), 8);
  output[3] = WRAPLOW(dct_const_round_shift(acc0 + acc1 - mid), 8);
}

283
// 4x4 inverse hybrid transform: applies the 1-D transform pair selected by
// `tx_type` (index into IHT_4 below) to the rows of `input`, then to the
// columns of that result. Each output is scaled with
// ROUND_POWER_OF_TWO(..., 4) and added into `dest` (rows `stride` bytes
// apart).
//
// `tx_type` is used directly as an array index and is not range-checked
// here; it must be one of the four values listed in the table.
void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  // Static so the table is built once rather than on every call, matching
  // the file-scope IHT_8 and IHT_16 tables.
  static const transform_2d IHT_4[] = {
    { idct4,  idct4  },  // DCT_DCT  = 0
    { iadst4, idct4  },  // ADST_DCT = 1
    { idct4,  iadst4 },  // DCT_ADST = 2
    { iadst4, iadst4 }   // ADST_ADST = 3
  };

  int i, j;
  tran_low_t out[4 * 4];
  tran_low_t *outptr = out;
  tran_low_t temp_in[4], temp_out[4];

  // inverse transform row vectors
  for (i = 0; i < 4; ++i) {
    IHT_4[tx_type].rows(input, outptr);
    input  += 4;
    outptr += 4;
  }

  // inverse transform column vectors
  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j)
      temp_in[j] = out[j * 4 + i];
    IHT_4[tx_type].cols(temp_in, temp_out);
    for (j = 0; j < 4; ++j) {
      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
                                            ROUND_POWER_OF_TWO(temp_out[j], 4));
    }
  }
}
315

316
// 8-point 1-D inverse ADST: reads input[0..7] and writes output[0..7].
// An all-zero input produces an all-zero output without any multiplies.
// The rotation products are deliberately narrowed to `int` (p0..p7).
static void iadst8(const tran_low_t *input, tran_low_t *output) {
  int p0, p1, p2, p3, p4, p5, p6, p7;

  // Inputs are loaded in permuted order.
  tran_high_t v0 = input[7];
  tran_high_t v1 = input[0];
  tran_high_t v2 = input[5];
  tran_high_t v3 = input[2];
  tran_high_t v4 = input[3];
  tran_high_t v5 = input[4];
  tran_high_t v6 = input[1];
  tran_high_t v7 = input[6];

  if ((v0 | v1 | v2 | v3 | v4 | v5 | v6 | v7) == 0) {
    int k;
    for (k = 0; k < 8; ++k)
      output[k] = 0;
    return;
  }

  // stage 1: rotations, then butterflies
  p0 = (int)(cospi_2_64  * v0 + cospi_30_64 * v1);
  p1 = (int)(cospi_30_64 * v0 - cospi_2_64  * v1);
  p2 = (int)(cospi_10_64 * v2 + cospi_22_64 * v3);
  p3 = (int)(cospi_22_64 * v2 - cospi_10_64 * v3);
  p4 = (int)(cospi_18_64 * v4 + cospi_14_64 * v5);
  p5 = (int)(cospi_14_64 * v4 - cospi_18_64 * v5);
  p6 = (int)(cospi_26_64 * v6 + cospi_6_64  * v7);
  p7 = (int)(cospi_6_64  * v6 - cospi_26_64 * v7);

  v0 = WRAPLOW(dct_const_round_shift(p0 + p4), 8);
  v1 = WRAPLOW(dct_const_round_shift(p1 + p5), 8);
  v2 = WRAPLOW(dct_const_round_shift(p2 + p6), 8);
  v3 = WRAPLOW(dct_const_round_shift(p3 + p7), 8);
  v4 = WRAPLOW(dct_const_round_shift(p0 - p4), 8);
  v5 = WRAPLOW(dct_const_round_shift(p1 - p5), 8);
  v6 = WRAPLOW(dct_const_round_shift(p2 - p6), 8);
  v7 = WRAPLOW(dct_const_round_shift(p3 - p7), 8);

  // stage 2: lower half passes through, upper half is rotated
  p0 = (int)v0;
  p1 = (int)v1;
  p2 = (int)v2;
  p3 = (int)v3;
  p4 = (int)(cospi_8_64 * v4 + cospi_24_64 * v5);
  p5 = (int)(cospi_24_64 * v4 - cospi_8_64 * v5);
  p6 = (int)(-cospi_24_64 * v6 + cospi_8_64 * v7);
  p7 = (int)(cospi_8_64 * v6 + cospi_24_64 * v7);

  v0 = WRAPLOW(p0 + p2, 8);
  v1 = WRAPLOW(p1 + p3, 8);
  v2 = WRAPLOW(p0 - p2, 8);
  v3 = WRAPLOW(p1 - p3, 8);
  v4 = WRAPLOW(dct_const_round_shift(p4 + p6), 8);
  v5 = WRAPLOW(dct_const_round_shift(p5 + p7), 8);
  v6 = WRAPLOW(dct_const_round_shift(p4 - p6), 8);
  v7 = WRAPLOW(dct_const_round_shift(p5 - p7), 8);

  // stage 3
  p2 = (int)(cospi_16_64 * (v2 + v3));
  p3 = (int)(cospi_16_64 * (v2 - v3));
  p6 = (int)(cospi_16_64 * (v6 + v7));
  p7 = (int)(cospi_16_64 * (v6 - v7));

  v2 = WRAPLOW(dct_const_round_shift(p2), 8);
  v3 = WRAPLOW(dct_const_round_shift(p3), 8);
  v6 = WRAPLOW(dct_const_round_shift(p6), 8);
  v7 = WRAPLOW(dct_const_round_shift(p7), 8);

  // Outputs are written in permuted order with alternating signs.
  output[0] = WRAPLOW(v0, 8);
  output[1] = WRAPLOW(-v4, 8);
  output[2] = WRAPLOW(v6, 8);
  output[3] = WRAPLOW(-v2, 8);
  output[4] = WRAPLOW(v3, 8);
  output[5] = WRAPLOW(-v7, 8);
  output[6] = WRAPLOW(v5, 8);
  output[7] = WRAPLOW(-v1, 8);
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
393
static const transform_2d IHT_8[] = {
394 395 396 397
  { idct8,  idct8  },  // DCT_DCT  = 0
  { iadst8, idct8  },  // ADST_DCT = 1
  { idct8,  iadst8 },  // DCT_ADST = 2
  { iadst8, iadst8 }   // ADST_ADST = 3
Dmitry Kovalev's avatar
Dmitry Kovalev committed
398 399
};

400
// 8x8 inverse hybrid transform: applies the 1-D transform pair IHT_8[tx_type]
// to the rows of `input`, then to the columns of that result. Each output is
// scaled with ROUND_POWER_OF_TWO(..., 5) and added into `dest` (rows
// `stride` bytes apart). `tx_type` is not range-checked here.
void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
                         int tx_type) {
  const transform_2d *const ht = &IHT_8[tx_type];
  tran_low_t rows[8 * 8];
  tran_low_t col_in[8], col_out[8];
  int r, c;

  // inverse transform row vectors
  for (r = 0; r < 8; ++r)
    ht->rows(input + 8 * r, rows + 8 * r);

  // inverse transform column vectors
  for (c = 0; c < 8; ++c) {
    for (r = 0; r < 8; ++r)
      col_in[r] = rows[r * 8 + c];

    ht->cols(col_in, col_out);

    for (r = 0; r < 8; ++r) {
      uint8_t *const px = &dest[r * stride + c];
      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 5));
    }
  }
}

427 428 429
// Reduced 8x8 inverse DCT: only the first four rows of `input` are
// row-transformed; the remaining rows of the intermediate buffer stay zero.
// All eight columns are then transformed, scaled with
// ROUND_POWER_OF_TWO(..., 5) and added into `dest` (rows `stride` bytes
// apart).
void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
  tran_low_t rows[8 * 8] = { 0 };
  tran_low_t col_in[8], col_out[8];
  int r, c;

  // First transform rows.
  // Only the first 4 rows have non-zero coefficients.
  for (r = 0; r < 4; ++r)
    idct8(input + 8 * r, rows + 8 * r);

  // Then transform columns
  for (c = 0; c < 8; ++c) {
    for (r = 0; r < 8; ++r)
      col_in[r] = rows[r * 8 + c];

    idct8(col_in, col_out);

    for (r = 0; r < 8; ++r) {
      uint8_t *const px = &dest[r * stride + c];
      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 5));
    }
  }
}

453 454 455
// 16-point 1-D inverse DCT: reads input[0..15] and writes output[0..15].
// The two work arrays `p` and `q` alternate as source and destination from
// one stage to the next.
static void idct16(const tran_low_t *input, tran_low_t *output) {
  // Order in which stage 1 picks up the inputs.
  static const int input_order[16] = {
    0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15
  };
  tran_low_t p[16], q[16];
  tran_high_t t1, t2;
  int k;

  // stage 1
  for (k = 0; k < 16; ++k)
    p[k] = input[input_order[k]];

  // stage 2
  for (k = 0; k < 8; ++k)
    q[k] = p[k];

  t1 = p[8] * cospi_30_64 - p[15] * cospi_2_64;
  t2 = p[8] * cospi_2_64 + p[15] * cospi_30_64;
  q[8] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[15] = WRAPLOW(dct_const_round_shift(t2), 8);

  t1 = p[9] * cospi_14_64 - p[14] * cospi_18_64;
  t2 = p[9] * cospi_18_64 + p[14] * cospi_14_64;
  q[9] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[14] = WRAPLOW(dct_const_round_shift(t2), 8);

  t1 = p[10] * cospi_22_64 - p[13] * cospi_10_64;
  t2 = p[10] * cospi_10_64 + p[13] * cospi_22_64;
  q[10] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[13] = WRAPLOW(dct_const_round_shift(t2), 8);

  t1 = p[11] * cospi_6_64 - p[12] * cospi_26_64;
  t2 = p[11] * cospi_26_64 + p[12] * cospi_6_64;
  q[11] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[12] = WRAPLOW(dct_const_round_shift(t2), 8);

  // stage 3
  for (k = 0; k < 4; ++k)
    p[k] = q[k];

  t1 = q[4] * cospi_28_64 - q[7] * cospi_4_64;
  t2 = q[4] * cospi_4_64 + q[7] * cospi_28_64;
  p[4] = WRAPLOW(dct_const_round_shift(t1), 8);
  p[7] = WRAPLOW(dct_const_round_shift(t2), 8);

  t1 = q[5] * cospi_12_64 - q[6] * cospi_20_64;
  t2 = q[5] * cospi_20_64 + q[6] * cospi_12_64;
  p[5] = WRAPLOW(dct_const_round_shift(t1), 8);
  p[6] = WRAPLOW(dct_const_round_shift(t2), 8);

  p[8] = WRAPLOW(q[8] + q[9], 8);
  p[9] = WRAPLOW(q[8] - q[9], 8);
  p[10] = WRAPLOW(-q[10] + q[11], 8);
  p[11] = WRAPLOW(q[10] + q[11], 8);
  p[12] = WRAPLOW(q[12] + q[13], 8);
  p[13] = WRAPLOW(q[12] - q[13], 8);
  p[14] = WRAPLOW(-q[14] + q[15], 8);
  p[15] = WRAPLOW(q[14] + q[15], 8);

  // stage 4
  t1 = (p[0] + p[1]) * cospi_16_64;
  t2 = (p[0] - p[1]) * cospi_16_64;
  q[0] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[1] = WRAPLOW(dct_const_round_shift(t2), 8);

  t1 = p[2] * cospi_24_64 - p[3] * cospi_8_64;
  t2 = p[2] * cospi_8_64 + p[3] * cospi_24_64;
  q[2] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[3] = WRAPLOW(dct_const_round_shift(t2), 8);

  q[4] = WRAPLOW(p[4] + p[5], 8);
  q[5] = WRAPLOW(p[4] - p[5], 8);
  q[6] = WRAPLOW(-p[6] + p[7], 8);
  q[7] = WRAPLOW(p[6] + p[7], 8);

  q[8] = p[8];
  q[15] = p[15];

  t1 = -p[9] * cospi_8_64 + p[14] * cospi_24_64;
  t2 = p[9] * cospi_24_64 + p[14] * cospi_8_64;
  q[9] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[14] = WRAPLOW(dct_const_round_shift(t2), 8);

  t1 = -p[10] * cospi_24_64 - p[13] * cospi_8_64;
  t2 = -p[10] * cospi_8_64 + p[13] * cospi_24_64;
  q[10] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[13] = WRAPLOW(dct_const_round_shift(t2), 8);

  q[11] = p[11];
  q[12] = p[12];

  // stage 5
  p[0] = WRAPLOW(q[0] + q[3], 8);
  p[1] = WRAPLOW(q[1] + q[2], 8);
  p[2] = WRAPLOW(q[1] - q[2], 8);
  p[3] = WRAPLOW(q[0] - q[3], 8);

  p[4] = q[4];
  t1 = (q[6] - q[5]) * cospi_16_64;
  t2 = (q[5] + q[6]) * cospi_16_64;
  p[5] = WRAPLOW(dct_const_round_shift(t1), 8);
  p[6] = WRAPLOW(dct_const_round_shift(t2), 8);
  p[7] = q[7];

  p[8] = WRAPLOW(q[8] + q[11], 8);
  p[9] = WRAPLOW(q[9] + q[10], 8);
  p[10] = WRAPLOW(q[9] - q[10], 8);
  p[11] = WRAPLOW(q[8] - q[11], 8);
  p[12] = WRAPLOW(-q[12] + q[15], 8);
  p[13] = WRAPLOW(-q[13] + q[14], 8);
  p[14] = WRAPLOW(q[13] + q[14], 8);
  p[15] = WRAPLOW(q[12] + q[15], 8);

  // stage 6
  // Mirrored butterflies over p[0..7]: q[k] gets the sum, q[7 - k] the
  // difference.
  for (k = 0; k < 4; ++k) {
    q[k] = WRAPLOW(p[k] + p[7 - k], 8);
    q[7 - k] = WRAPLOW(p[k] - p[7 - k], 8);
  }

  q[8] = p[8];
  q[9] = p[9];

  t1 = (-p[10] + p[13]) * cospi_16_64;
  t2 = (p[10] + p[13]) * cospi_16_64;
  q[10] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[13] = WRAPLOW(dct_const_round_shift(t2), 8);

  t1 = (-p[11] + p[12]) * cospi_16_64;
  t2 = (p[11] + p[12]) * cospi_16_64;
  q[11] = WRAPLOW(dct_const_round_shift(t1), 8);
  q[12] = WRAPLOW(dct_const_round_shift(t2), 8);

  q[14] = p[14];
  q[15] = p[15];

  // stage 7
  // Mirrored butterflies over q[0..15]: output[k] gets the sum,
  // output[15 - k] the difference.
  for (k = 0; k < 8; ++k) {
    output[k] = WRAPLOW(q[k] + q[15 - k], 8);
    output[15 - k] = WRAPLOW(q[k] - q[15 - k], 8);
  }
}

618 619 620 621
// Full 16x16 inverse DCT: idct16 over each of the sixteen rows of `input`,
// then over each column of that result. Each output is scaled with
// ROUND_POWER_OF_TWO(..., 6) and added into `dest` (rows `stride` bytes
// apart).
void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
                             int stride) {
  tran_low_t rows[16 * 16];
  tran_low_t col_in[16], col_out[16];
  int r, c;

  // First transform rows
  for (r = 0; r < 16; ++r)
    idct16(input + 16 * r, rows + 16 * r);

  // Then transform columns
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      col_in[r] = rows[r * 16 + c];

    idct16(col_in, col_out);

    for (r = 0; r < 16; ++r) {
      uint8_t *const px = &dest[r * stride + c];
      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 6));
    }
  }
}
Yunqing Wang's avatar
Yunqing Wang committed
643

644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663
// 16-point 1-D inverse ADST: reads input[0..15] and writes output[0..15].
// An all-zero input produces an all-zero output without any multiplies.
// Each stage first forms the products/pass-throughs (p*) from the current
// values (v*), then overwrites the values from those products.
static void iadst16(const tran_low_t *input, tran_low_t *output) {
  tran_high_t p0, p1, p2, p3, p4, p5, p6, p7;
  tran_high_t p8, p9, p10, p11, p12, p13, p14, p15;

  // Inputs are loaded in permuted order.
  tran_high_t v0 = input[15];
  tran_high_t v1 = input[0];
  tran_high_t v2 = input[13];
  tran_high_t v3 = input[2];
  tran_high_t v4 = input[11];
  tran_high_t v5 = input[4];
  tran_high_t v6 = input[9];
  tran_high_t v7 = input[6];
  tran_high_t v8 = input[7];
  tran_high_t v9 = input[8];
  tran_high_t v10 = input[5];
  tran_high_t v11 = input[10];
  tran_high_t v12 = input[3];
  tran_high_t v13 = input[12];
  tran_high_t v14 = input[1];
  tran_high_t v15 = input[14];

  if ((v0 | v1 | v2 | v3 | v4 | v5 | v6 | v7 | v8
          | v9 | v10 | v11 | v12 | v13 | v14 | v15) == 0) {
    int k;
    for (k = 0; k < 16; ++k)
      output[k] = 0;
    return;
  }

  // stage 1
  p0 = v0 * cospi_1_64  + v1 * cospi_31_64;
  p1 = v0 * cospi_31_64 - v1 * cospi_1_64;
  p2 = v2 * cospi_5_64  + v3 * cospi_27_64;
  p3 = v2 * cospi_27_64 - v3 * cospi_5_64;
  p4 = v4 * cospi_9_64  + v5 * cospi_23_64;
  p5 = v4 * cospi_23_64 - v5 * cospi_9_64;
  p6 = v6 * cospi_13_64 + v7 * cospi_19_64;
  p7 = v6 * cospi_19_64 - v7 * cospi_13_64;
  p8 = v8 * cospi_17_64 + v9 * cospi_15_64;
  p9 = v8 * cospi_15_64 - v9 * cospi_17_64;
  p10 = v10 * cospi_21_64 + v11 * cospi_11_64;
  p11 = v10 * cospi_11_64 - v11 * cospi_21_64;
  p12 = v12 * cospi_25_64 + v13 * cospi_7_64;
  p13 = v12 * cospi_7_64  - v13 * cospi_25_64;
  p14 = v14 * cospi_29_64 + v15 * cospi_3_64;
  p15 = v14 * cospi_3_64  - v15 * cospi_29_64;

  v0 = WRAPLOW(dct_const_round_shift(p0 + p8), 8);
  v1 = WRAPLOW(dct_const_round_shift(p1 + p9), 8);
  v2 = WRAPLOW(dct_const_round_shift(p2 + p10), 8);
  v3 = WRAPLOW(dct_const_round_shift(p3 + p11), 8);
  v4 = WRAPLOW(dct_const_round_shift(p4 + p12), 8);
  v5 = WRAPLOW(dct_const_round_shift(p5 + p13), 8);
  v6 = WRAPLOW(dct_const_round_shift(p6 + p14), 8);
  v7 = WRAPLOW(dct_const_round_shift(p7 + p15), 8);
  v8 = WRAPLOW(dct_const_round_shift(p0 - p8), 8);
  v9 = WRAPLOW(dct_const_round_shift(p1 - p9), 8);
  v10 = WRAPLOW(dct_const_round_shift(p2 - p10), 8);
  v11 = WRAPLOW(dct_const_round_shift(p3 - p11), 8);
  v12 = WRAPLOW(dct_const_round_shift(p4 - p12), 8);
  v13 = WRAPLOW(dct_const_round_shift(p5 - p13), 8);
  v14 = WRAPLOW(dct_const_round_shift(p6 - p14), 8);
  v15 = WRAPLOW(dct_const_round_shift(p7 - p15), 8);

  // stage 2
  p0 = v0;
  p1 = v1;
  p2 = v2;
  p3 = v3;
  p4 = v4;
  p5 = v5;
  p6 = v6;
  p7 = v7;
  p8 = v8 * cospi_4_64 + v9 * cospi_28_64;
  p9 = v8 * cospi_28_64 - v9 * cospi_4_64;
  p10 = v10 * cospi_20_64 + v11 * cospi_12_64;
  p11 = v10 * cospi_12_64 - v11 * cospi_20_64;
  p12 = - v12 * cospi_28_64 + v13 * cospi_4_64;
  p13 = v12 * cospi_4_64 + v13 * cospi_28_64;
  p14 = - v14 * cospi_12_64 + v15 * cospi_20_64;
  p15 = v14 * cospi_20_64 + v15 * cospi_12_64;

  v0 = WRAPLOW(p0 + p4, 8);
  v1 = WRAPLOW(p1 + p5, 8);
  v2 = WRAPLOW(p2 + p6, 8);
  v3 = WRAPLOW(p3 + p7, 8);
  v4 = WRAPLOW(p0 - p4, 8);
  v5 = WRAPLOW(p1 - p5, 8);
  v6 = WRAPLOW(p2 - p6, 8);
  v7 = WRAPLOW(p3 - p7, 8);
  v8 = WRAPLOW(dct_const_round_shift(p8 + p12), 8);
  v9 = WRAPLOW(dct_const_round_shift(p9 + p13), 8);
  v10 = WRAPLOW(dct_const_round_shift(p10 + p14), 8);
  v11 = WRAPLOW(dct_const_round_shift(p11 + p15), 8);
  v12 = WRAPLOW(dct_const_round_shift(p8 - p12), 8);
  v13 = WRAPLOW(dct_const_round_shift(p9 - p13), 8);
  v14 = WRAPLOW(dct_const_round_shift(p10 - p14), 8);
  v15 = WRAPLOW(dct_const_round_shift(p11 - p15), 8);

  // stage 3
  p0 = v0;
  p1 = v1;
  p2 = v2;
  p3 = v3;
  p4 = v4 * cospi_8_64 + v5 * cospi_24_64;
  p5 = v4 * cospi_24_64 - v5 * cospi_8_64;
  p6 = - v6 * cospi_24_64 + v7 * cospi_8_64;
  p7 = v6 * cospi_8_64 + v7 * cospi_24_64;
  p8 = v8;
  p9 = v9;
  p10 = v10;
  p11 = v11;
  p12 = v12 * cospi_8_64 + v13 * cospi_24_64;
  p13 = v12 * cospi_24_64 - v13 * cospi_8_64;
  p14 = - v14 * cospi_24_64 + v15 * cospi_8_64;
  p15 = v14 * cospi_8_64 + v15 * cospi_24_64;

  v0 = WRAPLOW(check_range(p0 + p2), 8);
  v1 = WRAPLOW(check_range(p1 + p3), 8);
  v2 = WRAPLOW(check_range(p0 - p2), 8);
  v3 = WRAPLOW(check_range(p1 - p3), 8);
  v4 = WRAPLOW(dct_const_round_shift(p4 + p6), 8);
  v5 = WRAPLOW(dct_const_round_shift(p5 + p7), 8);
  v6 = WRAPLOW(dct_const_round_shift(p4 - p6), 8);
  v7 = WRAPLOW(dct_const_round_shift(p5 - p7), 8);
  v8 = WRAPLOW(check_range(p8 + p10), 8);
  v9 = WRAPLOW(check_range(p9 + p11), 8);
  v10 = WRAPLOW(check_range(p8 - p10), 8);
  v11 = WRAPLOW(check_range(p9 - p11), 8);
  v12 = WRAPLOW(dct_const_round_shift(p12 + p14), 8);
  v13 = WRAPLOW(dct_const_round_shift(p13 + p15), 8);
  v14 = WRAPLOW(dct_const_round_shift(p12 - p14), 8);
  v15 = WRAPLOW(dct_const_round_shift(p13 - p15), 8);

  // stage 4
  p2 = (- cospi_16_64) * (v2 + v3);
  p3 = cospi_16_64 * (v2 - v3);
  p6 = cospi_16_64 * (v6 + v7);
  p7 = cospi_16_64 * (- v6 + v7);
  p10 = cospi_16_64 * (v10 + v11);
  p11 = cospi_16_64 * (- v10 + v11);
  p14 = (- cospi_16_64) * (v14 + v15);
  p15 = cospi_16_64 * (v14 - v15);

  v2 = WRAPLOW(dct_const_round_shift(p2), 8);
  v3 = WRAPLOW(dct_const_round_shift(p3), 8);
  v6 = WRAPLOW(dct_const_round_shift(p6), 8);
  v7 = WRAPLOW(dct_const_round_shift(p7), 8);
  v10 = WRAPLOW(dct_const_round_shift(p10), 8);
  v11 = WRAPLOW(dct_const_round_shift(p11), 8);
  v14 = WRAPLOW(dct_const_round_shift(p14), 8);
  v15 = WRAPLOW(dct_const_round_shift(p15), 8);

  // Outputs are written in permuted order; four of them are negated.
  output[0] = WRAPLOW(v0, 8);
  output[1] = WRAPLOW(-v8, 8);
  output[2] = WRAPLOW(v12, 8);
  output[3] = WRAPLOW(-v4, 8);
  output[4] = WRAPLOW(v6, 8);
  output[5] = WRAPLOW(v14, 8);
  output[6] = WRAPLOW(v10, 8);
  output[7] = WRAPLOW(v2, 8);
  output[8] = WRAPLOW(v3, 8);
  output[9] = WRAPLOW(v11, 8);
  output[10] = WRAPLOW(v15, 8);
  output[11] = WRAPLOW(v7, 8);
  output[12] = WRAPLOW(v5, 8);
  output[13] = WRAPLOW(-v13, 8);
  output[14] = WRAPLOW(v9, 8);
  output[15] = WRAPLOW(-v1, 8);
}

Dmitry Kovalev's avatar
Dmitry Kovalev committed
816
static const transform_2d IHT_16[] = {
817 818 819 820
  { idct16,  idct16  },  // DCT_DCT  = 0
  { iadst16, idct16  },  // ADST_DCT = 1
  { idct16,  iadst16 },  // DCT_ADST = 2
  { iadst16, iadst16 }   // ADST_ADST = 3
Dmitry Kovalev's avatar
Dmitry Kovalev committed
821 822
};

823
void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
824
                            int tx_type) {
825
  int i, j;
826 827 828
  tran_low_t out[16 * 16];
  tran_low_t *outptr = out;
  tran_low_t temp_in[16], temp_out[16];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
829
  const transform_2d ht = IHT_16[tx_type];
830

Dmitry Kovalev's avatar
Dmitry Kovalev committed
831
  // Rows
832
  for (i = 0; i < 16; ++i) {
Dmitry Kovalev's avatar
Dmitry Kovalev committed
833
    ht.rows(input, outptr);
834
    input += 16;
835 836 837
    outptr += 16;
  }

Dmitry Kovalev's avatar
Dmitry Kovalev committed
838
  // Columns
839 840 841
  for (i = 0; i < 16; ++i) {
    for (j = 0; j < 16; ++j)
      temp_in[j] = out[j * 16 + i];
Dmitry Kovalev's avatar
Dmitry Kovalev committed
842
    ht.cols(temp_in, temp_out);
843
    for (j = 0; j < 16; ++j) {
844 845
      dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
                                            ROUND_POWER_OF_TWO(temp_out[j], 6));
846
    }
Jingning Han's avatar
Jingning Han committed
847
  }
848 849
}

850 851 852 853
// 16x16 inverse DCT for blocks whose non-zero coefficients all lie in the
// upper-left 4x4 area; the result is added into the destination block.
//
// input  : 16 rows of 16 coefficients each, stored contiguously.
// dest   : destination pixels; element (row, col) is dest[row * stride + col].
// stride : distance, in pixels, between vertically adjacent dest pixels.
void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
                            int stride) {
  tran_low_t out[16 * 16] = { 0 };
  tran_low_t col_in[16], col_out[16];
  int r, c;

  // Row pass. Only the first 4 rows are transformed, since the non-zero
  // coefficients are confined to the upper-left 4x4 area; the remaining
  // rows of out keep their zero initialization.
  for (r = 0; r < 4; ++r)
    idct16(input + r * 16, out + r * 16);

  // Column pass over all 16 columns, followed by rounding off 6 fractional
  // bits and accumulating into the destination pixels.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      col_in[r] = out[r * 16 + c];

    idct16(col_in, col_out);

    for (r = 0; r < 16; ++r) {
      uint8_t *const px = &dest[r * stride + c];
      *px = clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 6));
    }
  }
}
876

877
void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
878
  int i, j;
879
  tran_high_t a1;
880 881
  tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
  out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
882 883 884
  a1 = ROUND_POWER_OF_TWO(out, 6);
  for (j = 0; j < 16; ++j) {
    for (i = 0; i < 16; ++i)
885
      dest[i] = clip_pixel_add(dest[i], a1);
886
    dest += stride;
887
  }
888
}
889

890 891 892
static void idct32(const tran_low_t *input, tran_low_t *output) {
  tran_low_t step1[32], step2[32];
  tran_high_t temp1, temp2;
893

894 895 896 897 898 899 900 901 902 903 904 905