convolve.c 12.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

12
#include <assert.h>
Angie Chiang's avatar
Angie Chiang committed
13
#include <string.h>
14

Yaowu Xu's avatar
Yaowu Xu committed
15
#include "./av1_rtcd.h"
16
#include "av1/common/convolve.h"
17
#include "av1/common/filter.h"
Yaowu Xu's avatar
Yaowu Xu committed
18
#include "aom_dsp/aom_dsp_common.h"
19
#include "aom_ports/mem.h"
20

21 22
// Upper bounds asserted by av1_convolve() / av1_highbd_convolve() and used to
// size their on-stack intermediate buffer for two-pass filtering.
// Largest block width and height, both equal to MAX_SB_SIZE.
#define MAX_BLOCK_WIDTH (MAX_SB_SIZE)
#define MAX_BLOCK_HEIGHT (MAX_SB_SIZE)
// Largest accepted x_step_q4 / y_step_q4 (a step of 16 with a zero sub-pel
// offset is treated by the callers as "no filtering in this direction").
#define MAX_STEP (32)
// Largest accepted number of filter taps.
#define MAX_FILTER_TAP (12)

Yaowu Xu's avatar
Yaowu Xu committed
26 27 28 29
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h,
                          const InterpFilterParams filter_params,
                          const int subpel_x_q4, int x_step_q4, int avg) {
30
  int x, y;
31
  int filter_size = filter_params.taps;
32 33 34 35 36
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
Yaowu Xu's avatar
Yaowu Xu committed
37
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
38
          filter_params, x_q4 & SUBPEL_MASK);
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg) {
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      } else {
        dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      }
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
54 55 56 57
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
                         int dst_stride, int w, int h,
                         const InterpFilterParams filter_params,
                         const int subpel_y_q4, int y_step_q4, int avg) {
58
  int x, y;
59
  int filter_size = filter_params.taps;
60 61 62 63 64 65
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
Yaowu Xu's avatar
Yaowu Xu committed
66
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
67
          filter_params, y_q4 & SUBPEL_MASK);
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
            1);
      } else {
        dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

Angie Chiang's avatar
Angie Chiang committed
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
// Unfiltered transfer of a w x h block of 8-bit pixels from src to dst.
// With avg == 0 each row is copied verbatim; otherwise every dst pixel is
// replaced by the rounded average of itself and the matching src pixel.
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
                          int dst_stride, int w, int h, int avg) {
  int rows_left;

  if (avg == 0) {
    // Plain copy, one row at a time.
    for (rows_left = h; rows_left > 0; --rows_left) {
      memcpy(dst, src, w);
      src += src_stride;
      dst += dst_stride;
    }
    return;
  }

  // Averaging path.
  for (rows_left = h; rows_left > 0; --rows_left) {
    int col;
    for (col = 0; col < w; ++col) {
      const int pair_sum = dst[col] + src[col];
      dst[col] = clip_pixel(ROUND_POWER_OF_TWO(pair_sum, 1));
    }
    src += src_stride;
    dst += dst_stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
107 108
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
                  int dst_stride, int w, int h,
109
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
110
                  const InterpFilter *interp_filter,
111
#else
James Zern's avatar
James Zern committed
112
                  const InterpFilter interp_filter,
113
#endif
Yaowu Xu's avatar
Yaowu Xu committed
114 115
                  const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
                  int y_step_q4, int ref_idx) {
Angie Chiang's avatar
Angie Chiang committed
116 117
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
118 119 120 121 122 123

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
124
  if (ignore_horiz && ignore_vert) {
125
    convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
126
  } else if (ignore_vert) {
127 128
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
129
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
130 131
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
132
        av1_get_interp_filter_params(interp_filter);
133 134
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
Yaowu Xu's avatar
Yaowu Xu committed
135 136
    av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
                       subpel_x_q4, x_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
137
  } else if (ignore_horiz) {
138 139
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
140
        av1_get_interp_filter_params(interp_filter[2 * ref_idx]);
141 142
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
143
        av1_get_interp_filter_params(interp_filter);
144 145
#endif
    assert(filter_params.taps <= MAX_FILTER_TAP);
Yaowu Xu's avatar
Yaowu Xu committed
146 147
    av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
                      subpel_y_q4, y_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
148 149 150 151
  } else {
    // temp's size is set to (maximum possible intermediate_height) *
    // MAX_BLOCK_WIDTH
    uint8_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
152
                  MAX_FILTER_TAP) *
Angie Chiang's avatar
Angie Chiang committed
153 154
                 MAX_BLOCK_WIDTH];
    int temp_stride = MAX_BLOCK_WIDTH;
155
#if CONFIG_DUAL_FILTER
156
    InterpFilterParams filter_params_x =
Yaowu Xu's avatar
Yaowu Xu committed
157
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
158
    InterpFilterParams filter_params_y =
Yaowu Xu's avatar
Yaowu Xu committed
159
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
160 161 162 163 164 165
    InterpFilterParams filter_params = filter_params_x;

    // The filter size implies the required number of reference pixels for
    // the second stage filtering. It is possible that the two directions
    // require different filter sizes.
    int filter_size = filter_params_y.taps;
166 167
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
168
        av1_get_interp_filter_params(interp_filter);
169
    int filter_size = filter_params.taps;
170
#endif
Angie Chiang's avatar
Angie Chiang committed
171 172 173
    int intermediate_height =
        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;

174 175
    assert(filter_params.taps <= MAX_FILTER_TAP);

Yaowu Xu's avatar
Yaowu Xu committed
176 177 178
    av1_convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride,
                       temp, temp_stride, w, intermediate_height, filter_params,
                       subpel_x_q4, x_step_q4, 0);
179 180

#if CONFIG_DUAL_FILTER
181
    filter_params = filter_params_y;
182
#else
Yaowu Xu's avatar
Yaowu Xu committed
183
    filter_params = av1_get_interp_filter_params(interp_filter);
184 185 186 187
#endif
    filter_size = filter_params.taps;
    assert(filter_params.taps <= MAX_FILTER_TAP);

Yaowu Xu's avatar
Yaowu Xu committed
188 189 190
    av1_convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
                      dst, dst_stride, w, h, filter_params, subpel_y_q4,
                      y_step_q4, ref_idx);
Angie Chiang's avatar
Angie Chiang committed
191
  }
192 193
}

Angie Chiang's avatar
Angie Chiang committed
194 195 196 197 198
// C version of the convolve initialization hook. It intentionally does
// nothing; the hook exists so SIMD builds have a place to initialize.
void av1_convolve_init_c(void) {
  // A placeholder for SIMD initialization
}

Yaowu Xu's avatar
Yaowu Xu committed
199 200 201 202 203 204
#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 const InterpFilterParams filter_params,
                                 const int subpel_x_q4, int x_step_q4, int avg,
                                 int bd) {
205
  int x, y;
206
  int filter_size = filter_params.taps;
207 208 209 210 211
  src -= filter_size / 2 - 1;
  for (y = 0; y < h; ++y) {
    int x_q4 = subpel_x_q4;
    for (x = 0; x < w; ++x) {
      const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
Yaowu Xu's avatar
Yaowu Xu committed
212
      const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
213
          filter_params, x_q4 & SUBPEL_MASK);
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
      if (avg)
        dst[x] = ROUND_POWER_OF_TWO(
            dst[x] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      else
        dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
230 231 232 233 234
void av1_highbd_convolve_vert_c(const uint16_t *src, int src_stride,
                                uint16_t *dst, int dst_stride, int w, int h,
                                const InterpFilterParams filter_params,
                                const int subpel_y_q4, int y_step_q4, int avg,
                                int bd) {
235
  int x, y;
236
  int filter_size = filter_params.taps;
237 238 239 240 241 242
  src -= src_stride * (filter_size / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = subpel_y_q4;
    for (y = 0; y < h; ++y) {
      const uint16_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
Yaowu Xu's avatar
Yaowu Xu committed
243
      const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
clang-format's avatar
clang-format committed
244
          filter_params, y_q4 & SUBPEL_MASK);
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
      int k, sum = 0;
      for (k = 0; k < filter_size; ++k)
        sum += src_y[k * src_stride] * y_filter[k];
      if (avg) {
        dst[y * dst_stride] = ROUND_POWER_OF_TWO(
            dst[y * dst_stride] +
                clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd),
            1);
      } else {
        dst[y * dst_stride] =
            clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
      }
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

Angie Chiang's avatar
Angie Chiang committed
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
// Unfiltered transfer of a w x h block of 16-bit pixels from src to dst.
// With avg == 0 each row is copied verbatim; otherwise every dst pixel is
// replaced by the rounded average of itself and the matching src pixel,
// clipped for bit depth bd.
static void highbd_convolve_copy(const uint16_t *src, int src_stride,
                                 uint16_t *dst, int dst_stride, int w, int h,
                                 int avg, int bd) {
  int rows_left;

  if (avg == 0) {
    // Plain copy, one row at a time.
    for (rows_left = h; rows_left > 0; --rows_left) {
      memcpy(dst, src, w * sizeof(*src));
      src += src_stride;
      dst += dst_stride;
    }
    return;
  }

  // Averaging path.
  for (rows_left = h; rows_left > 0; --rows_left) {
    int col;
    for (col = 0; col < w; ++col) {
      const int pair_sum = dst[col] + src[col];
      dst[col] = clip_pixel_highbd(ROUND_POWER_OF_TWO(pair_sum, 1), bd);
    }
    src += src_stride;
    dst += dst_stride;
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
286 287
void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
                         int dst_stride, int w, int h,
288
#if CONFIG_DUAL_FILTER
James Zern's avatar
James Zern committed
289
                         const InterpFilter *interp_filter,
290
#else
James Zern's avatar
James Zern committed
291
                         const InterpFilter interp_filter,
292
#endif
Yaowu Xu's avatar
Yaowu Xu committed
293 294 295
                         const int subpel_x_q4, int x_step_q4,
                         const int subpel_y_q4, int y_step_q4, int ref_idx,
                         int bd) {
Angie Chiang's avatar
Angie Chiang committed
296 297 298 299
  uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
  int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
300 301 302 303 304 305

  assert(w <= MAX_BLOCK_WIDTH);
  assert(h <= MAX_BLOCK_HEIGHT);
  assert(y_step_q4 <= MAX_STEP);
  assert(x_step_q4 <= MAX_STEP);

Angie Chiang's avatar
Angie Chiang committed
306
  if (ignore_horiz && ignore_vert) {
307
    highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
308
  } else if (ignore_vert) {
309 310
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
311
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
312 313
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
314
        av1_get_interp_filter_params(interp_filter);
315
#endif
Yaowu Xu's avatar
Yaowu Xu committed
316 317 318
    av1_highbd_convolve_horiz(src, src_stride, dst, dst_stride, w, h,
                              filter_params, subpel_x_q4, x_step_q4, ref_idx,
                              bd);
Angie Chiang's avatar
Angie Chiang committed
319
  } else if (ignore_horiz) {
320 321
#if CONFIG_DUAL_FILTER
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
322
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
323 324
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
325
        av1_get_interp_filter_params(interp_filter);
326
#endif
Yaowu Xu's avatar
Yaowu Xu committed
327 328 329
    av1_highbd_convolve_vert(src, src_stride, dst, dst_stride, w, h,
                             filter_params, subpel_y_q4, y_step_q4, ref_idx,
                             bd);
Angie Chiang's avatar
Angie Chiang committed
330 331 332 333 334 335 336 337
  } else {
    // temp's size is set to (maximum possible intermediate_height) *
    // MAX_BLOCK_WIDTH
    uint16_t temp[((((MAX_BLOCK_HEIGHT - 1) * MAX_STEP + 15) >> SUBPEL_BITS) +
                   MAX_FILTER_TAP) *
                  MAX_BLOCK_WIDTH];
    int temp_stride = MAX_BLOCK_WIDTH;

338
#if CONFIG_DUAL_FILTER
339
    InterpFilterParams filter_params_x =
Yaowu Xu's avatar
Yaowu Xu committed
340
        av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
341
    InterpFilterParams filter_params_y =
Yaowu Xu's avatar
Yaowu Xu committed
342
        av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
343 344
    InterpFilterParams filter_params = filter_params_x;
    int filter_size = filter_params_y.taps;
345 346
#else
    InterpFilterParams filter_params =
Yaowu Xu's avatar
Yaowu Xu committed
347
        av1_get_interp_filter_params(interp_filter);
348
    int filter_size = filter_params.taps;
349
#endif
350

Angie Chiang's avatar
Angie Chiang committed
351 352 353
    int intermediate_height =
        (((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;

Yaowu Xu's avatar
Yaowu Xu committed
354
    av1_highbd_convolve_horiz(
clang-format's avatar
clang-format committed
355 356
        src - src_stride * (filter_size / 2 - 1), src_stride, temp, temp_stride,
        w, intermediate_height, filter_params, subpel_x_q4, x_step_q4, 0, bd);
357 358

#if CONFIG_DUAL_FILTER
359
    filter_params = filter_params_y;
360 361 362 363
#endif
    filter_size = filter_params.taps;
    assert(filter_params.taps <= MAX_FILTER_TAP);

Yaowu Xu's avatar
Yaowu Xu committed
364 365 366
    av1_highbd_convolve_vert(temp + temp_stride * (filter_size / 2 - 1),
                             temp_stride, dst, dst_stride, w, h, filter_params,
                             subpel_y_q4, y_step_q4, ref_idx, bd);
Angie Chiang's avatar
Angie Chiang committed
367
  }
368
}
Yaowu Xu's avatar
Yaowu Xu committed
369
#endif  // CONFIG_AOM_HIGHBITDEPTH