warped_motion.c 43.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be
 *  found  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include <assert.h>

17
#include "av1/common/warped_motion.h"
18

19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
/* clang-format off */
static const int error_measure_lut[512] = {
  // power 0.6
  255, 254, 254, 253, 253, 252, 251, 251,
  250, 250, 249, 248, 248, 247, 247, 246,
  245, 245, 244, 243, 243, 242, 242, 241,
  240, 240, 239, 238, 238, 237, 237, 236,
  235, 235, 234, 233, 233, 232, 231, 231,
  230, 230, 229, 228, 228, 227, 226, 226,
  225, 224, 224, 223, 222, 222, 221, 220,
  220, 219, 218, 218, 217, 216, 216, 215,
  214, 214, 213, 212, 212, 211, 210, 210,
  209, 208, 208, 207, 206, 206, 205, 204,
  203, 203, 202, 201, 201, 200, 199, 199,
  198, 197, 196, 196, 195, 194, 194, 193,
  192, 191, 191, 190, 189, 188, 188, 187,
  186, 185, 185, 184, 183, 182, 182, 181,
  180, 179, 179, 178, 177, 176, 176, 175,
  174, 173, 173, 172, 171, 170, 169, 169,
  168, 167, 166, 165, 165, 164, 163, 162,
  161, 161, 160, 159, 158, 157, 156, 156,
  155, 154, 153, 152, 151, 151, 150, 149,
  148, 147, 146, 145, 145, 144, 143, 142,
  141, 140, 139, 138, 137, 137, 136, 135,
  134, 133, 132, 131, 130, 129, 128, 127,
  126, 125, 124, 123, 122, 121, 120, 119,
  118, 117, 116, 115, 114, 113, 112, 111,
  110, 109, 108, 107, 106, 105, 104, 103,
  102, 100,  99,  98,  97,  96,  95,  94,
  92, 91, 90, 89, 88, 86, 85, 84,
  83, 81, 80, 79, 77, 76, 75, 73,
  72, 71, 69, 68, 66, 65, 63, 62,
  60, 59, 57, 55, 54, 52, 50, 48,
  47, 45, 43, 41, 39, 37, 34, 32,
  29, 27, 24, 21, 18, 14,  9,  0,
  9, 14, 18, 21, 24, 27, 29, 32,
  34, 37, 39, 41, 43, 45, 47, 48,
  50, 52, 54, 55, 57, 59, 60, 62,
  63, 65, 66, 68, 69, 71, 72, 73,
  75, 76, 77, 79, 80, 81, 83, 84,
  85, 86, 88, 89, 90, 91, 92, 94,
  95,  96,  97,  98,  99, 100, 102, 103,
  104, 105, 106, 107, 108, 109, 110, 111,
  112, 113, 114, 115, 116, 117, 118, 119,
  120, 121, 122, 123, 124, 125, 126, 127,
  128, 129, 130, 131, 132, 133, 134, 135,
  136, 137, 137, 138, 139, 140, 141, 142,
  143, 144, 145, 145, 146, 147, 148, 149,
  150, 151, 151, 152, 153, 154, 155, 156,
  156, 157, 158, 159, 160, 161, 161, 162,
  163, 164, 165, 165, 166, 167, 168, 169,
  169, 170, 171, 172, 173, 173, 174, 175,
  176, 176, 177, 178, 179, 179, 180, 181,
  182, 182, 183, 184, 185, 185, 186, 187,
  188, 188, 189, 190, 191, 191, 192, 193,
  194, 194, 195, 196, 196, 197, 198, 199,
  199, 200, 201, 201, 202, 203, 203, 204,
  205, 206, 206, 207, 208, 208, 209, 210,
  210, 211, 212, 212, 213, 214, 214, 215,
  216, 216, 217, 218, 218, 219, 220, 220,
  221, 222, 222, 223, 224, 224, 225, 226,
  226, 227, 228, 228, 229, 230, 230, 231,
  231, 232, 233, 233, 234, 235, 235, 236,
  237, 237, 238, 238, 239, 240, 240, 241,
  242, 242, 243, 243, 244, 245, 245, 246,
  247, 247, 248, 248, 249, 250, 250, 251,
  251, 252, 253, 253, 254, 254, 255, 255,
};
/* clang-format on */
88

89
static ProjectPointsFunc get_project_points_type(TransformationType type) {
90
  switch (type) {
91 92 93 94
    case HOMOGRAPHY: return project_points_homography;
    case AFFINE: return project_points_affine;
    case ROTZOOM: return project_points_rotzoom;
    case TRANSLATION: return project_points_translation;
clang-format's avatar
clang-format committed
95
    default: assert(0); return NULL;
96 97 98
  }
}

99
void project_points_translation(int32_t *mat, int *points, int *proj,
100 101 102
                                const int n, const int stride_points,
                                const int stride_proj, const int subsampling_x,
                                const int subsampling_y) {
103 104 105 106
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
107
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
108
          ((x * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[0]),
109
          WARPEDDIFF_PREC_BITS + 1);
110
    else
111
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
112
          ((x * (1 << WARPEDMODEL_PREC_BITS)) + mat[0]), WARPEDDIFF_PREC_BITS);
113
    if (subsampling_y)
114
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
115
          ((y * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[1]),
116
          WARPEDDIFF_PREC_BITS + 1);
117
    else
118
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
119
          ((y * (1 << WARPEDMODEL_PREC_BITS))) + mat[1], WARPEDDIFF_PREC_BITS);
120 121 122 123 124
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

125
void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
126 127
                            const int stride_points, const int stride_proj,
                            const int subsampling_x, const int subsampling_y) {
128 129 130 131
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
132
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
133 134
          mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
135 136
          WARPEDDIFF_PREC_BITS + 1);
    else
137
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
138
                                            WARPEDDIFF_PREC_BITS);
139
    if (subsampling_y)
140
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
141 142
          -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
              (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
143 144
          WARPEDDIFF_PREC_BITS + 1);
    else
145
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
146
                                            WARPEDDIFF_PREC_BITS);
147 148 149 150 151
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

152
void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
153 154
                           const int stride_points, const int stride_proj,
                           const int subsampling_x, const int subsampling_y) {
155 156 157 158
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
159
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
160 161
          mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
162 163
          WARPEDDIFF_PREC_BITS + 1);
    else
164
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
165
                                            WARPEDDIFF_PREC_BITS);
166
    if (subsampling_y)
167
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
168
          mat[4] * 2 * x + mat[5] * 2 * y + mat[1] +
Sarah Parker's avatar
Sarah Parker committed
169
              (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
170 171
          WARPEDDIFF_PREC_BITS + 1);
    else
172
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1],
173
                                            WARPEDDIFF_PREC_BITS);
174 175 176 177 178
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

179
void project_points_homography(int32_t *mat, int *points, int *proj,
180 181 182
                               const int n, const int stride_points,
                               const int stride_proj, const int subsampling_x,
                               const int subsampling_y) {
183
  int i;
184 185
  int64_t x, y, Z;
  int64_t xp, yp;
186 187 188 189 190
  for (i = 0; i < n; ++i) {
    x = *(points++), y = *(points++);
    x = (subsampling_x ? 4 * x + 1 : 2 * x);
    y = (subsampling_y ? 4 * y + 1 : 2 * y);

191 192
    Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
    xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
193 194
         (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
                WARPEDMODEL_PREC_BITS));
195
    yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
196 197
         (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
                WARPEDMODEL_PREC_BITS));
198 199 200 201

    xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
    yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;

clang-format's avatar
clang-format committed
202 203
    if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
    if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
204 205 206 207 208 209 210 211 212
    *(proj++) = xp;
    *(proj++) = yp;

    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

static const int16_t
clang-format's avatar
clang-format committed
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
    filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
      { 0, 0, 128, 0, 0, 0 },      { 0, -1, 128, 2, -1, 0 },
      { 1, -3, 127, 4, -1, 0 },    { 1, -4, 126, 6, -2, 1 },
      { 1, -5, 126, 8, -3, 1 },    { 1, -6, 125, 11, -4, 1 },
      { 1, -7, 124, 13, -4, 1 },   { 2, -8, 123, 15, -5, 1 },
      { 2, -9, 122, 18, -6, 1 },   { 2, -10, 121, 20, -6, 1 },
      { 2, -11, 120, 22, -7, 2 },  { 2, -12, 119, 25, -8, 2 },
      { 3, -13, 117, 27, -8, 2 },  { 3, -13, 116, 29, -9, 2 },
      { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
      { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
      { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
      { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
      { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
      { 4, -18, 96, 58, -15, 3 },  { 4, -18, 94, 60, -16, 4 },
      { 4, -18, 91, 63, -16, 4 },  { 4, -18, 89, 65, -16, 4 },
      { 4, -18, 87, 68, -17, 4 },  { 4, -18, 85, 70, -17, 4 },
      { 4, -18, 82, 73, -17, 4 },  { 4, -18, 80, 75, -17, 4 },
      { 4, -18, 78, 78, -18, 4 },  { 4, -17, 75, 80, -18, 4 },
      { 4, -17, 73, 82, -18, 4 },  { 4, -17, 70, 85, -18, 4 },
      { 4, -17, 68, 87, -18, 4 },  { 4, -16, 65, 89, -18, 4 },
      { 4, -16, 63, 91, -18, 4 },  { 4, -16, 60, 94, -18, 4 },
      { 3, -15, 58, 96, -18, 4 },  { 4, -15, 55, 98, -18, 4 },
      { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
      { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
      { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
      { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
      { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
      { 2, -8, 27, 117, -13, 3 },  { 2, -8, 25, 119, -12, 2 },
      { 2, -7, 22, 120, -11, 2 },  { 1, -6, 20, 121, -10, 2 },
      { 1, -6, 18, 122, -9, 2 },   { 1, -5, 15, 123, -8, 2 },
      { 1, -4, 13, 124, -7, 1 },   { 1, -4, 11, 125, -6, 1 },
      { 1, -3, 8, 126, -5, 1 },    { 1, -2, 6, 126, -4, 1 },
      { 0, -1, 4, 127, -3, 1 },    { 0, -1, 2, 128, -1, 0 },
    };
247 248 249 250 251 252 253 254 255 256 257

static int32_t do_ntap_filter(int32_t *p, int x) {
  int i;
  int32_t sum = 0;
  for (i = 0; i < WARPEDPIXEL_FILTER_TAPS; ++i) {
    sum += p[i - WARPEDPIXEL_FILTER_TAPS / 2 + 1] * filter_ntap[x][i];
  }
  return sum;
}

static int32_t do_cubic_filter(int32_t *p, int x) {
clang-format's avatar
clang-format committed
258
  if (x == 0) {
259
    return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
260
  } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
261
    return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
262
  } else {
263
    const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
264 265 266
    const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
    const int64_t v3 = x * (p[1] - p[-1]);
    const int64_t v4 = 2 * p[0];
267
    return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
268 269 270
        (v4 * (1 << (3 * WARPEDPIXEL_PREC_BITS))) +
            (v3 * (1 << (2 * WARPEDPIXEL_PREC_BITS))) +
            (v2 * (1 << WARPEDPIXEL_PREC_BITS)) + v1,
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
        3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
  }
}

static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col,
                                 int stride, int x, int y_start) {
  int i;
  for (i = 0; i < taps; ++i) {
    col[i] = ref[(i + y_start) * stride + x];
  }
}

static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) {
  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
  int k;
clang-format's avatar
clang-format committed
286 287
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
288 289 290 291 292 293
  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
    get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
                  i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
                  j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
294
                            y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
295 296
  }
  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
297
                       x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
298 299 300 301 302 303 304
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

static uint8_t bi_cubic_filter(uint8_t *ref, int x, int y, int stride) {
  int32_t val, arr[4];
  int k;
clang-format's avatar
clang-format committed
305 306
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
307 308
  for (k = 0; k < 4; ++k) {
    int32_t arr_temp[4];
clang-format's avatar
clang-format committed
309
    get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
310 311
    arr[k] =
        do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
312
  }
313
  val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
314 315 316 317 318 319 320
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

static uint8_t bi_linear_filter(uint8_t *ref, int x, int y, int stride) {
  const int ix = x >> WARPEDPIXEL_PREC_BITS;
  const int iy = y >> WARPEDPIXEL_PREC_BITS;
321 322
  const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
323 324 325
  int32_t val;
  val = ROUND_POWER_OF_TWO_SIGNED(
      ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
clang-format's avatar
clang-format committed
326 327 328 329
              (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
          ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[(iy + 1) * stride + ix + 1] * sy * sx,
330 331 332 333
      WARPEDPIXEL_PREC_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

clang-format's avatar
clang-format committed
334 335
static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width,
                                int height, int stride) {
336 337
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
338 339
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
340 341
  int32_t v;

clang-format's avatar
clang-format committed
342 343
  if (ix < 0 && iy < 0)
    return ref[0];
344 345 346 347 348 349 350 351 352
  else if (ix < 0 && iy > height - 1)
    return ref[(height - 1) * stride];
  else if (ix > width - 1 && iy < 0)
    return ref[width - 1];
  else if (ix > width - 1 && iy > height - 1)
    return ref[(height - 1) * stride + (width - 1)];
  else if (ix < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
353
            ref[(iy + 1) * stride] * sy,
354 355 356 357
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
clang-format's avatar
clang-format committed
358
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
359 360 361 362 363
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix > width - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
364
            ref[(iy + 1) * stride + width - 1] * sy,
365 366 367 368 369
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy > height - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
clang-format's avatar
clang-format committed
370
            ref[(height - 1) * stride + ix + 1] * sx,
371 372 373 374 375 376 377
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    return bi_ntap_filter(ref, x, y, stride);
clang-format's avatar
clang-format committed
378
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
379 380 381 382 383 384
    return bi_cubic_filter(ref, x, y, stride);
  } else {
    return bi_linear_filter(ref, x, y, stride);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
385
#if CONFIG_AOM_HIGHBITDEPTH
386 387 388 389 390 391 392 393
static INLINE void highbd_get_subcolumn(int taps, uint16_t *ref, int32_t *col,
                                        int stride, int x, int y_start) {
  int i;
  for (i = 0; i < taps; ++i) {
    col[i] = ref[(i + y_start) * stride + x];
  }
}

clang-format's avatar
clang-format committed
394
static uint16_t highbd_bi_ntap_filter(uint16_t *ref, int x, int y, int stride,
395 396 397
                                      int bd) {
  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
  int k;
clang-format's avatar
clang-format committed
398 399
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
400 401 402 403 404 405
  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
    highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
                         i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
                         j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
406
                            y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
407 408
  }
  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
409
                       x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
410
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
411
  return (uint16_t)clip_pixel_highbd(val, bd);
412 413
}

clang-format's avatar
clang-format committed
414
static uint16_t highbd_bi_cubic_filter(uint16_t *ref, int x, int y, int stride,
415 416 417
                                       int bd) {
  int32_t val, arr[4];
  int k;
clang-format's avatar
clang-format committed
418 419
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
420 421
  for (k = 0; k < 4; ++k) {
    int32_t arr_temp[4];
clang-format's avatar
clang-format committed
422
    highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
423 424
    arr[k] =
        do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
425
  }
426
  val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
427
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
428
  return (uint16_t)clip_pixel_highbd(val, bd);
429 430
}

clang-format's avatar
clang-format committed
431
static uint16_t highbd_bi_linear_filter(uint16_t *ref, int x, int y, int stride,
432 433 434
                                        int bd) {
  const int ix = x >> WARPEDPIXEL_PREC_BITS;
  const int iy = y >> WARPEDPIXEL_PREC_BITS;
435 436
  const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
437 438 439
  int32_t val;
  val = ROUND_POWER_OF_TWO_SIGNED(
      ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
clang-format's avatar
clang-format committed
440 441 442 443
              (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
          ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[(iy + 1) * stride + ix + 1] * sy * sx,
444
      WARPEDPIXEL_PREC_BITS * 2);
445
  return (uint16_t)clip_pixel_highbd(val, bd);
446 447
}

clang-format's avatar
clang-format committed
448 449
static uint16_t highbd_warp_interpolate(uint16_t *ref, int x, int y, int width,
                                        int height, int stride, int bd) {
450 451
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
452 453
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
454 455
  int32_t v;

clang-format's avatar
clang-format committed
456 457
  if (ix < 0 && iy < 0)
    return ref[0];
458 459 460 461 462 463 464 465 466
  else if (ix < 0 && iy > height - 1)
    return ref[(height - 1) * stride];
  else if (ix > width - 1 && iy < 0)
    return ref[width - 1];
  else if (ix > width - 1 && iy > height - 1)
    return ref[(height - 1) * stride + (width - 1)];
  else if (ix < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
467
            ref[(iy + 1) * stride] * sy,
468
        WARPEDPIXEL_PREC_BITS);
469
    return clip_pixel_highbd(v, bd);
470 471
  } else if (iy < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
clang-format's avatar
clang-format committed
472
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
473
        WARPEDPIXEL_PREC_BITS);
474
    return clip_pixel_highbd(v, bd);
475 476 477
  } else if (ix > width - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
478
            ref[(iy + 1) * stride + width - 1] * sy,
479
        WARPEDPIXEL_PREC_BITS);
480
    return clip_pixel_highbd(v, bd);
481 482 483
  } else if (iy > height - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
clang-format's avatar
clang-format committed
484
            ref[(height - 1) * stride + ix + 1] * sx,
485
        WARPEDPIXEL_PREC_BITS);
486
    return clip_pixel_highbd(v, bd);
487 488 489 490 491
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    return highbd_bi_ntap_filter(ref, x, y, stride, bd);
clang-format's avatar
clang-format committed
492
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
493 494 495 496 497 498
    return highbd_bi_cubic_filter(ref, x, y, stride, bd);
  } else {
    return highbd_bi_linear_filter(ref, x, y, stride, bd);
  }
}

499 500 501 502 503 504 505 506 507 508 509
static INLINE int highbd_error_measure(int err, int bd) {
  const int b = bd - 8;
  const int bmask = (1 << b) - 1;
  int e1, e2;
  err = abs(err);
  e1 = err >> b;
  e2 = err & bmask;
  return error_measure_lut[255 + e1] * (v - e2) +
         error_measure_lut[256 + e1] * e2;
}

510 511 512 513 514 515 516
static double highbd_warp_erroradv(WarpedMotionParams *wm, uint8_t *ref8,
                                   int width, int height, int stride,
                                   uint8_t *dst8, int p_col, int p_row,
                                   int p_width, int p_height, int p_stride,
                                   int subsampling_x, int subsampling_y,
                                   int x_scale, int y_scale, int bd) {
  int i, j;
517
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
518 519 520
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  int gm_err = 0, no_gm_err = 0;
521
  int64_t gm_sumerr = 0, no_gm_sumerr = 0;
522 523 524 525 526
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
527
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
528 529 530 531 532 533 534
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
               highbd_warp_interpolate(ref, out[0], out[1], width, height,
                                       stride, bd);
      no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
                  ref[(j - p_col) + (i - p_row) * stride];
535 536
      gm_sumerr += highbd_error_measure(gm_err, bd];
      no_gm_sumerr += highbd_error_measure(no_gm_err bd);
537 538 539 540 541
    }
  }
  return (double)gm_sumerr / no_gm_sumerr;
}

clang-format's avatar
clang-format committed
542 543 544 545 546
static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
                              int height, int stride, uint8_t *pred8, int p_col,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int x_scale, int y_scale,
547
                              int bd, int ref_frm) {
548
  int i, j;
549
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
550 551
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
552
  if (projectpoints == NULL) return;
553 554 555
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
556 557
      in[0] = j;
      in[1] = i;
558
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
559 560
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
561 562 563 564 565 566 567 568 569
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                highbd_warp_interpolate(ref, out[0], out[1], width, height,
                                        stride, bd),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
            ref, out[0], out[1], width, height, stride, bd);
570 571 572
    }
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
573
#endif  // CONFIG_AOM_HIGHBITDEPTH
574

575 576 577 578
static INLINE int error_measure(int err) {
  return error_measure_lut[255 + err];
}

579 580 581 582 583 584 585 586
static double warp_erroradv(WarpedMotionParams *wm, uint8_t *ref, int width,
                            int height, int stride, uint8_t *dst, int p_col,
                            int p_row, int p_width, int p_height, int p_stride,
                            int subsampling_x, int subsampling_y, int x_scale,
                            int y_scale) {
  int gm_err = 0, no_gm_err = 0;
  int gm_sumerr = 0, no_gm_sumerr = 0;
  int i, j;
587
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
588 589 590 591 592
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
593
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
594 595 596 597 598 599
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
               warp_interpolate(ref, out[0], out[1], width, height, stride);
      no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
                  ref[(j - p_col) + (i - p_row) * stride];
600 601
      gm_sumerr += error_measure(gm_err);
      no_gm_sumerr += error_measure(no_gm_err);
602 603 604 605 606 607 608 609 610
    }
  }
  return (double)gm_sumerr / no_gm_sumerr;
}

static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
                       int height, int stride, uint8_t *pred, int p_col,
                       int p_row, int p_width, int p_height, int p_stride,
                       int subsampling_x, int subsampling_y, int x_scale,
611
                       int y_scale, int ref_frm) {
612
  int i, j;
613
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
614 615 616 617 618 619
  if (projectpoints == NULL) return;
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
620
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
621 622
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
623 624 625 626 627 628 629 630
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                warp_interpolate(ref, out[0], out[1], width, height, stride),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] =
            warp_interpolate(ref, out[0], out[1], width, height, stride);
631 632 633 634
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
635 636 637 638 639 640 641 642 643
double av1_warp_erroradv(WarpedMotionParams *wm,
#if CONFIG_AOM_HIGHBITDEPTH
                         int use_hbd, int bd,
#endif  // CONFIG_AOM_HIGHBITDEPTH
                         uint8_t *ref, int width, int height, int stride,
                         uint8_t *dst, int p_col, int p_row, int p_width,
                         int p_height, int p_stride, int subsampling_x,
                         int subsampling_y, int x_scale, int y_scale) {
#if CONFIG_AOM_HIGHBITDEPTH
644 645 646 647
  if (use_hbd)
    return highbd_warp_erroradv(
        wm, ref, width, height, stride, dst, p_col, p_row, p_width, p_height,
        p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd);
Yaowu Xu's avatar
Yaowu Xu committed
648
#endif  // CONFIG_AOM_HIGHBITDEPTH
649 650 651
  return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
                       p_width, p_height, p_stride, subsampling_x,
                       subsampling_y, x_scale, y_scale);
652 653
}

Yaowu Xu's avatar
Yaowu Xu committed
654 655 656 657 658 659 660
void av1_warp_plane(WarpedMotionParams *wm,
#if CONFIG_AOM_HIGHBITDEPTH
                    int use_hbd, int bd,
#endif  // CONFIG_AOM_HIGHBITDEPTH
                    uint8_t *ref, int width, int height, int stride,
                    uint8_t *pred, int p_col, int p_row, int p_width,
                    int p_height, int p_stride, int subsampling_x,
661
                    int subsampling_y, int x_scale, int y_scale, int ref_frm) {
Yaowu Xu's avatar
Yaowu Xu committed
662
#if CONFIG_AOM_HIGHBITDEPTH
663
  if (use_hbd)
clang-format's avatar
clang-format committed
664 665
    highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
                      p_width, p_height, p_stride, subsampling_x, subsampling_y,
666
                      x_scale, y_scale, bd, ref_frm);
667
  else
Yaowu Xu's avatar
Yaowu Xu committed
668
#endif  // CONFIG_AOM_HIGHBITDEPTH
clang-format's avatar
clang-format committed
669 670
    warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
               p_height, p_stride, subsampling_x, subsampling_y, x_scale,
671
               y_scale, ref_frm);
672
}
673

Yaowu Xu's avatar
Yaowu Xu committed
674 675
void av1_integerize_model(const double *model, TransformationType wmtype,
                          WarpedMotionParams *wm) {
676 677 678 679
  wm->wmtype = wmtype;
  switch (wmtype) {
    case HOMOGRAPHY:
      assert(fabs(model[8] - 1.0) < 1e-12);
680 681 682 683
      wm->wmmat[6] =
          (int32_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
      wm->wmmat[7] =
          (int32_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
684 685
    /* fallthrough intended */
    case AFFINE:
686 687
      wm->wmmat[4] = (int32_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[5] = (int32_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
688 689
    /* fallthrough intended */
    case ROTZOOM:
690 691
      wm->wmmat[2] = (int32_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[3] = (int32_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
692 693
    /* fallthrough intended */
    case TRANSLATION:
694 695
      wm->wmmat[0] = (int32_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[1] = (int32_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
696 697 698 699
      break;
    default: assert(0 && "Invalid TransformationType");
  }
}
700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752

///////////////////////////////////////////////////////////////////////////////
// svdcmp
// Adopted from Numerical Recipes in C

static const double TINY_NEAR_ZERO = 1.0E-12;

static INLINE double sign(double a, double b) {
  return ((b) >= 0 ? fabs(a) : -fabs(a));
}

static INLINE double pythag(double a, double b) {
  double ct;
  const double absa = fabs(a);
  const double absb = fabs(b);

  if (absa > absb) {
    ct = absb / absa;
    return absa * sqrt(1.0 + ct * ct);
  } else {
    ct = absa / absb;
    return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
  }
}

static void multiply_mat(const double *m1, const double *m2, double *res,
                         const int m1_rows, const int inner_dim,
                         const int m2_cols) {
  double sum;

  int row, col, inner;
  for (row = 0; row < m1_rows; ++row) {
    for (col = 0; col < m2_cols; ++col) {
      sum = 0;
      for (inner = 0; inner < inner_dim; ++inner)
        sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
      *(res++) = sum;
    }
  }
}

static int svdcmp(double **u, int m, int n, double w[], double **v) {
  const int max_its = 30;
  int flag, i, its, j, jj, k, l, nm;
  double anorm, c, f, g, h, s, scale, x, y, z;
  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
  g = scale = anorm = 0.0;
  for (i = 0; i < n; i++) {
    l = i + 1;
    rv1[i] = scale * g;
    g = s = scale = 0.0;
    if (i < m) {
      for (k = i; k < m; k++) scale += fabs(u[k][i]);
753
      if (scale != 0.) {
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773
        for (k = i; k < m; k++) {
          u[k][i] /= scale;
          s += u[k][i] * u[k][i];
        }
        f = u[i][i];
        g = -sign(sqrt(s), f);
        h = f * g - s;
        u[i][i] = f - g;
        for (j = l; j < n; j++) {
          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
          f = s / h;
          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
        }
        for (k = i; k < m; k++) u[k][i] *= scale;
      }
    }
    w[i] = scale * g;
    g = s = scale = 0.0;
    if (i < m && i != n - 1) {
      for (k = l; k < n; k++) scale += fabs(u[i][k]);
774
      if (scale != 0.) {
775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795
        for (k = l; k < n; k++) {
          u[i][k] /= scale;
          s += u[i][k] * u[i][k];
        }
        f = u[i][l];
        g = -sign(sqrt(s), f);
        h = f * g - s;
        u[i][l] = f - g;
        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
        for (j = l; j < m; j++) {
          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
        }
        for (k = l; k < n; k++) u[i][k] *= scale;
      }
    }
    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
  }

  for (i = n - 1; i >= 0; i--) {
    if (i < n - 1) {
796
      if (g != 0.) {
797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812
        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
        for (j = l; j < n; j++) {
          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
        }
      }
      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
    }
    v[i][i] = 1.0;
    g = rv1[i];
    l = i;
  }
  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
    l = i + 1;
    g = w[i];
    for (j = l; j < n; j++) u[i][j] = 0.0;
813
    if (g != 0.) {
814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866
      g = 1.0 / g;
      for (j = l; j < n; j++) {
        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
        f = (s / u[i][i]) * g;
        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
      }
      for (j = i; j < m; j++) u[j][i] *= g;
    } else {
      for (j = i; j < m; j++) u[j][i] = 0.0;
    }
    ++u[i][i];
  }
  for (k = n - 1; k >= 0; k--) {
    for (its = 0; its < max_its; its++) {
      flag = 1;
      for (l = k; l >= 0; l--) {
        nm = l - 1;
        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
          flag = 0;
          break;
        }
        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
      }
      if (flag) {
        c = 0.0;
        s = 1.0;
        for (i = l; i <= k; i++) {
          f = s * rv1[i];
          rv1[i] = c * rv1[i];
          if ((double)(fabs(f) + anorm) == anorm) break;
          g = w[i];
          h = pythag(f, g);
          w[i] = h;
          h = 1.0 / h;
          c = g * h;
          s = -f * h;
          for (j = 0; j < m; j++) {
            y = u[j][nm];
            z = u[j][i];
            u[j][nm] = y * c + z * s;
            u[j][i] = z * c - y * s;
          }
        }
      }
      z = w[k];
      if (l == k) {
        if (z < 0.0) {
          w[k] = -z;
          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
        }
        break;
      }
      if (its == max_its - 1) {
867
        aom_free(rv1);
868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901
        return 1;
      }
      assert(k > 0);
      x = w[l];
      nm = k - 1;
      y = w[nm];
      g = rv1[nm];
      h = rv1[k];
      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
      g = pythag(f, 1.0);
      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
      c = s = 1.0;
      for (j = l; j <= nm; j++) {
        i = j + 1;
        g = rv1[i];
        y = w[i];
        h = s * g;
        g = c * g;
        z = pythag(f, h);
        rv1[j] = z;
        c = f / z;
        s = h / z;
        f = x * c + g * s;
        g = g * c - x * s;
        h = y * s;
        y *= c;
        for (jj = 0; jj < n; jj++) {
          x = v[jj][j];
          z = v[jj][i];
          v[jj][j] = x * c + z * s;
          v[jj][i] = z * c - x * s;
        }
        z = pythag(f, h);
        w[j] = z;
902
        if (z != 0.) {
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974
          z = 1.0 / z;
          c = f * z;
          s = h * z;
        }
        f = c * g + s * y;
        x = c * y - s * g;
        for (jj = 0; jj < m; jj++) {
          y = u[jj][j];
          z = u[jj][i];
          u[jj][j] = y * c + z * s;
          u[jj][i] = z * c - y * s;
        }
      }
      rv1[l] = 0.0;
      rv1[k] = f;
      w[k] = x;
    }
  }
  aom_free(rv1);
  return 0;
}

static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
  // Assumes allocation for U is MxN
  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
  int problem, i;

  problem = !(nrU && nrV);
  if (!problem) {
    for (i = 0; i < M; i++) {
      nrU[i] = &U[i * N];
    }
    for (i = 0; i < N; i++) {
      nrV[i] = &V[i * N];
    }
  } else {
    if (nrU) aom_free(nrU);
    if (nrV) aom_free(nrV);
    return 1;
  }

  /* copy from given matx into nrU */
  for (i = 0; i < M; i++) {
    memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
  }

  /* HERE IT IS: do SVD */
  if (svdcmp(nrU, M, N, W, nrV)) {
    aom_free(nrU);
    aom_free(nrV);
    return 1;
  }

  /* aom_free Numerical Recipes arrays */
  aom_free(nrU);
  aom_free(nrV);

  return 0;
}

int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
  double ans;
  int i, j, k;
  double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
  double *const W = (double *)aom_malloc(N * sizeof(*matx));
  double *const V = (double *)aom_malloc(N * N * sizeof(*matx));

  if (!(U && W && V)) {
    return 1;
  }
  if (SVD(U, W, V, matx, M, N)) {
975 976 977
    aom_free(U);
    aom_free(W);
    aom_free(V);
978 979 980 981
    return 1;
  }
  for (i = 0; i < N; i++) {
    if (fabs(W[i]) < TINY_NEAR_ZERO) {
982 983 984
      aom_free(U);
      aom_free(W);
      aom_free(V);
985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025