warped_motion.c 43.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be
 *  found  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include <assert.h>

17
#include "av1/common/warped_motion.h"
18

19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
/* clang-format off */
static const int error_measure_lut[512] = {
  // power 0.6
  255, 254, 254, 253, 253, 252, 251, 251,
  250, 250, 249, 248, 248, 247, 247, 246,
  245, 245, 244, 243, 243, 242, 242, 241,
  240, 240, 239, 238, 238, 237, 237, 236,
  235, 235, 234, 233, 233, 232, 231, 231,
  230, 230, 229, 228, 228, 227, 226, 226,
  225, 224, 224, 223, 222, 222, 221, 220,
  220, 219, 218, 218, 217, 216, 216, 215,
  214, 214, 213, 212, 212, 211, 210, 210,
  209, 208, 208, 207, 206, 206, 205, 204,
  203, 203, 202, 201, 201, 200, 199, 199,
  198, 197, 196, 196, 195, 194, 194, 193,
  192, 191, 191, 190, 189, 188, 188, 187,
  186, 185, 185, 184, 183, 182, 182, 181,
  180, 179, 179, 178, 177, 176, 176, 175,
  174, 173, 173, 172, 171, 170, 169, 169,
  168, 167, 166, 165, 165, 164, 163, 162,
  161, 161, 160, 159, 158, 157, 156, 156,
  155, 154, 153, 152, 151, 151, 150, 149,
  148, 147, 146, 145, 145, 144, 143, 142,
  141, 140, 139, 138, 137, 137, 136, 135,
  134, 133, 132, 131, 130, 129, 128, 127,
  126, 125, 124, 123, 122, 121, 120, 119,
  118, 117, 116, 115, 114, 113, 112, 111,
  110, 109, 108, 107, 106, 105, 104, 103,
  102, 100,  99,  98,  97,  96,  95,  94,
  92, 91, 90, 89, 88, 86, 85, 84,
  83, 81, 80, 79, 77, 76, 75, 73,
  72, 71, 69, 68, 66, 65, 63, 62,
  60, 59, 57, 55, 54, 52, 50, 48,
  47, 45, 43, 41, 39, 37, 34, 32,
  29, 27, 24, 21, 18, 14,  9,  0,
  9, 14, 18, 21, 24, 27, 29, 32,
  34, 37, 39, 41, 43, 45, 47, 48,
  50, 52, 54, 55, 57, 59, 60, 62,
  63, 65, 66, 68, 69, 71, 72, 73,
  75, 76, 77, 79, 80, 81, 83, 84,
  85, 86, 88, 89, 90, 91, 92, 94,
  95,  96,  97,  98,  99, 100, 102, 103,
  104, 105, 106, 107, 108, 109, 110, 111,
  112, 113, 114, 115, 116, 117, 118, 119,
  120, 121, 122, 123, 124, 125, 126, 127,
  128, 129, 130, 131, 132, 133, 134, 135,
  136, 137, 137, 138, 139, 140, 141, 142,
  143, 144, 145, 145, 146, 147, 148, 149,
  150, 151, 151, 152, 153, 154, 155, 156,
  156, 157, 158, 159, 160, 161, 161, 162,
  163, 164, 165, 165, 166, 167, 168, 169,
  169, 170, 171, 172, 173, 173, 174, 175,
  176, 176, 177, 178, 179, 179, 180, 181,
  182, 182, 183, 184, 185, 185, 186, 187,
  188, 188, 189, 190, 191, 191, 192, 193,
  194, 194, 195, 196, 196, 197, 198, 199,
  199, 200, 201, 201, 202, 203, 203, 204,
  205, 206, 206, 207, 208, 208, 209, 210,
  210, 211, 212, 212, 213, 214, 214, 215,
  216, 216, 217, 218, 218, 219, 220, 220,
  221, 222, 222, 223, 224, 224, 225, 226,
  226, 227, 228, 228, 229, 230, 230, 231,
  231, 232, 233, 233, 234, 235, 235, 236,
  237, 237, 238, 238, 239, 240, 240, 241,
  242, 242, 243, 243, 244, 245, 245, 246,
  247, 247, 248, 248, 249, 250, 250, 251,
  251, 252, 253, 253, 254, 254, 255, 255,
};
/* clang-format on */
88

89
static ProjectPointsFunc get_project_points_type(TransformationType type) {
90
  switch (type) {
91 92 93 94
    case HOMOGRAPHY: return project_points_homography;
    case AFFINE: return project_points_affine;
    case ROTZOOM: return project_points_rotzoom;
    case TRANSLATION: return project_points_translation;
clang-format's avatar
clang-format committed
95
    default: assert(0); return NULL;
96 97 98
  }
}

99
void project_points_translation(int32_t *mat, int *points, int *proj,
100 101 102
                                const int n, const int stride_points,
                                const int stride_proj, const int subsampling_x,
                                const int subsampling_y) {
103 104 105 106
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
107
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
108
          ((x * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[0]),
109
          WARPEDDIFF_PREC_BITS + 1);
110
    else
111
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
112
          ((x * (1 << WARPEDMODEL_PREC_BITS)) + mat[0]), WARPEDDIFF_PREC_BITS);
113
    if (subsampling_y)
114
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
115
          ((y * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[1]),
116
          WARPEDDIFF_PREC_BITS + 1);
117
    else
118
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
119
          ((y * (1 << WARPEDMODEL_PREC_BITS))) + mat[1], WARPEDDIFF_PREC_BITS);
120 121 122 123 124
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

125
void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
126 127
                            const int stride_points, const int stride_proj,
                            const int subsampling_x, const int subsampling_y) {
128 129 130 131
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
132
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
133 134
          mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
135 136
          WARPEDDIFF_PREC_BITS + 1);
    else
137
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
138
                                            WARPEDDIFF_PREC_BITS);
139
    if (subsampling_y)
140
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
141 142
          -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
              (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
143 144
          WARPEDDIFF_PREC_BITS + 1);
    else
145
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
146
                                            WARPEDDIFF_PREC_BITS);
147 148 149 150 151
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

152
void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
153 154
                           const int stride_points, const int stride_proj,
                           const int subsampling_x, const int subsampling_y) {
155 156 157 158
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
159
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
160 161
          mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
162 163
          WARPEDDIFF_PREC_BITS + 1);
    else
164
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
165
                                            WARPEDDIFF_PREC_BITS);
166
    if (subsampling_y)
167
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
168
          mat[4] * 2 * x + mat[5] * 2 * y + mat[1] +
Sarah Parker's avatar
Sarah Parker committed
169
              (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
170 171
          WARPEDDIFF_PREC_BITS + 1);
    else
172
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1],
173
                                            WARPEDDIFF_PREC_BITS);
174 175 176 177 178
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

179
void project_points_homography(int32_t *mat, int *points, int *proj,
180 181 182
                               const int n, const int stride_points,
                               const int stride_proj, const int subsampling_x,
                               const int subsampling_y) {
183
  int i;
184 185
  int64_t x, y, Z;
  int64_t xp, yp;
186 187 188 189 190
  for (i = 0; i < n; ++i) {
    x = *(points++), y = *(points++);
    x = (subsampling_x ? 4 * x + 1 : 2 * x);
    y = (subsampling_y ? 4 * y + 1 : 2 * y);

191 192
    Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
    xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
193 194
         (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
                WARPEDMODEL_PREC_BITS));
195
    yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
196 197
         (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
                WARPEDMODEL_PREC_BITS));
198 199 200 201

    xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
    yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;

clang-format's avatar
clang-format committed
202 203
    if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
    if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
204 205 206 207 208 209 210 211 212
    *(proj++) = xp;
    *(proj++) = yp;

    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

static const int16_t
clang-format's avatar
clang-format committed
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
    filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
      { 0, 0, 128, 0, 0, 0 },      { 0, -1, 128, 2, -1, 0 },
      { 1, -3, 127, 4, -1, 0 },    { 1, -4, 126, 6, -2, 1 },
      { 1, -5, 126, 8, -3, 1 },    { 1, -6, 125, 11, -4, 1 },
      { 1, -7, 124, 13, -4, 1 },   { 2, -8, 123, 15, -5, 1 },
      { 2, -9, 122, 18, -6, 1 },   { 2, -10, 121, 20, -6, 1 },
      { 2, -11, 120, 22, -7, 2 },  { 2, -12, 119, 25, -8, 2 },
      { 3, -13, 117, 27, -8, 2 },  { 3, -13, 116, 29, -9, 2 },
      { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
      { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
      { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
      { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
      { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
      { 4, -18, 96, 58, -15, 3 },  { 4, -18, 94, 60, -16, 4 },
      { 4, -18, 91, 63, -16, 4 },  { 4, -18, 89, 65, -16, 4 },
      { 4, -18, 87, 68, -17, 4 },  { 4, -18, 85, 70, -17, 4 },
      { 4, -18, 82, 73, -17, 4 },  { 4, -18, 80, 75, -17, 4 },
      { 4, -18, 78, 78, -18, 4 },  { 4, -17, 75, 80, -18, 4 },
      { 4, -17, 73, 82, -18, 4 },  { 4, -17, 70, 85, -18, 4 },
      { 4, -17, 68, 87, -18, 4 },  { 4, -16, 65, 89, -18, 4 },
      { 4, -16, 63, 91, -18, 4 },  { 4, -16, 60, 94, -18, 4 },
      { 3, -15, 58, 96, -18, 4 },  { 4, -15, 55, 98, -18, 4 },
      { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
      { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
      { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
      { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
      { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
      { 2, -8, 27, 117, -13, 3 },  { 2, -8, 25, 119, -12, 2 },
      { 2, -7, 22, 120, -11, 2 },  { 1, -6, 20, 121, -10, 2 },
      { 1, -6, 18, 122, -9, 2 },   { 1, -5, 15, 123, -8, 2 },
      { 1, -4, 13, 124, -7, 1 },   { 1, -4, 11, 125, -6, 1 },
      { 1, -3, 8, 126, -5, 1 },    { 1, -2, 6, 126, -4, 1 },
      { 0, -1, 4, 127, -3, 1 },    { 0, -1, 2, 128, -1, 0 },
    };
247 248 249 250 251 252 253 254 255 256 257

static int32_t do_ntap_filter(int32_t *p, int x) {
  int i;
  int32_t sum = 0;
  for (i = 0; i < WARPEDPIXEL_FILTER_TAPS; ++i) {
    sum += p[i - WARPEDPIXEL_FILTER_TAPS / 2 + 1] * filter_ntap[x][i];
  }
  return sum;
}

static int32_t do_cubic_filter(int32_t *p, int x) {
clang-format's avatar
clang-format committed
258
  if (x == 0) {
259
    return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
260
  } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
261
    return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
262
  } else {
263
    const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
264 265 266
    const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
    const int64_t v3 = x * (p[1] - p[-1]);
    const int64_t v4 = 2 * p[0];
267
    return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
268 269 270
        (v4 * (1 << (3 * WARPEDPIXEL_PREC_BITS))) +
            (v3 * (1 << (2 * WARPEDPIXEL_PREC_BITS))) +
            (v2 * (1 << WARPEDPIXEL_PREC_BITS)) + v1,
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
        3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
  }
}

static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col,
                                 int stride, int x, int y_start) {
  int i;
  for (i = 0; i < taps; ++i) {
    col[i] = ref[(i + y_start) * stride + x];
  }
}

static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) {
  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
  int k;
clang-format's avatar
clang-format committed
286 287
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
288 289 290 291 292 293
  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
    get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
                  i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
                  j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
294
                            y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
295 296
  }
  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
297
                       x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
298 299 300 301 302 303 304
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

static uint8_t bi_cubic_filter(uint8_t *ref, int x, int y, int stride) {
  int32_t val, arr[4];
  int k;
clang-format's avatar
clang-format committed
305 306
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
307 308
  for (k = 0; k < 4; ++k) {
    int32_t arr_temp[4];
clang-format's avatar
clang-format committed
309
    get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
310 311
    arr[k] =
        do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
312
  }
313
  val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
314 315 316 317 318 319 320
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

static uint8_t bi_linear_filter(uint8_t *ref, int x, int y, int stride) {
  const int ix = x >> WARPEDPIXEL_PREC_BITS;
  const int iy = y >> WARPEDPIXEL_PREC_BITS;
321 322
  const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
323 324 325
  int32_t val;
  val = ROUND_POWER_OF_TWO_SIGNED(
      ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
clang-format's avatar
clang-format committed
326 327 328 329
              (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
          ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[(iy + 1) * stride + ix + 1] * sy * sx,
330 331 332 333
      WARPEDPIXEL_PREC_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

clang-format's avatar
clang-format committed
334 335
static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width,
                                int height, int stride) {
336 337
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
338 339
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
340 341
  int32_t v;

clang-format's avatar
clang-format committed
342 343
  if (ix < 0 && iy < 0)
    return ref[0];
344 345 346 347 348 349 350 351 352
  else if (ix < 0 && iy > height - 1)
    return ref[(height - 1) * stride];
  else if (ix > width - 1 && iy < 0)
    return ref[width - 1];
  else if (ix > width - 1 && iy > height - 1)
    return ref[(height - 1) * stride + (width - 1)];
  else if (ix < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
353
            ref[(iy + 1) * stride] * sy,
354 355 356 357
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
clang-format's avatar
clang-format committed
358
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
359 360 361 362 363
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix > width - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
364
            ref[(iy + 1) * stride + width - 1] * sy,
365 366 367 368 369
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy > height - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
clang-format's avatar
clang-format committed
370
            ref[(height - 1) * stride + ix + 1] * sx,
371 372 373 374 375 376 377
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    return bi_ntap_filter(ref, x, y, stride);
clang-format's avatar
clang-format committed
378
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
379 380 381 382 383 384
    return bi_cubic_filter(ref, x, y, stride);
  } else {
    return bi_linear_filter(ref, x, y, stride);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
385
#if CONFIG_AOM_HIGHBITDEPTH
386 387 388 389 390 391 392 393
static INLINE void highbd_get_subcolumn(int taps, uint16_t *ref, int32_t *col,
                                        int stride, int x, int y_start) {
  int i;
  for (i = 0; i < taps; ++i) {
    col[i] = ref[(i + y_start) * stride + x];
  }
}

clang-format's avatar
clang-format committed
394
static uint16_t highbd_bi_ntap_filter(uint16_t *ref, int x, int y, int stride,
395 396 397
                                      int bd) {
  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
  int k;
clang-format's avatar
clang-format committed
398 399
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
400 401 402 403 404 405
  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
    highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
                         i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
                         j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
406
                            y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
407 408
  }
  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
409
                       x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
410
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
411
  return (uint16_t)clip_pixel_highbd(val, bd);
412 413
}

clang-format's avatar
clang-format committed
414
static uint16_t highbd_bi_cubic_filter(uint16_t *ref, int x, int y, int stride,
415 416 417
                                       int bd) {
  int32_t val, arr[4];
  int k;
clang-format's avatar
clang-format committed
418 419
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
420 421
  for (k = 0; k < 4; ++k) {
    int32_t arr_temp[4];
clang-format's avatar
clang-format committed
422
    highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
423 424
    arr[k] =
        do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
425
  }
426
  val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
427
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
428
  return (uint16_t)clip_pixel_highbd(val, bd);
429 430
}

clang-format's avatar
clang-format committed
431
static uint16_t highbd_bi_linear_filter(uint16_t *ref, int x, int y, int stride,
432 433 434
                                        int bd) {
  const int ix = x >> WARPEDPIXEL_PREC_BITS;
  const int iy = y >> WARPEDPIXEL_PREC_BITS;
435 436
  const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
437 438 439
  int32_t val;
  val = ROUND_POWER_OF_TWO_SIGNED(
      ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
clang-format's avatar
clang-format committed
440 441 442 443
              (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
          ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[(iy + 1) * stride + ix + 1] * sy * sx,
444
      WARPEDPIXEL_PREC_BITS * 2);
445
  return (uint16_t)clip_pixel_highbd(val, bd);
446 447
}

clang-format's avatar
clang-format committed
448 449
static uint16_t highbd_warp_interpolate(uint16_t *ref, int x, int y, int width,
                                        int height, int stride, int bd) {
450 451
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
452 453
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
454 455
  int32_t v;

clang-format's avatar
clang-format committed
456 457
  if (ix < 0 && iy < 0)
    return ref[0];
458 459 460 461 462 463 464 465 466
  else if (ix < 0 && iy > height - 1)
    return ref[(height - 1) * stride];
  else if (ix > width - 1 && iy < 0)
    return ref[width - 1];
  else if (ix > width - 1 && iy > height - 1)
    return ref[(height - 1) * stride + (width - 1)];
  else if (ix < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
467
            ref[(iy + 1) * stride] * sy,
468
        WARPEDPIXEL_PREC_BITS);
469
    return clip_pixel_highbd(v, bd);
470 471
  } else if (iy < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
clang-format's avatar
clang-format committed
472
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
473
        WARPEDPIXEL_PREC_BITS);
474
    return clip_pixel_highbd(v, bd);
475 476 477
  } else if (ix > width - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
478
            ref[(iy + 1) * stride + width - 1] * sy,
479
        WARPEDPIXEL_PREC_BITS);
480
    return clip_pixel_highbd(v, bd);
481 482 483
  } else if (iy > height - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
clang-format's avatar
clang-format committed
484
            ref[(height - 1) * stride + ix + 1] * sx,
485
        WARPEDPIXEL_PREC_BITS);
486
    return clip_pixel_highbd(v, bd);
487 488 489 490 491
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    return highbd_bi_ntap_filter(ref, x, y, stride, bd);
clang-format's avatar
clang-format committed
492
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
493 494 495 496 497 498
    return highbd_bi_cubic_filter(ref, x, y, stride, bd);
  } else {
    return highbd_bi_linear_filter(ref, x, y, stride, bd);
  }
}

499 500 501
static INLINE int highbd_error_measure(int err, int bd) {
  const int b = bd - 8;
  const int bmask = (1 << b) - 1;
502
  const int v = (1 << b);
503 504 505 506 507 508 509 510
  int e1, e2;
  err = abs(err);
  e1 = err >> b;
  e2 = err & bmask;
  return error_measure_lut[255 + e1] * (v - e2) +
         error_measure_lut[256 + e1] * e2;
}

511 512 513 514 515 516 517
static double highbd_warp_erroradv(WarpedMotionParams *wm, uint8_t *ref8,
                                   int width, int height, int stride,
                                   uint8_t *dst8, int p_col, int p_row,
                                   int p_width, int p_height, int p_stride,
                                   int subsampling_x, int subsampling_y,
                                   int x_scale, int y_scale, int bd) {
  int i, j;
518
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
519 520 521
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  int gm_err = 0, no_gm_err = 0;
522
  int64_t gm_sumerr = 0, no_gm_sumerr = 0;
523 524 525 526 527
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
528
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
529 530 531 532 533 534 535
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
               highbd_warp_interpolate(ref, out[0], out[1], width, height,
                                       stride, bd);
      no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
                  ref[(j - p_col) + (i - p_row) * stride];
536 537
      gm_sumerr += highbd_error_measure(gm_err, bd);
      no_gm_sumerr += highbd_error_measure(no_gm_err, bd);
538 539 540 541 542
    }
  }
  return (double)gm_sumerr / no_gm_sumerr;
}

clang-format's avatar
clang-format committed
543 544 545 546 547
static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
                              int height, int stride, uint8_t *pred8, int p_col,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int x_scale, int y_scale,
548
                              int bd, int ref_frm) {
549
  int i, j;
550
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
551 552
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
553
  if (projectpoints == NULL) return;
554 555 556
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
557 558
      in[0] = j;
      in[1] = i;
559
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
560 561
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
562 563 564 565 566 567 568 569 570
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                highbd_warp_interpolate(ref, out[0], out[1], width, height,
                                        stride, bd),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
            ref, out[0], out[1], width, height, stride, bd);
571 572 573
    }
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
574
#endif  // CONFIG_AOM_HIGHBITDEPTH
575

576 577 578 579
static INLINE int error_measure(int err) {
  return error_measure_lut[255 + err];
}

580 581 582 583 584 585 586 587
static double warp_erroradv(WarpedMotionParams *wm, uint8_t *ref, int width,
                            int height, int stride, uint8_t *dst, int p_col,
                            int p_row, int p_width, int p_height, int p_stride,
                            int subsampling_x, int subsampling_y, int x_scale,
                            int y_scale) {
  int gm_err = 0, no_gm_err = 0;
  int gm_sumerr = 0, no_gm_sumerr = 0;
  int i, j;
588
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
589 590 591 592 593
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
594
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
595 596 597 598 599 600
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
               warp_interpolate(ref, out[0], out[1], width, height, stride);
      no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
                  ref[(j - p_col) + (i - p_row) * stride];
601 602
      gm_sumerr += error_measure(gm_err);
      no_gm_sumerr += error_measure(no_gm_err);
603 604 605 606 607 608 609 610 611
    }
  }
  return (double)gm_sumerr / no_gm_sumerr;
}

static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
                       int height, int stride, uint8_t *pred, int p_col,
                       int p_row, int p_width, int p_height, int p_stride,
                       int subsampling_x, int subsampling_y, int x_scale,
612
                       int y_scale, int ref_frm) {
613
  int i, j;
614
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
615 616 617 618 619 620
  if (projectpoints == NULL) return;
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
621
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
622 623
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
624 625 626 627 628 629 630 631
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                warp_interpolate(ref, out[0], out[1], width, height, stride),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] =
            warp_interpolate(ref, out[0], out[1], width, height, stride);
632 633 634 635
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
636 637 638 639 640 641 642 643 644
double av1_warp_erroradv(WarpedMotionParams *wm,
#if CONFIG_AOM_HIGHBITDEPTH
                         int use_hbd, int bd,
#endif  // CONFIG_AOM_HIGHBITDEPTH
                         uint8_t *ref, int width, int height, int stride,
                         uint8_t *dst, int p_col, int p_row, int p_width,
                         int p_height, int p_stride, int subsampling_x,
                         int subsampling_y, int x_scale, int y_scale) {
#if CONFIG_AOM_HIGHBITDEPTH
645 646 647 648
  if (use_hbd)
    return highbd_warp_erroradv(
        wm, ref, width, height, stride, dst, p_col, p_row, p_width, p_height,
        p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd);
Yaowu Xu's avatar
Yaowu Xu committed
649
#endif  // CONFIG_AOM_HIGHBITDEPTH
650 651 652
  return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
                       p_width, p_height, p_stride, subsampling_x,
                       subsampling_y, x_scale, y_scale);
653 654
}

Yaowu Xu's avatar
Yaowu Xu committed
655 656 657 658 659 660 661
void av1_warp_plane(WarpedMotionParams *wm,
#if CONFIG_AOM_HIGHBITDEPTH
                    int use_hbd, int bd,
#endif  // CONFIG_AOM_HIGHBITDEPTH
                    uint8_t *ref, int width, int height, int stride,
                    uint8_t *pred, int p_col, int p_row, int p_width,
                    int p_height, int p_stride, int subsampling_x,
662
                    int subsampling_y, int x_scale, int y_scale, int ref_frm) {
Yaowu Xu's avatar
Yaowu Xu committed
663
#if CONFIG_AOM_HIGHBITDEPTH
664
  if (use_hbd)
clang-format's avatar
clang-format committed
665 666
    highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
                      p_width, p_height, p_stride, subsampling_x, subsampling_y,
667
                      x_scale, y_scale, bd, ref_frm);
668
  else
Yaowu Xu's avatar
Yaowu Xu committed
669
#endif  // CONFIG_AOM_HIGHBITDEPTH
clang-format's avatar
clang-format committed
670 671
    warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
               p_height, p_stride, subsampling_x, subsampling_y, x_scale,
672
               y_scale, ref_frm);
673
}
674

Yaowu Xu's avatar
Yaowu Xu committed
675 676
void av1_integerize_model(const double *model, TransformationType wmtype,
                          WarpedMotionParams *wm) {
677 678 679 680
  wm->wmtype = wmtype;
  switch (wmtype) {
    case HOMOGRAPHY:
      assert(fabs(model[8] - 1.0) < 1e-12);
681 682 683 684
      wm->wmmat[6] =
          (int32_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
      wm->wmmat[7] =
          (int32_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
685 686
    /* fallthrough intended */
    case AFFINE:
687 688
      wm->wmmat[4] = (int32_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[5] = (int32_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
689 690
    /* fallthrough intended */
    case ROTZOOM:
691 692
      wm->wmmat[2] = (int32_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[3] = (int32_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
693 694
    /* fallthrough intended */
    case TRANSLATION:
695 696
      wm->wmmat[0] = (int32_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[1] = (int32_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
697 698 699 700
      break;
    default: assert(0 && "Invalid TransformationType");
  }
}
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753

///////////////////////////////////////////////////////////////////////////////
// svdcmp
// Adopted from Numerical Recipes in C

static const double TINY_NEAR_ZERO = 1.0E-12;

static INLINE double sign(double a, double b) {
  return ((b) >= 0 ? fabs(a) : -fabs(a));
}

static INLINE double pythag(double a, double b) {
  double ct;
  const double absa = fabs(a);
  const double absb = fabs(b);

  if (absa > absb) {
    ct = absb / absa;
    return absa * sqrt(1.0 + ct * ct);
  } else {
    ct = absa / absb;
    return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
  }
}

static void multiply_mat(const double *m1, const double *m2, double *res,
                         const int m1_rows, const int inner_dim,
                         const int m2_cols) {
  double sum;

  int row, col, inner;
  for (row = 0; row < m1_rows; ++row) {
    for (col = 0; col < m2_cols; ++col) {
      sum = 0;
      for (inner = 0; inner < inner_dim; ++inner)
        sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
      *(res++) = sum;
    }
  }
}

static int svdcmp(double **u, int m, int n, double w[], double **v) {
  const int max_its = 30;
  int flag, i, its, j, jj, k, l, nm;
  double anorm, c, f, g, h, s, scale, x, y, z;
  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
  g = scale = anorm = 0.0;
  for (i = 0; i < n; i++) {
    l = i + 1;
    rv1[i] = scale * g;
    g = s = scale = 0.0;
    if (i < m) {
      for (k = i; k < m; k++) scale += fabs(u[k][i]);
754
      if (scale != 0.) {
755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
        for (k = i; k < m; k++) {
          u[k][i] /= scale;
          s += u[k][i] * u[k][i];
        }
        f = u[i][i];
        g = -sign(sqrt(s), f);
        h = f * g - s;
        u[i][i] = f - g;
        for (j = l; j < n; j++) {
          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
          f = s / h;
          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
        }
        for (k = i; k < m; k++) u[k][i] *= scale;
      }
    }
    w[i] = scale * g;
    g = s = scale = 0.0;
    if (i < m && i != n - 1) {
      for (k = l; k < n; k++) scale += fabs(u[i][k]);
775
      if (scale != 0.) {
776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796
        for (k = l; k < n; k++) {
          u[i][k] /= scale;
          s += u[i][k] * u[i][k];
        }
        f = u[i][l];
        g = -sign(sqrt(s), f);
        h = f * g - s;
        u[i][l] = f - g;
        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
        for (j = l; j < m; j++) {
          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
        }
        for (k = l; k < n; k++) u[i][k] *= scale;
      }
    }
    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
  }

  for (i = n - 1; i >= 0; i--) {
    if (i < n - 1) {
797
      if (g != 0.) {
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
        for (j = l; j < n; j++) {
          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
        }
      }
      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
    }
    v[i][i] = 1.0;
    g = rv1[i];
    l = i;
  }
  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
    l = i + 1;
    g = w[i];
    for (j = l; j < n; j++) u[i][j] = 0.0;
814
    if (g != 0.) {
815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
      g = 1.0 / g;
      for (j = l; j < n; j++) {
        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
        f = (s / u[i][i]) * g;
        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
      }
      for (j = i; j < m; j++) u[j][i] *= g;
    } else {
      for (j = i; j < m; j++) u[j][i] = 0.0;
    }
    ++u[i][i];
  }
  for (k = n - 1; k >= 0; k--) {
    for (its = 0; its < max_its; its++) {
      flag = 1;
      for (l = k; l >= 0; l--) {
        nm = l - 1;
        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
          flag = 0;
          break;
        }
        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
      }
      if (flag) {
        c = 0.0;
        s = 1.0;
        for (i = l; i <= k; i++) {
          f = s * rv1[i];
          rv1[i] = c * rv1[i];
          if ((double)(fabs(f) + anorm) == anorm) break;
          g = w[i];
          h = pythag(f, g);
          w[i] = h;
          h = 1.0 / h;
          c = g * h;
          s = -f * h;
          for (j = 0; j < m; j++) {
            y = u[j][nm];
            z = u[j][i];
            u[j][nm] = y * c + z * s;
            u[j][i] = z * c - y * s;
          }
        }
      }
      z = w[k];
      if (l == k) {
        if (z < 0.0) {
          w[k] = -z;
          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
        }
        break;
      }
      if (its == max_its - 1) {
868
        aom_free(rv1);
869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902
        return 1;
      }
      assert(k > 0);
      x = w[l];
      nm = k - 1;
      y = w[nm];
      g = rv1[nm];
      h = rv1[k];
      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
      g = pythag(f, 1.0);
      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
      c = s = 1.0;
      for (j = l; j <= nm; j++) {
        i = j + 1;
        g = rv1[i];
        y = w[i];
        h = s * g;
        g = c * g;
        z = pythag(f, h);
        rv1[j] = z;
        c = f / z;
        s = h / z;
        f = x * c + g * s;
        g = g * c - x * s;
        h = y * s;
        y *= c;
        for (jj = 0; jj < n; jj++) {
          x = v[jj][j];
          z = v[jj][i];
          v[jj][j] = x * c + z * s;
          v[jj][i] = z * c - x * s;
        }
        z = pythag(f, h);
        w[j] = z;
903
        if (z != 0.) {
904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975
          z = 1.0 / z;
          c = f * z;
          s = h * z;
        }
        f = c * g + s * y;
        x = c * y - s * g;
        for (jj = 0; jj < m; jj++) {
          y = u[jj][j];
          z = u[jj][i];
          u[jj][j] = y * c + z * s;
          u[jj][i] = z * c - y * s;
        }
      }
      rv1[l] = 0.0;
      rv1[k] = f;
      w[k] = x;
    }
  }
  aom_free(rv1);
  return 0;
}

static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
  // Assumes allocation for U is MxN
  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
  int problem, i;

  problem = !(nrU && nrV);
  if (!problem) {
    for (i = 0; i < M; i++) {
      nrU[i] = &U[i * N];
    }
    for (i = 0; i < N; i++) {
      nrV[i] = &V[i * N];
    }
  } else {
    if (nrU) aom_free(nrU);
    if (nrV) aom_free(nrV);
    return 1;
  }

  /* copy from given matx into nrU */
  for (i = 0; i < M; i++) {
    memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
  }

  /* HERE IT IS: do SVD */
  if (svdcmp(nrU, M, N, W, nrV)) {
    aom_free(nrU);
    aom_free(nrV);
    return 1;
  }

  /* aom_free Numerical Recipes arrays */
  aom_free(nrU);
  aom_free(nrV);

  return 0;
}

int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
  double ans;
  int i, j, k;
  double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
  double *const W = (double *)aom_malloc(N * sizeof(*matx));
  double *const V = (double *)aom_malloc(N * N * sizeof(*matx));

  if (!(U && W && V)) {
    return 1;
  }
  if (SVD(U, W, V, matx, M, N)) {
976 977 978
    aom_free(U);
    aom_free(W);
    aom_free(V);
979 980 981 982
    return 1;
  }
  for (i = 0; i < N; i++) {
    if (fabs(W[i]) < TINY_NEAR_ZERO) {
983 984 985
      aom_free(U);
      aom_free(W);
      aom_free(V);
986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013