warped_motion.c 41 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be
 *  found  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include <assert.h>

17
#include "av1/common/warped_motion.h"
18

19
#define ERRORADV_CLAMP 128
20

21
static ProjectPointsFunc get_project_points_type(TransformationType type) {
22
  switch (type) {
23 24 25 26
    case HOMOGRAPHY: return project_points_homography;
    case AFFINE: return project_points_affine;
    case ROTZOOM: return project_points_rotzoom;
    case TRANSLATION: return project_points_translation;
clang-format's avatar
clang-format committed
27
    default: assert(0); return NULL;
28 29 30
  }
}

31
void project_points_translation(int32_t *mat, int *points, int *proj,
32 33 34
                                const int n, const int stride_points,
                                const int stride_proj, const int subsampling_x,
                                const int subsampling_y) {
35 36 37 38
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
39
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
40
          ((x * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[0]),
41
          WARPEDDIFF_PREC_BITS + 1);
42
    else
43
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
44
          ((x * (1 << WARPEDMODEL_PREC_BITS)) + mat[0]), WARPEDDIFF_PREC_BITS);
45
    if (subsampling_y)
46
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
47
          ((y * (1 << (WARPEDMODEL_PREC_BITS + 1))) + mat[1]),
48
          WARPEDDIFF_PREC_BITS + 1);
49
    else
50
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
51
          ((y * (1 << WARPEDMODEL_PREC_BITS))) + mat[1], WARPEDDIFF_PREC_BITS);
52 53 54 55 56
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

57
void project_points_rotzoom(int32_t *mat, int *points, int *proj, const int n,
58 59
                            const int stride_points, const int stride_proj,
                            const int subsampling_x, const int subsampling_y) {
60 61 62 63
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
64
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
65 66
          mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
67 68
          WARPEDDIFF_PREC_BITS + 1);
    else
69
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
70
                                            WARPEDDIFF_PREC_BITS);
71
    if (subsampling_y)
72
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
73 74
          -mat[3] * 2 * x + mat[2] * 2 * y + mat[1] +
              (-mat[3] + mat[2] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
75 76
          WARPEDDIFF_PREC_BITS + 1);
    else
77
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(-mat[3] * x + mat[2] * y + mat[1],
78
                                            WARPEDDIFF_PREC_BITS);
79 80 81 82 83
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

84
void project_points_affine(int32_t *mat, int *points, int *proj, const int n,
85 86
                           const int stride_points, const int stride_proj,
                           const int subsampling_x, const int subsampling_y) {
87 88 89 90
  int i;
  for (i = 0; i < n; ++i) {
    const int x = *(points++), y = *(points++);
    if (subsampling_x)
91
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
92 93
          mat[2] * 2 * x + mat[3] * 2 * y + mat[0] +
              (mat[2] + mat[3] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
94 95
          WARPEDDIFF_PREC_BITS + 1);
    else
96
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[2] * x + mat[3] * y + mat[0],
97
                                            WARPEDDIFF_PREC_BITS);
98
    if (subsampling_y)
99
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(
100
          mat[4] * 2 * x + mat[5] * 2 * y + mat[1] +
Sarah Parker's avatar
Sarah Parker committed
101
              (mat[4] + mat[5] - (1 << WARPEDMODEL_PREC_BITS)) / 2,
102 103
          WARPEDDIFF_PREC_BITS + 1);
    else
104
      *(proj++) = ROUND_POWER_OF_TWO_SIGNED(mat[4] * x + mat[5] * y + mat[1],
105
                                            WARPEDDIFF_PREC_BITS);
106 107 108 109 110
    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

111
void project_points_homography(int32_t *mat, int *points, int *proj,
112 113 114
                               const int n, const int stride_points,
                               const int stride_proj, const int subsampling_x,
                               const int subsampling_y) {
115
  int i;
116 117
  int64_t x, y, Z;
  int64_t xp, yp;
118 119 120 121 122
  for (i = 0; i < n; ++i) {
    x = *(points++), y = *(points++);
    x = (subsampling_x ? 4 * x + 1 : 2 * x);
    y = (subsampling_y ? 4 * y + 1 : 2 * y);

123 124
    Z = (mat[6] * x + mat[7] * y + (1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS + 1)));
    xp = (mat[2] * x + mat[3] * y + 2 * mat[0]) *
125 126
         (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
                WARPEDMODEL_PREC_BITS));
127
    yp = (mat[4] * x + mat[5] * y + 2 * mat[1]) *
128 129
         (1 << (WARPEDPIXEL_PREC_BITS + WARPEDMODEL_ROW3HOMO_PREC_BITS -
                WARPEDMODEL_PREC_BITS));
130 131 132 133

    xp = xp > 0 ? (xp + Z / 2) / Z : (xp - Z / 2) / Z;
    yp = yp > 0 ? (yp + Z / 2) / Z : (yp - Z / 2) / Z;

clang-format's avatar
clang-format committed
134 135
    if (subsampling_x) xp = (xp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
    if (subsampling_y) yp = (yp - (1 << (WARPEDPIXEL_PREC_BITS - 1))) / 2;
136 137 138 139 140 141 142 143 144
    *(proj++) = xp;
    *(proj++) = yp;

    points += stride_points - 2;
    proj += stride_proj - 2;
  }
}

static const int16_t
clang-format's avatar
clang-format committed
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
    filter_ntap[WARPEDPIXEL_PREC_SHIFTS][WARPEDPIXEL_FILTER_TAPS] = {
      { 0, 0, 128, 0, 0, 0 },      { 0, -1, 128, 2, -1, 0 },
      { 1, -3, 127, 4, -1, 0 },    { 1, -4, 126, 6, -2, 1 },
      { 1, -5, 126, 8, -3, 1 },    { 1, -6, 125, 11, -4, 1 },
      { 1, -7, 124, 13, -4, 1 },   { 2, -8, 123, 15, -5, 1 },
      { 2, -9, 122, 18, -6, 1 },   { 2, -10, 121, 20, -6, 1 },
      { 2, -11, 120, 22, -7, 2 },  { 2, -12, 119, 25, -8, 2 },
      { 3, -13, 117, 27, -8, 2 },  { 3, -13, 116, 29, -9, 2 },
      { 3, -14, 114, 32, -10, 3 }, { 3, -15, 113, 35, -10, 2 },
      { 3, -15, 111, 37, -11, 3 }, { 3, -16, 109, 40, -11, 3 },
      { 3, -16, 108, 42, -12, 3 }, { 4, -17, 106, 45, -13, 3 },
      { 4, -17, 104, 47, -13, 3 }, { 4, -17, 102, 50, -14, 3 },
      { 4, -17, 100, 52, -14, 3 }, { 4, -18, 98, 55, -15, 4 },
      { 4, -18, 96, 58, -15, 3 },  { 4, -18, 94, 60, -16, 4 },
      { 4, -18, 91, 63, -16, 4 },  { 4, -18, 89, 65, -16, 4 },
      { 4, -18, 87, 68, -17, 4 },  { 4, -18, 85, 70, -17, 4 },
      { 4, -18, 82, 73, -17, 4 },  { 4, -18, 80, 75, -17, 4 },
      { 4, -18, 78, 78, -18, 4 },  { 4, -17, 75, 80, -18, 4 },
      { 4, -17, 73, 82, -18, 4 },  { 4, -17, 70, 85, -18, 4 },
      { 4, -17, 68, 87, -18, 4 },  { 4, -16, 65, 89, -18, 4 },
      { 4, -16, 63, 91, -18, 4 },  { 4, -16, 60, 94, -18, 4 },
      { 3, -15, 58, 96, -18, 4 },  { 4, -15, 55, 98, -18, 4 },
      { 3, -14, 52, 100, -17, 4 }, { 3, -14, 50, 102, -17, 4 },
      { 3, -13, 47, 104, -17, 4 }, { 3, -13, 45, 106, -17, 4 },
      { 3, -12, 42, 108, -16, 3 }, { 3, -11, 40, 109, -16, 3 },
      { 3, -11, 37, 111, -15, 3 }, { 2, -10, 35, 113, -15, 3 },
      { 3, -10, 32, 114, -14, 3 }, { 2, -9, 29, 116, -13, 3 },
      { 2, -8, 27, 117, -13, 3 },  { 2, -8, 25, 119, -12, 2 },
      { 2, -7, 22, 120, -11, 2 },  { 1, -6, 20, 121, -10, 2 },
      { 1, -6, 18, 122, -9, 2 },   { 1, -5, 15, 123, -8, 2 },
      { 1, -4, 13, 124, -7, 1 },   { 1, -4, 11, 125, -6, 1 },
      { 1, -3, 8, 126, -5, 1 },    { 1, -2, 6, 126, -4, 1 },
      { 0, -1, 4, 127, -3, 1 },    { 0, -1, 2, 128, -1, 0 },
    };
179 180 181 182 183 184 185 186 187 188 189

static int32_t do_ntap_filter(int32_t *p, int x) {
  int i;
  int32_t sum = 0;
  for (i = 0; i < WARPEDPIXEL_FILTER_TAPS; ++i) {
    sum += p[i - WARPEDPIXEL_FILTER_TAPS / 2 + 1] * filter_ntap[x][i];
  }
  return sum;
}

static int32_t do_cubic_filter(int32_t *p, int x) {
clang-format's avatar
clang-format committed
190
  if (x == 0) {
191
    return p[0] * (1 << WARPEDPIXEL_FILTER_BITS);
192
  } else if (x == (1 << WARPEDPIXEL_PREC_BITS)) {
193
    return p[1] * (1 << WARPEDPIXEL_FILTER_BITS);
194
  } else {
195
    const int64_t v1 = (int64_t)x * x * x * (3 * (p[0] - p[1]) + p[2] - p[-1]);
196 197 198
    const int64_t v2 = x * x * (2 * p[-1] - 5 * p[0] + 4 * p[1] - p[2]);
    const int64_t v3 = x * (p[1] - p[-1]);
    const int64_t v4 = 2 * p[0];
199
    return (int32_t)ROUND_POWER_OF_TWO_SIGNED(
200 201 202
        (v4 * (1 << (3 * WARPEDPIXEL_PREC_BITS))) +
            (v3 * (1 << (2 * WARPEDPIXEL_PREC_BITS))) +
            (v2 * (1 << WARPEDPIXEL_PREC_BITS)) + v1,
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
        3 * WARPEDPIXEL_PREC_BITS + 1 - WARPEDPIXEL_FILTER_BITS);
  }
}

static INLINE void get_subcolumn(int taps, uint8_t *ref, int32_t *col,
                                 int stride, int x, int y_start) {
  int i;
  for (i = 0; i < taps; ++i) {
    col[i] = ref[(i + y_start) * stride + x];
  }
}

static uint8_t bi_ntap_filter(uint8_t *ref, int x, int y, int stride) {
  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
  int k;
clang-format's avatar
clang-format committed
218 219
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
220 221 222 223 224 225
  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
    get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
                  i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
                  j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
226
                            y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
227 228
  }
  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
229
                       x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
230 231 232 233 234 235 236
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

static uint8_t bi_cubic_filter(uint8_t *ref, int x, int y, int stride) {
  int32_t val, arr[4];
  int k;
clang-format's avatar
clang-format committed
237 238
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
239 240
  for (k = 0; k < 4; ++k) {
    int32_t arr_temp[4];
clang-format's avatar
clang-format committed
241
    get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
242 243
    arr[k] =
        do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
244
  }
245
  val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
246 247 248 249 250 251 252
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

static uint8_t bi_linear_filter(uint8_t *ref, int x, int y, int stride) {
  const int ix = x >> WARPEDPIXEL_PREC_BITS;
  const int iy = y >> WARPEDPIXEL_PREC_BITS;
253 254
  const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
255 256 257
  int32_t val;
  val = ROUND_POWER_OF_TWO_SIGNED(
      ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
clang-format's avatar
clang-format committed
258 259 260 261
              (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
          ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[(iy + 1) * stride + ix + 1] * sy * sx,
262 263 264 265
      WARPEDPIXEL_PREC_BITS * 2);
  return (uint8_t)clip_pixel(val);
}

clang-format's avatar
clang-format committed
266 267
static uint8_t warp_interpolate(uint8_t *ref, int x, int y, int width,
                                int height, int stride) {
268 269
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
270 271
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
272 273
  int32_t v;

clang-format's avatar
clang-format committed
274 275
  if (ix < 0 && iy < 0)
    return ref[0];
276 277 278 279 280 281 282 283 284
  else if (ix < 0 && iy > height - 1)
    return ref[(height - 1) * stride];
  else if (ix > width - 1 && iy < 0)
    return ref[width - 1];
  else if (ix > width - 1 && iy > height - 1)
    return ref[(height - 1) * stride + (width - 1)];
  else if (ix < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
285
            ref[(iy + 1) * stride] * sy,
286 287 288 289
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
clang-format's avatar
clang-format committed
290
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
291 292 293 294 295
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix > width - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
296
            ref[(iy + 1) * stride + width - 1] * sy,
297 298 299 300 301
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (iy > height - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
clang-format's avatar
clang-format committed
302
            ref[(height - 1) * stride + ix + 1] * sx,
303 304 305 306 307 308 309
        WARPEDPIXEL_PREC_BITS);
    return clip_pixel(v);
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    return bi_ntap_filter(ref, x, y, stride);
clang-format's avatar
clang-format committed
310
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
311 312 313 314 315 316
    return bi_cubic_filter(ref, x, y, stride);
  } else {
    return bi_linear_filter(ref, x, y, stride);
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
317
#if CONFIG_AOM_HIGHBITDEPTH
318 319 320 321 322 323 324 325
static INLINE void highbd_get_subcolumn(int taps, uint16_t *ref, int32_t *col,
                                        int stride, int x, int y_start) {
  int i;
  for (i = 0; i < taps; ++i) {
    col[i] = ref[(i + y_start) * stride + x];
  }
}

clang-format's avatar
clang-format committed
326
static uint16_t highbd_bi_ntap_filter(uint16_t *ref, int x, int y, int stride,
327 328 329
                                      int bd) {
  int32_t val, arr[WARPEDPIXEL_FILTER_TAPS];
  int k;
clang-format's avatar
clang-format committed
330 331
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
332 333 334 335 336 337
  for (k = 0; k < WARPEDPIXEL_FILTER_TAPS; ++k) {
    int32_t arr_temp[WARPEDPIXEL_FILTER_TAPS];
    highbd_get_subcolumn(WARPEDPIXEL_FILTER_TAPS, ref, arr_temp, stride,
                         i + k + 1 - WARPEDPIXEL_FILTER_TAPS / 2,
                         j + 1 - WARPEDPIXEL_FILTER_TAPS / 2);
    arr[k] = do_ntap_filter(arr_temp + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
338
                            y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
339 340
  }
  val = do_ntap_filter(arr + WARPEDPIXEL_FILTER_TAPS / 2 - 1,
341
                       x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
342
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
343
  return (uint16_t)clip_pixel_highbd(val, bd);
344 345
}

clang-format's avatar
clang-format committed
346
static uint16_t highbd_bi_cubic_filter(uint16_t *ref, int x, int y, int stride,
347 348 349
                                       int bd) {
  int32_t val, arr[4];
  int k;
clang-format's avatar
clang-format committed
350 351
  int i = (int)x >> WARPEDPIXEL_PREC_BITS;
  int j = (int)y >> WARPEDPIXEL_PREC_BITS;
352 353
  for (k = 0; k < 4; ++k) {
    int32_t arr_temp[4];
clang-format's avatar
clang-format committed
354
    highbd_get_subcolumn(4, ref, arr_temp, stride, i + k - 1, j - 1);
355 356
    arr[k] =
        do_cubic_filter(arr_temp + 1, y - (j * (1 << WARPEDPIXEL_PREC_BITS)));
357
  }
358
  val = do_cubic_filter(arr + 1, x - (i * (1 << WARPEDPIXEL_PREC_BITS)));
359
  val = ROUND_POWER_OF_TWO_SIGNED(val, WARPEDPIXEL_FILTER_BITS * 2);
360
  return (uint16_t)clip_pixel_highbd(val, bd);
361 362
}

clang-format's avatar
clang-format committed
363
static uint16_t highbd_bi_linear_filter(uint16_t *ref, int x, int y, int stride,
364 365 366
                                        int bd) {
  const int ix = x >> WARPEDPIXEL_PREC_BITS;
  const int iy = y >> WARPEDPIXEL_PREC_BITS;
367 368
  const int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  const int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
369 370 371
  int32_t val;
  val = ROUND_POWER_OF_TWO_SIGNED(
      ref[iy * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sy) *
clang-format's avatar
clang-format committed
372 373 374 375
              (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[iy * stride + ix + 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) * sx +
          ref[(iy + 1) * stride + ix] * sy * (WARPEDPIXEL_PREC_SHIFTS - sx) +
          ref[(iy + 1) * stride + ix + 1] * sy * sx,
376
      WARPEDPIXEL_PREC_BITS * 2);
377
  return (uint16_t)clip_pixel_highbd(val, bd);
378 379
}

clang-format's avatar
clang-format committed
380 381
static uint16_t highbd_warp_interpolate(uint16_t *ref, int x, int y, int width,
                                        int height, int stride, int bd) {
382 383
  int ix = x >> WARPEDPIXEL_PREC_BITS;
  int iy = y >> WARPEDPIXEL_PREC_BITS;
384 385
  int sx = x - (ix * (1 << WARPEDPIXEL_PREC_BITS));
  int sy = y - (iy * (1 << WARPEDPIXEL_PREC_BITS));
386 387
  int32_t v;

clang-format's avatar
clang-format committed
388 389
  if (ix < 0 && iy < 0)
    return ref[0];
390 391 392 393 394 395 396 397 398
  else if (ix < 0 && iy > height - 1)
    return ref[(height - 1) * stride];
  else if (ix > width - 1 && iy < 0)
    return ref[width - 1];
  else if (ix > width - 1 && iy > height - 1)
    return ref[(height - 1) * stride + (width - 1)];
  else if (ix < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
399
            ref[(iy + 1) * stride] * sy,
400
        WARPEDPIXEL_PREC_BITS);
401
    return clip_pixel_highbd(v, bd);
402 403
  } else if (iy < 0) {
    v = ROUND_POWER_OF_TWO_SIGNED(
clang-format's avatar
clang-format committed
404
        ref[ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) + ref[ix + 1] * sx,
405
        WARPEDPIXEL_PREC_BITS);
406
    return clip_pixel_highbd(v, bd);
407 408 409
  } else if (ix > width - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[iy * stride + width - 1] * (WARPEDPIXEL_PREC_SHIFTS - sy) +
clang-format's avatar
clang-format committed
410
            ref[(iy + 1) * stride + width - 1] * sy,
411
        WARPEDPIXEL_PREC_BITS);
412
    return clip_pixel_highbd(v, bd);
413 414 415
  } else if (iy > height - 1) {
    v = ROUND_POWER_OF_TWO_SIGNED(
        ref[(height - 1) * stride + ix] * (WARPEDPIXEL_PREC_SHIFTS - sx) +
clang-format's avatar
clang-format committed
416
            ref[(height - 1) * stride + ix + 1] * sx,
417
        WARPEDPIXEL_PREC_BITS);
418
    return clip_pixel_highbd(v, bd);
419 420 421 422 423
  } else if (ix >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             iy >= WARPEDPIXEL_FILTER_TAPS / 2 - 1 &&
             ix < width - WARPEDPIXEL_FILTER_TAPS / 2 &&
             iy < height - WARPEDPIXEL_FILTER_TAPS / 2) {
    return highbd_bi_ntap_filter(ref, x, y, stride, bd);
clang-format's avatar
clang-format committed
424
  } else if (ix >= 1 && iy >= 1 && ix < width - 2 && iy < height - 2) {
425 426 427 428 429 430
    return highbd_bi_cubic_filter(ref, x, y, stride, bd);
  } else {
    return highbd_bi_linear_filter(ref, x, y, stride, bd);
  }
}

431 432 433 434 435 436 437
static double highbd_warp_erroradv(WarpedMotionParams *wm, uint8_t *ref8,
                                   int width, int height, int stride,
                                   uint8_t *dst8, int p_col, int p_row,
                                   int p_width, int p_height, int p_stride,
                                   int subsampling_x, int subsampling_y,
                                   int x_scale, int y_scale, int bd) {
  int i, j;
438
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
439 440 441
  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  int gm_err = 0, no_gm_err = 0;
442
  int64_t gm_sumerr = 0, no_gm_sumerr = 0;
443 444 445 446 447
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
448
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
449 450 451 452 453 454 455
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
               highbd_warp_interpolate(ref, out[0], out[1], width, height,
                                       stride, bd);
      no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
                  ref[(j - p_col) + (i - p_row) * stride];
456 457 458 459
      gm_err = abs(gm_err);
      no_gm_err = abs(no_gm_err);
      gm_sumerr += (int64_t)AOMMIN(gm_err, ERRORADV_CLAMP << (bd - 8));
      no_gm_sumerr += (int64_t)AOMMIN(no_gm_err, ERRORADV_CLAMP << (bd - 8));
460 461 462 463 464
    }
  }
  return (double)gm_sumerr / no_gm_sumerr;
}

clang-format's avatar
clang-format committed
465 466 467 468 469
static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
                              int height, int stride, uint8_t *pred8, int p_col,
                              int p_row, int p_width, int p_height,
                              int p_stride, int subsampling_x,
                              int subsampling_y, int x_scale, int y_scale,
470
                              int bd, int ref_frm) {
471
  int i, j;
472
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
473 474
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
475
  if (projectpoints == NULL) return;
476 477 478
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
479 480
      in[0] = j;
      in[1] = i;
481
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
482 483
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
484 485 486 487 488 489 490 491 492
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                highbd_warp_interpolate(ref, out[0], out[1], width, height,
                                        stride, bd),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] = highbd_warp_interpolate(
            ref, out[0], out[1], width, height, stride, bd);
493 494 495
    }
  }
}
Yaowu Xu's avatar
Yaowu Xu committed
496
#endif  // CONFIG_AOM_HIGHBITDEPTH
497

498 499 500 501 502 503 504 505
static double warp_erroradv(WarpedMotionParams *wm, uint8_t *ref, int width,
                            int height, int stride, uint8_t *dst, int p_col,
                            int p_row, int p_width, int p_height, int p_stride,
                            int subsampling_x, int subsampling_y, int x_scale,
                            int y_scale) {
  int gm_err = 0, no_gm_err = 0;
  int gm_sumerr = 0, no_gm_sumerr = 0;
  int i, j;
506
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
507 508 509 510 511
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
512
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
513 514 515 516 517 518
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
      gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
               warp_interpolate(ref, out[0], out[1], width, height, stride);
      no_gm_err = dst[(j - p_col) + (i - p_row) * p_stride] -
                  ref[(j - p_col) + (i - p_row) * stride];
519 520 521 522
      gm_err = abs(gm_err);
      no_gm_err = abs(no_gm_err);
      gm_sumerr += AOMMIN(gm_err, ERRORADV_CLAMP);
      no_gm_sumerr += AOMMIN(no_gm_err, ERRORADV_CLAMP);
523 524 525 526 527 528 529 530 531
    }
  }
  return (double)gm_sumerr / no_gm_sumerr;
}

static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
                       int height, int stride, uint8_t *pred, int p_col,
                       int p_row, int p_width, int p_height, int p_stride,
                       int subsampling_x, int subsampling_y, int x_scale,
532
                       int y_scale, int ref_frm) {
533
  int i, j;
534
  ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype);
535 536 537 538 539 540
  if (projectpoints == NULL) return;
  for (i = p_row; i < p_row + p_height; ++i) {
    for (j = p_col; j < p_col + p_width; ++j) {
      int in[2], out[2];
      in[0] = j;
      in[1] = i;
541
      projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y);
542 543
      out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4);
      out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4);
544 545 546 547 548 549 550 551
      if (ref_frm)
        pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO(
            pred[(j - p_col) + (i - p_row) * p_stride] +
                warp_interpolate(ref, out[0], out[1], width, height, stride),
            1);
      else
        pred[(j - p_col) + (i - p_row) * p_stride] =
            warp_interpolate(ref, out[0], out[1], width, height, stride);
552 553 554 555
    }
  }
}

Yaowu Xu's avatar
Yaowu Xu committed
556 557 558 559 560 561 562 563 564
double av1_warp_erroradv(WarpedMotionParams *wm,
#if CONFIG_AOM_HIGHBITDEPTH
                         int use_hbd, int bd,
#endif  // CONFIG_AOM_HIGHBITDEPTH
                         uint8_t *ref, int width, int height, int stride,
                         uint8_t *dst, int p_col, int p_row, int p_width,
                         int p_height, int p_stride, int subsampling_x,
                         int subsampling_y, int x_scale, int y_scale) {
#if CONFIG_AOM_HIGHBITDEPTH
565 566 567 568
  if (use_hbd)
    return highbd_warp_erroradv(
        wm, ref, width, height, stride, dst, p_col, p_row, p_width, p_height,
        p_stride, subsampling_x, subsampling_y, x_scale, y_scale, bd);
Yaowu Xu's avatar
Yaowu Xu committed
569
#endif  // CONFIG_AOM_HIGHBITDEPTH
570 571 572
  return warp_erroradv(wm, ref, width, height, stride, dst, p_col, p_row,
                       p_width, p_height, p_stride, subsampling_x,
                       subsampling_y, x_scale, y_scale);
573 574
}

Yaowu Xu's avatar
Yaowu Xu committed
575 576 577 578 579 580 581
void av1_warp_plane(WarpedMotionParams *wm,
#if CONFIG_AOM_HIGHBITDEPTH
                    int use_hbd, int bd,
#endif  // CONFIG_AOM_HIGHBITDEPTH
                    uint8_t *ref, int width, int height, int stride,
                    uint8_t *pred, int p_col, int p_row, int p_width,
                    int p_height, int p_stride, int subsampling_x,
582
                    int subsampling_y, int x_scale, int y_scale, int ref_frm) {
Yaowu Xu's avatar
Yaowu Xu committed
583
#if CONFIG_AOM_HIGHBITDEPTH
584
  if (use_hbd)
clang-format's avatar
clang-format committed
585 586
    highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row,
                      p_width, p_height, p_stride, subsampling_x, subsampling_y,
587
                      x_scale, y_scale, bd, ref_frm);
588
  else
Yaowu Xu's avatar
Yaowu Xu committed
589
#endif  // CONFIG_AOM_HIGHBITDEPTH
clang-format's avatar
clang-format committed
590 591
    warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width,
               p_height, p_stride, subsampling_x, subsampling_y, x_scale,
592
               y_scale, ref_frm);
593
}
594

Yaowu Xu's avatar
Yaowu Xu committed
595 596
void av1_integerize_model(const double *model, TransformationType wmtype,
                          WarpedMotionParams *wm) {
597 598 599 600
  wm->wmtype = wmtype;
  switch (wmtype) {
    case HOMOGRAPHY:
      assert(fabs(model[8] - 1.0) < 1e-12);
601 602 603 604
      wm->wmmat[6] =
          (int32_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
      wm->wmmat[7] =
          (int32_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
605 606
    /* fallthrough intended */
    case AFFINE:
607 608
      wm->wmmat[4] = (int32_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[5] = (int32_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
609 610
    /* fallthrough intended */
    case ROTZOOM:
611 612
      wm->wmmat[2] = (int32_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[3] = (int32_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
613 614
    /* fallthrough intended */
    case TRANSLATION:
615 616
      wm->wmmat[0] = (int32_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
      wm->wmmat[1] = (int32_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
617 618 619 620
      break;
    default: assert(0 && "Invalid TransformationType");
  }
}
621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673

///////////////////////////////////////////////////////////////////////////////
// svdcmp
// Adopted from Numerical Recipes in C

static const double TINY_NEAR_ZERO = 1.0E-12;

static INLINE double sign(double a, double b) {
  return ((b) >= 0 ? fabs(a) : -fabs(a));
}

static INLINE double pythag(double a, double b) {
  double ct;
  const double absa = fabs(a);
  const double absb = fabs(b);

  if (absa > absb) {
    ct = absb / absa;
    return absa * sqrt(1.0 + ct * ct);
  } else {
    ct = absa / absb;
    return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
  }
}

static void multiply_mat(const double *m1, const double *m2, double *res,
                         const int m1_rows, const int inner_dim,
                         const int m2_cols) {
  double sum;

  int row, col, inner;
  for (row = 0; row < m1_rows; ++row) {
    for (col = 0; col < m2_cols; ++col) {
      sum = 0;
      for (inner = 0; inner < inner_dim; ++inner)
        sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
      *(res++) = sum;
    }
  }
}

static int svdcmp(double **u, int m, int n, double w[], double **v) {
  const int max_its = 30;
  int flag, i, its, j, jj, k, l, nm;
  double anorm, c, f, g, h, s, scale, x, y, z;
  double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
  g = scale = anorm = 0.0;
  for (i = 0; i < n; i++) {
    l = i + 1;
    rv1[i] = scale * g;
    g = s = scale = 0.0;
    if (i < m) {
      for (k = i; k < m; k++) scale += fabs(u[k][i]);
674
      if (scale != 0.) {
675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694
        for (k = i; k < m; k++) {
          u[k][i] /= scale;
          s += u[k][i] * u[k][i];
        }
        f = u[i][i];
        g = -sign(sqrt(s), f);
        h = f * g - s;
        u[i][i] = f - g;
        for (j = l; j < n; j++) {
          for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
          f = s / h;
          for (k = i; k < m; k++) u[k][j] += f * u[k][i];
        }
        for (k = i; k < m; k++) u[k][i] *= scale;
      }
    }
    w[i] = scale * g;
    g = s = scale = 0.0;
    if (i < m && i != n - 1) {
      for (k = l; k < n; k++) scale += fabs(u[i][k]);
695
      if (scale != 0.) {
696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716
        for (k = l; k < n; k++) {
          u[i][k] /= scale;
          s += u[i][k] * u[i][k];
        }
        f = u[i][l];
        g = -sign(sqrt(s), f);
        h = f * g - s;
        u[i][l] = f - g;
        for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
        for (j = l; j < m; j++) {
          for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
          for (k = l; k < n; k++) u[j][k] += s * rv1[k];
        }
        for (k = l; k < n; k++) u[i][k] *= scale;
      }
    }
    anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
  }

  for (i = n - 1; i >= 0; i--) {
    if (i < n - 1) {
717
      if (g != 0.) {
718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733
        for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
        for (j = l; j < n; j++) {
          for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
          for (k = l; k < n; k++) v[k][j] += s * v[k][i];
        }
      }
      for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
    }
    v[i][i] = 1.0;
    g = rv1[i];
    l = i;
  }
  for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
    l = i + 1;
    g = w[i];
    for (j = l; j < n; j++) u[i][j] = 0.0;
734
    if (g != 0.) {
735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787
      g = 1.0 / g;
      for (j = l; j < n; j++) {
        for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
        f = (s / u[i][i]) * g;
        for (k = i; k < m; k++) u[k][j] += f * u[k][i];
      }
      for (j = i; j < m; j++) u[j][i] *= g;
    } else {
      for (j = i; j < m; j++) u[j][i] = 0.0;
    }
    ++u[i][i];
  }
  for (k = n - 1; k >= 0; k--) {
    for (its = 0; its < max_its; its++) {
      flag = 1;
      for (l = k; l >= 0; l--) {
        nm = l - 1;
        if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
          flag = 0;
          break;
        }
        if ((double)(fabs(w[nm]) + anorm) == anorm) break;
      }
      if (flag) {
        c = 0.0;
        s = 1.0;
        for (i = l; i <= k; i++) {
          f = s * rv1[i];
          rv1[i] = c * rv1[i];
          if ((double)(fabs(f) + anorm) == anorm) break;
          g = w[i];
          h = pythag(f, g);
          w[i] = h;
          h = 1.0 / h;
          c = g * h;
          s = -f * h;
          for (j = 0; j < m; j++) {
            y = u[j][nm];
            z = u[j][i];
            u[j][nm] = y * c + z * s;
            u[j][i] = z * c - y * s;
          }
        }
      }
      z = w[k];
      if (l == k) {
        if (z < 0.0) {
          w[k] = -z;
          for (j = 0; j < n; j++) v[j][k] = -v[j][k];
        }
        break;
      }
      if (its == max_its - 1) {
788
        aom_free(rv1);
789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
        return 1;
      }
      assert(k > 0);
      x = w[l];
      nm = k - 1;
      y = w[nm];
      g = rv1[nm];
      h = rv1[k];
      f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
      g = pythag(f, 1.0);
      f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
      c = s = 1.0;
      for (j = l; j <= nm; j++) {
        i = j + 1;
        g = rv1[i];
        y = w[i];
        h = s * g;
        g = c * g;
        z = pythag(f, h);
        rv1[j] = z;
        c = f / z;
        s = h / z;
        f = x * c + g * s;
        g = g * c - x * s;
        h = y * s;
        y *= c;
        for (jj = 0; jj < n; jj++) {
          x = v[jj][j];
          z = v[jj][i];
          v[jj][j] = x * c + z * s;
          v[jj][i] = z * c - x * s;
        }
        z = pythag(f, h);
        w[j] = z;
823
        if (z != 0.) {
824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895
          z = 1.0 / z;
          c = f * z;
          s = h * z;
        }
        f = c * g + s * y;
        x = c * y - s * g;
        for (jj = 0; jj < m; jj++) {
          y = u[jj][j];
          z = u[jj][i];
          u[jj][j] = y * c + z * s;
          u[jj][i] = z * c - y * s;
        }
      }
      rv1[l] = 0.0;
      rv1[k] = f;
      w[k] = x;
    }
  }
  aom_free(rv1);
  return 0;
}

static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
  // Assumes allocation for U is MxN
  double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
  double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
  int problem, i;

  problem = !(nrU && nrV);
  if (!problem) {
    for (i = 0; i < M; i++) {
      nrU[i] = &U[i * N];
    }
    for (i = 0; i < N; i++) {
      nrV[i] = &V[i * N];
    }
  } else {
    if (nrU) aom_free(nrU);
    if (nrV) aom_free(nrV);
    return 1;
  }

  /* copy from given matx into nrU */
  for (i = 0; i < M; i++) {
    memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
  }

  /* HERE IT IS: do SVD */
  if (svdcmp(nrU, M, N, W, nrV)) {
    aom_free(nrU);
    aom_free(nrV);
    return 1;
  }

  /* aom_free Numerical Recipes arrays */
  aom_free(nrU);
  aom_free(nrV);

  return 0;
}

int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
  double ans;
  int i, j, k;
  double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
  double *const W = (double *)aom_malloc(N * sizeof(*matx));
  double *const V = (double *)aom_malloc(N * N * sizeof(*matx));

  if (!(U && W && V)) {
    return 1;
  }
  if (SVD(U, W, V, matx, M, N)) {
896 897 898
    aom_free(U);
    aom_free(W);
    aom_free(V);
899 900 901 902
    return 1;
  }
  for (i = 0; i < N; i++) {
    if (fabs(W[i]) < TINY_NEAR_ZERO) {
903 904 905
      aom_free(U);
      aom_free(W);
      aom_free(V);
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981
      return 1;
    }
  }

  for (i = 0; i < N; i++) {
    for (j = 0; j < M; j++) {
      ans = 0;
      for (k = 0; k < N; k++) {
        ans += V[k + N * i] * U[k + N * j] / W[k];
      }
      inv[j + M * i] = ans;
    }
  }
  aom_free(U);
  aom_free(W);
  aom_free(V);
  return 0;
}

static void normalize_homography(double *pts, int n, double *T) {
  double *p = pts;
  double mean[2] = { 0, 0 };
  double msqe = 0;
  double scale;
  int i;
  for (i = 0; i < n; ++i, p += 2) {
    mean[0] += p[0];
    mean[1] += p[1];
  }
  mean[0] /= n;
  mean[1] /= n;
  for (p = pts, i = 0; i < n; ++i, p += 2) {
    p[0] -= mean[0];
    p[1] -= mean[1];
    msqe += sqrt(p[0] * p[0] + p[1] * p[1]);
  }
  msqe /= n;
  scale = sqrt(2) / msqe;
  T[0] = scale;
  T[1] = 0;
  T[2] = -scale * mean[0];
  T[3] = 0;
  T[4] = scale;
  T[5] = -scale * mean[1];
  T[6] = 0;
  T[7] = 0;
  T[8] = 1;
  for (p = pts, i = 0; i < n; ++i, p += 2) {
    p[0] *= scale;
    p[1] *= scale;
  }
}

static void invnormalize_mat(double *T, double *iT) {
  double is = 1.0 / T[0];
  double m0 = -T[2] * is;
  double m1 = -T[5] * is;
  iT[0] = is;
  iT[1] = 0;
  iT[2] = m0;
  iT[3] = 0;
  iT[4] = is;
  iT[5] = m1;
  iT[6] = 0;
  iT[7] = 0;
  iT[8] = 1;
}

static void denormalize_homography(double *params, double *T1, double *T2) {
  double iT2[9];
  double params2[9];
  invnormalize_mat(T2, iT2);
  multiply_mat(params, T1, params2, 3, 3, 3);
  multiply_mat(iT2, params2, params, 3, 3, 3);
}

982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998
static void denormalize_homography_reorder(double *params, double *T1,
                                           double *T2) {
  double params_denorm[MAX_PARAMDIM];
  memcpy(params_denorm, params, sizeof(*params) * 8);
  params_denorm[8] = 1.0;
  denormalize_homography(params_denorm, T1, T2);
  params[0] = params_denorm[2];
  params[1] = params_denorm[5];
  params[2] = params_denorm[0];
  params[3] = params_denorm[1];
  params[4] = params_denorm[3];
  params[5] = params_denorm[4];
  params[6] = params_denorm[6];
  params[7] = params_denorm[7];
}

static void denormalize_affine_reorder(double *params, double *T1, double *T2) {
999 1000 1001 1002 1003 1004 1005 1006 1007 1008
  double params_denorm[MAX_PARAMDIM];
  params_denorm[0] = params[0];
  params_denorm[1] = params[1];
  params_denorm[2] = params[4];
  params_denorm[3] = params[2];
  params_denorm[4] = params[3];
  params_denorm[5] = params[5];
  params_denorm[6] = params_denorm[7] = 0;
  params_denorm[8] = 1;
  denormalize_homography(params_denorm, T1, T2);
1009 1010 1011 1012
  params[0] = params_denorm[2];
  params[1] = params_denorm[5];
  params[2] = params_denorm[0];
  params[3] = params_denorm[1];
1013 1014
  params[4] = params_denorm[3];
  params[5] = params_denorm[4];
1015
  params[6] = params[7] = 0;
1016 1017
}

1018 1019
static void denormalize_rotzoom_reorder(double *params, double *T1,