Commit e6eb3b53 authored by Debargha Mukherjee's avatar Debargha Mukherjee
Browse files

Integerize warped motion computation

Integerizes computation of the least squares for warped motion.
The model is restricted to only Affine. Affine seems easiest
to compute and integerize since it can be split into two 3-dim
least squares problems, as opposed to rotation-zoom which needs
a 4-dim least-squares problem to be solved.
The current implementation requires only one division per block.

BDRATE impact is mminimal. The upgrade to the affine model improves
coding efficiency but integerization also degrades efficiency a
little. Overall there is a net gain of about -0.07% BDRATE on
the lowres set.
BDRATE lowres: -1.113% with ----enable-warped-motion vs. without
(up from -1.044%).

Change-Id: I6b9216ac0737d76f59054293eabee48e17739ec4
parent 4db04d36
......@@ -1114,7 +1114,7 @@ void calc_projection_samples(MB_MODE_INFO *const mbmi,
#if CONFIG_GLOBAL_MOTION
MACROBLOCKD *xd,
#endif
int x, int y, double *pts_inref) {
int x, int y, int *pts_inref) {
if (mbmi->motion_mode == WARPED_CAUSAL
#if CONFIG_GLOBAL_MOTION
|| (mbmi->mode == ZEROMV &&
......@@ -1131,16 +1131,19 @@ void calc_projection_samples(MB_MODE_INFO *const mbmi,
&mbmi->wm_params[0];
project_points(wm, ipts, ipts_inref, 1, 2, 2, 0, 0);
pts_inref[0] = (double)ipts_inref[0] / (double)WARPEDPIXEL_PREC_SHIFTS;
pts_inref[1] = (double)ipts_inref[1] / (double)WARPEDPIXEL_PREC_SHIFTS;
pts_inref[0] =
ROUND_POWER_OF_TWO_SIGNED(ipts_inref[0], WARPEDPIXEL_PREC_BITS - 3);
pts_inref[1] =
ROUND_POWER_OF_TWO_SIGNED(ipts_inref[1], WARPEDPIXEL_PREC_BITS - 3);
} else {
pts_inref[0] = (double)x + (double)(mbmi->mv[0].as_mv.col) * 0.125;
pts_inref[1] = (double)y + (double)(mbmi->mv[0].as_mv.row) * 0.125;
pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
}
}
// Note: Samples returned are at 1/8-pel precision
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
double *pts, double *pts_inref) {
int *pts, int *pts_inref) {
MB_MODE_INFO *const mbmi0 = &(xd->mi[0]->mbmi);
int ref_frame = mbmi0->ref_frame[0];
int up_available = xd->up_available;
......@@ -1178,8 +1181,8 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int x = cc_offset + j % 2 + global_offset_c;
int y = cr_offset + j / 2 + global_offset_r;
pts[0] = (double)x;
pts[1] = (double)y;
pts[0] = (x * 8);
pts[1] = (y * 8);
calc_projection_samples(mbmi,
#if CONFIG_GLOBAL_MOTION
xd,
......@@ -1221,8 +1224,8 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int x = cc_offset + j % 2 + global_offset_c;
int y = cr_offset + j / 2 + global_offset_r;
pts[0] = (double)x;
pts[1] = (double)y;
pts[0] = (x * 8);
pts[1] = (y * 8);
calc_projection_samples(mbmi,
#if CONFIG_GLOBAL_MOTION
xd,
......@@ -1260,8 +1263,8 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int x = cc_offset + j % 2 + global_offset_c;
int y = cr_offset + j / 2 + global_offset_r;
pts[0] = (double)x;
pts[1] = (double)y;
pts[0] = (x * 8);
pts[1] = (y * 8);
calc_projection_samples(mbmi,
#if CONFIG_GLOBAL_MOTION
xd,
......@@ -1298,11 +1301,13 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int r_offset = j / 2;
int c_offset = j % 2;
pts[0] = (double)(cc_offset + c_offset + global_offset_c);
pts[1] = (double)(cr_offset + r_offset + global_offset_r);
int x = (cc_offset + c_offset + global_offset_c);
int y = (cr_offset + r_offset + global_offset_r);
pts_inref[0] = pts[0] + (double)(mv_col)*0.125;
pts_inref[1] = pts[1] + (double)(mv_row)*0.125;
pts[0] = (x * 8);
pts[1] = (y * 8);
pts_inref[0] = pts[0] + mv_col;
pts_inref[1] = pts[1] + mv_row;
pts += 2;
pts_inref += 2;
......
......@@ -515,7 +515,7 @@ void av1_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi,
#if CONFIG_WARPED_MOTION
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
double *pts, double *pts_inref);
int *pts, int *pts_inref);
#endif // CONFIG_WARPED_MOTION
#ifdef __cplusplus
......
This diff is collapsed.
......@@ -27,7 +27,7 @@
#if CONFIG_WARPED_MOTION
#define SAMPLES_PER_NEIGHBOR 4
#define SAMPLES_ARRAY_SIZE ((2 * MAX_MIB_SIZE + 2) * SAMPLES_PER_NEIGHBOR * 2)
#define DEFAULT_WMTYPE ROTZOOM
#define DEFAULT_WMTYPE AFFINE
#endif // CONFIG_WARPED_MOTION
const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3][8];
......@@ -86,16 +86,6 @@ void av1_warp_plane(WarpedMotionParams *wm,
int p_height, int p_stride, int subsampling_x,
int subsampling_y, int x_scale, int y_scale, int ref_frm);
// Integerize model into the WarpedMotionParams structure
void av1_integerize_model(const double *model, TransformationType wmtype,
WarpedMotionParams *wm);
int find_translation(const int np, double *pts1, double *pts2, double *mat);
int find_rotzoom(const int np, double *pts1, double *pts2, double *mat);
int find_affine(const int np, double *pts1, double *pts2, double *mat);
int find_hortrapezoid(const int np, double *pts1, double *pts2, double *mat);
int find_vertrapezoid(const int np, double *pts1, double *pts2, double *mat);
int find_homography(const int np, double *pts1, double *pts2, double *mat);
int find_projection(const int np, double *pts1, double *pts2,
int find_projection(const int np, int *pts1, int *pts2,
WarpedMotionParams *wm_params);
#endif // AV1_COMMON_WARPED_MOTION_H_
......@@ -1577,7 +1577,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
int16_t mode_ctx = 0;
#if CONFIG_WARPED_MOTION
double pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#endif // CONFIG_WARPED_MOTION
#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
......
......@@ -133,6 +133,767 @@ static void project_points_double_homography(double *mat, double *points,
}
}
///////////////////////////////////////////////////////////////////////////////
// svdcmp
// Adopted from Numerical Recipes in C
static const double TINY_NEAR_ZERO = 1.0E-12;
static INLINE double sign(double a, double b) {
return ((b) >= 0 ? fabs(a) : -fabs(a));
}
static INLINE double pythag(double a, double b) {
double ct;
const double absa = fabs(a);
const double absb = fabs(b);
if (absa > absb) {
ct = absb / absa;
return absa * sqrt(1.0 + ct * ct);
} else {
ct = absa / absb;
return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
}
}
static void multiply_mat(const double *m1, const double *m2, double *res,
const int m1_rows, const int inner_dim,
const int m2_cols) {
double sum;
int row, col, inner;
for (row = 0; row < m1_rows; ++row) {
for (col = 0; col < m2_cols; ++col) {
sum = 0;
for (inner = 0; inner < inner_dim; ++inner)
sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
*(res++) = sum;
}
}
}
static int svdcmp(double **u, int m, int n, double w[], double **v) {
const int max_its = 30;
int flag, i, its, j, jj, k, l, nm;
double anorm, c, f, g, h, s, scale, x, y, z;
double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
g = scale = anorm = 0.0;
for (i = 0; i < n; i++) {
l = i + 1;
rv1[i] = scale * g;
g = s = scale = 0.0;
if (i < m) {
for (k = i; k < m; k++) scale += fabs(u[k][i]);
if (scale != 0.) {
for (k = i; k < m; k++) {
u[k][i] /= scale;
s += u[k][i] * u[k][i];
}
f = u[i][i];
g = -sign(sqrt(s), f);
h = f * g - s;
u[i][i] = f - g;
for (j = l; j < n; j++) {
for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
f = s / h;
for (k = i; k < m; k++) u[k][j] += f * u[k][i];
}
for (k = i; k < m; k++) u[k][i] *= scale;
}
}
w[i] = scale * g;
g = s = scale = 0.0;
if (i < m && i != n - 1) {
for (k = l; k < n; k++) scale += fabs(u[i][k]);
if (scale != 0.) {
for (k = l; k < n; k++) {
u[i][k] /= scale;
s += u[i][k] * u[i][k];
}
f = u[i][l];
g = -sign(sqrt(s), f);
h = f * g - s;
u[i][l] = f - g;
for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
for (j = l; j < m; j++) {
for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
for (k = l; k < n; k++) u[j][k] += s * rv1[k];
}
for (k = l; k < n; k++) u[i][k] *= scale;
}
}
anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
}
for (i = n - 1; i >= 0; i--) {
if (i < n - 1) {
if (g != 0.) {
for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
for (k = l; k < n; k++) v[k][j] += s * v[k][i];
}
}
for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
}
v[i][i] = 1.0;
g = rv1[i];
l = i;
}
for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
l = i + 1;
g = w[i];
for (j = l; j < n; j++) u[i][j] = 0.0;
if (g != 0.) {
g = 1.0 / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
f = (s / u[i][i]) * g;
for (k = i; k < m; k++) u[k][j] += f * u[k][i];
}
for (j = i; j < m; j++) u[j][i] *= g;
} else {
for (j = i; j < m; j++) u[j][i] = 0.0;
}
++u[i][i];
}
for (k = n - 1; k >= 0; k--) {
for (its = 0; its < max_its; its++) {
flag = 1;
for (l = k; l >= 0; l--) {
nm = l - 1;
if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
flag = 0;
break;
}
if ((double)(fabs(w[nm]) + anorm) == anorm) break;
}
if (flag) {
c = 0.0;
s = 1.0;
for (i = l; i <= k; i++) {
f = s * rv1[i];
rv1[i] = c * rv1[i];
if ((double)(fabs(f) + anorm) == anorm) break;
g = w[i];
h = pythag(f, g);
w[i] = h;
h = 1.0 / h;
c = g * h;
s = -f * h;
for (j = 0; j < m; j++) {
y = u[j][nm];
z = u[j][i];
u[j][nm] = y * c + z * s;
u[j][i] = z * c - y * s;
}
}
}
z = w[k];
if (l == k) {
if (z < 0.0) {
w[k] = -z;
for (j = 0; j < n; j++) v[j][k] = -v[j][k];
}
break;
}
if (its == max_its - 1) {
aom_free(rv1);
return 1;
}
assert(k > 0);
x = w[l];
nm = k - 1;
y = w[nm];
g = rv1[nm];
h = rv1[k];
f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
g = pythag(f, 1.0);
f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
c = s = 1.0;
for (j = l; j <= nm; j++) {
i = j + 1;
g = rv1[i];
y = w[i];
h = s * g;
g = c * g;
z = pythag(f, h);
rv1[j] = z;
c = f / z;
s = h / z;
f = x * c + g * s;
g = g * c - x * s;
h = y * s;
y *= c;
for (jj = 0; jj < n; jj++) {
x = v[jj][j];
z = v[jj][i];
v[jj][j] = x * c + z * s;
v[jj][i] = z * c - x * s;
}
z = pythag(f, h);
w[j] = z;
if (z != 0.) {
z = 1.0 / z;
c = f * z;
s = h * z;
}
f = c * g + s * y;
x = c * y - s * g;
for (jj = 0; jj < m; jj++) {
y = u[jj][j];
z = u[jj][i];
u[jj][j] = y * c + z * s;
u[jj][i] = z * c - y * s;
}
}
rv1[l] = 0.0;
rv1[k] = f;
w[k] = x;
}
}
aom_free(rv1);
return 0;
}
static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
// Assumes allocation for U is MxN
double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
int problem, i;
problem = !(nrU && nrV);
if (!problem) {
for (i = 0; i < M; i++) {
nrU[i] = &U[i * N];
}
for (i = 0; i < N; i++) {
nrV[i] = &V[i * N];
}
} else {
if (nrU) aom_free(nrU);
if (nrV) aom_free(nrV);
return 1;
}
/* copy from given matx into nrU */
for (i = 0; i < M; i++) {
memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
}
/* HERE IT IS: do SVD */
if (svdcmp(nrU, M, N, W, nrV)) {
aom_free(nrU);
aom_free(nrV);
return 1;
}
/* aom_free Numerical Recipes arrays */
aom_free(nrU);
aom_free(nrV);
return 0;
}
int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
double ans;
int i, j, k;
double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
double *const W = (double *)aom_malloc(N * sizeof(*matx));
double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
if (!(U && W && V)) {
return 1;
}
if (SVD(U, W, V, matx, M, N)) {
aom_free(U);
aom_free(W);
aom_free(V);
return 1;
}
for (i = 0; i < N; i++) {
if (fabs(W[i]) < TINY_NEAR_ZERO) {
aom_free(U);
aom_free(W);
aom_free(V);
return 1;
}
}
for (i = 0; i < N; i++) {
for (j = 0; j < M; j++) {
ans = 0;
for (k = 0; k < N; k++) {
ans += V[k + N * i] * U[k + N * j] / W[k];
}
inv[j + M * i] = ans;
}
}
aom_free(U);
aom_free(W);
aom_free(V);
return 0;
}
static void normalize_homography(double *pts, int n, double *T) {
double *p = pts;
double mean[2] = { 0, 0 };
double msqe = 0;
double scale;
int i;
for (i = 0; i < n; ++i, p += 2) {
mean[0] += p[0];
mean[1] += p[1];
}
mean[0] /= n;
mean[1] /= n;
for (p = pts, i = 0; i < n; ++i, p += 2) {
p[0] -= mean[0];
p[1] -= mean[1];
msqe += sqrt(p[0] * p[0] + p[1] * p[1]);
}
msqe /= n;
scale = sqrt(2) / msqe;
T[0] = scale;
T[1] = 0;
T[2] = -scale * mean[0];
T[3] = 0;
T[4] = scale;
T[5] = -scale * mean[1];
T[6] = 0;
T[7] = 0;
T[8] = 1;
for (p = pts, i = 0; i < n; ++i, p += 2) {
p[0] *= scale;
p[1] *= scale;
}
}
static void invnormalize_mat(double *T, double *iT) {
double is = 1.0 / T[0];
double m0 = -T[2] * is;
double m1 = -T[5] * is;
iT[0] = is;
iT[1] = 0;
iT[2] = m0;
iT[3] = 0;
iT[4] = is;
iT[5] = m1;
iT[6] = 0;
iT[7] = 0;
iT[8] = 1;
}
static void denormalize_homography(double *params, double *T1, double *T2) {
double iT2[9];
double params2[9];
invnormalize_mat(T2, iT2);
multiply_mat(params, T1, params2, 3, 3, 3);
multiply_mat(iT2, params2, params, 3, 3, 3);
}
static void denormalize_homography_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
memcpy(params_denorm, params, sizeof(*params) * 8);
params_denorm[8] = 1.0;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = params_denorm[3];
params[5] = params_denorm[4];
params[6] = params_denorm[6];
params[7] = params_denorm[7];
}
static void denormalize_affine_reorder(double *params, double *T1, double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = params[0];
params_denorm[1] = params[1];
params_denorm[2] = params[4];
params_denorm[3] = params[2];
params_denorm[4] = params[3];
params_denorm[5] = params[5];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = params_denorm[3];
params[5] = params_denorm[4];
params[6] = params[7] = 0;
}
static void denormalize_rotzoom_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = params[0];
params_denorm[1] = params[1];
params_denorm[2] = params[2];
params_denorm[3] = -params[1];
params_denorm[4] = params[0];
params_denorm[5] = params[3];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = -params[3];
params[5] = params[2];
params[6] = params[7] = 0;
}
static void denormalize_translation_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = 1;
params_denorm[1] = 0;
params_denorm[2] = params[0];
params_denorm[3] = 0;
params_denorm[4] = 1;
params_denorm[5] = params[1];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params[5] = 1;
params[3] = params[4] = 0;
params[6] = params[7] = 0;
}
static int find_translation(const int np, double *pts1, double *pts2,
double *mat) {
int i;
double sx, sy, dx, dy;
double sumx, sumy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);