Commit 8c41024b authored by Debargha Mukherjee's avatar Debargha Mukherjee

Reduce prec of matrices/vectors for warp estimate

Reduces precision of matrices by 2 bits.

No material change in performance.

Change-Id: I549a27da1dcb381fb329c345ee280dbd86b45bac
parent cf50989e
......@@ -1425,6 +1425,25 @@ void av1_warp_plane(WarpedMotionParams *wm,
#define LS_MV_MAX 256 // max mv in 1/8-pel
#define LS_STEP 2
// Assuming LS_MV_MAX is < MAX_SB_SIZE * 8,
// the precision needed is:
// (MAX_SB_SIZE_LOG2 + 3) [for sx * sx magnitude] +
// (MAX_SB_SIZE_LOG2 + 4) [for sx * dx magnitude] +
// 1 [for sign] +
// LEAST_SQUARES_SAMPLES_MAX_BITS
// [for adding up to LEAST_SQUARES_SAMPLES_MAX samples]
// The value is 23
#define LS_MAT_RANGE_BITS \
((MAX_SB_SIZE_LOG2 + 4) * 2 + LEAST_SQUARES_SAMPLES_MAX_BITS)
// Bit-depth reduction from the full-range
#define LS_MAT_DOWN_BITS 2
// bits range of A, Bx and By after downshifting
#define LS_MAT_BITS (LS_MAT_RANGE_BITS - LS_MAT_DOWN_BITS)
#define LS_MAT_MIN (-(1 << (LS_MAT_BITS - 1)))
#define LS_MAT_MAX ((1 << (LS_MAT_BITS - 1)) - 1)
#define LS_SUM(a) ((a)*4 + LS_STEP * 2)
#define LS_SQUARE(a) \
(((a) * (a)*4 + (a)*4 * LS_STEP + LS_STEP * LS_STEP * 2) >> 2)
......@@ -1489,9 +1508,32 @@ static int find_affine_int(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
n++;
}
}
int64_t Px[2], Py[2];
int64_t iDet, Det, v;
int16_t shift;
int downshift;
if (n >= 4)
downshift = LS_MAT_DOWN_BITS;
else if (n >= 2)
downshift = LS_MAT_DOWN_BITS - 1;
else
downshift = LS_MAT_DOWN_BITS - 2;
// Reduce precision by downshift bits
A[0][0] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][0], downshift), LS_MAT_MIN,
LS_MAT_MAX);
A[0][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[0][1], downshift), LS_MAT_MIN,
LS_MAT_MAX);
A[1][1] = clamp(ROUND_POWER_OF_TWO_SIGNED(A[1][1], downshift), LS_MAT_MIN,
LS_MAT_MAX);
Bx[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[0], downshift), LS_MAT_MIN,
LS_MAT_MAX);
Bx[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(Bx[1], downshift), LS_MAT_MIN,
LS_MAT_MAX);
By[0] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[0], downshift), LS_MAT_MIN,
LS_MAT_MAX);
By[1] = clamp(ROUND_POWER_OF_TWO_SIGNED(By[1], downshift), LS_MAT_MIN,
LS_MAT_MAX);
int64_t Px[2], Py[2], Det;
int16_t iDet, shift;
// These divided by the Det, are the least squares solutions
Px[0] = (int64_t)A[1][1] * Bx[0] - (int64_t)A[0][1] * Bx[1];
......@@ -1509,16 +1551,17 @@ static int find_affine_int(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
shift = 0;
}
v = Px[0] * iDet;
int64_t v;
v = Px[0] * (int64_t)iDet;
wm->wmmat[2] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
v = Px[1] * iDet;
v = Px[1] * (int64_t)iDet;
wm->wmmat[3] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
v = (dux << WARPEDMODEL_PREC_BITS) - sux * wm->wmmat[2] - suy * wm->wmmat[3];
wm->wmmat[0] = ROUND_POWER_OF_TWO_SIGNED(v, 3);
v = Py[0] * iDet;
v = Py[0] * (int64_t)iDet;
wm->wmmat[4] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
v = Py[1] * iDet;
v = Py[1] * (int64_t)iDet;
wm->wmmat[5] = ROUND_POWER_OF_TWO_SIGNED_64(v, shift);
v = (duy << WARPEDMODEL_PREC_BITS) - sux * wm->wmmat[4] - suy * wm->wmmat[5];
wm->wmmat[1] = ROUND_POWER_OF_TWO_SIGNED(v, 3);
......@@ -1631,7 +1674,7 @@ static int find_affine_int(const int np, int *pts1, int *pts2, BLOCK_SIZE bsize,
C12 = (int64_t)A[0][1] * A[0][2] - (int64_t)A[0][0] * A[1][2];
C22 = (int64_t)A[0][0] * A[1][1] - (int64_t)A[0][1] * A[0][1];
// Scale by 1/16
// Scale by 1/64
C00 = ROUND_POWER_OF_TWO_SIGNED(C00, 6);
C01 = ROUND_POWER_OF_TWO_SIGNED(C01, 6);
C02 = ROUND_POWER_OF_TWO_SIGNED(C02, 6);
......
......@@ -26,7 +26,10 @@
#define MAX_PARAMDIM 9
#if CONFIG_WARPED_MOTION
#define SAMPLES_ARRAY_SIZE ((2 * MAX_MIB_SIZE + 2) * 2)
#define LEAST_SQUARES_SAMPLES_MAX 8
#define LEAST_SQUARES_SAMPLES_MAX_BITS 3
#define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS)
#define DEFAULT_WMTYPE AFFINE
#endif // CONFIG_WARPED_MOTION
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment