### Some optimizations on integer affine estimation

```
1. Adds a limit on the number of candidate samples used for the
   estimation.
2. Adds a limit on the max mv magnitude for use in the least-squares
   estimation.
3. Makes some of the internal variables 32-bit.

Impact on coding efficiency is in the noise range.

Change-Id: I8c1c3216368ceb2e3548660a3b8c159df54a8312
```
parent c20176e5
 ... ... @@ -46,6 +46,14 @@ (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \ : ROUND_POWER_OF_TWO((value), (n))) /* Shift down with rounding for use when n >= 0, value >= 0 for (64 bit) */ #define ROUND_POWER_OF_TWO_64(value, n) \ (((value) + ((((int64_t)1 << (n)) >> 1))) >> (n)) /* Shift down with rounding for signed integers, for use when n >= 0 (64 bit) */ #define ROUND_POWER_OF_TWO_SIGNED_64(value, n) \ (((value) < 0) ? -ROUND_POWER_OF_TWO_64(-(value), (n)) \ : ROUND_POWER_OF_TWO_64((value), (n))) #define ALIGN_POWER_OF_TWO(value, n) \ (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) ... ...
 ... ... @@ -1150,7 +1150,6 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, int mvnumber = 0; int global_offset_c = mi_col * MI_SIZE; int global_offset_r = mi_row * MI_SIZE; int samples_per_neighbor = 4; // scan the above row if (up_available) { ... ... @@ -1169,7 +1168,7 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, int cr_offset = -AOMMAX(bh, MI_SIZE) / 2 - 1; int cc_offset = i * MI_SIZE + AOMMAX(bw, MI_SIZE) / 2 - 1; int j; int pixelperblock = samples_per_neighbor; int pixelperblock = SAMPLES_PER_NEIGHBOR; mvasint[mvnumber] = mbmi->mv.as_int; mvnumber++; ... ... @@ -1212,7 +1211,7 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, int cr_offset = i * MI_SIZE + AOMMAX(bh, MI_SIZE) / 2 - 1; int cc_offset = -AOMMAX(bw, MI_SIZE) / 2 - 1; int j; int pixelperblock = samples_per_neighbor; int pixelperblock = SAMPLES_PER_NEIGHBOR; mvasint[mvnumber] = mbmi->mv.as_int; mvnumber++; ... ... @@ -1251,7 +1250,7 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, int cr_offset = -AOMMAX(bh, MI_SIZE) / 2 - 1; int cc_offset = -AOMMAX(bw, MI_SIZE) / 2 - 1; int j; int pixelperblock = samples_per_neighbor; int pixelperblock = SAMPLES_PER_NEIGHBOR; mvasint[mvnumber] = mbmi->mv.as_int; mvnumber++; ... ... @@ -1292,7 +1291,7 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, int cr_offset = AOMMAX(bh, MI_SIZE) / 2 - 1; int cc_offset = AOMMAX(bw, MI_SIZE) / 2 - 1; int j; int pixelperblock = samples_per_neighbor; int pixelperblock = SAMPLES_PER_NEIGHBOR; for (j = 0; j < pixelperblock; j++) { int r_offset = j / 2; ... ...
 ... ... @@ -1216,16 +1216,21 @@ void av1_warp_plane(WarpedMotionParams *wm, #if CONFIG_WARPED_MOTION #define IDET_PREC_BITS 48 #define LEAST_SQUARES_SAMPLES_MAX 32 #define LEAST_SQUARES_MV_MAX 1024 // max mv in 1/8-pel #define IDET_WARPEDMODEL_DIFF_BITS (IDET_PREC_BITS - WARPEDMODEL_PREC_BITS) static int find_affine_int(const int np, int *pts1, int *pts2, WarpedMotionParams *wm, int mi_row, int mi_col) { int64_t A = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } }; int64_t C = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } }; int64_t Bx = { 0, 0, 0 }; int64_t By = { 0, 0, 0 }; int32_t A = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } }; int32_t Bx = { 0, 0, 0 }; int32_t By = { 0, 0, 0 }; int i, j, n = 0, off; int64_t C00, C01, C02, C11, C12, C22; int64_t Px, Py; int64_t Det, iDet; int i, off; int64_t Det, iDet, v; // Offsets to make the values in the arrays smaller const int ux = mi_col * MI_SIZE * 8, uy = mi_row * MI_SIZE * 8; // Let source points (xi, yi) map to destimation points (xi', yi'), ... ... @@ -1245,79 +1250,83 @@ static int find_affine_int(const int np, int *pts1, int *pts2, // y' = h3.x + h4.y + dy // // The loop below computes: A = P'P, Bx = P'q, By = P'r for (i = 0; i < np; ++i) { const int dx = *(pts2++) - ux; const int dy = *(pts2++) - uy; const int sx = *(pts1++) - ux; const int sy = *(pts1++) - uy; A += sx * sx; A += sx * sy; A += sx; A += sy * sy; A += sy; A += 1; Bx += sx * dx; Bx += sy * dx; Bx += dx; By += sx * dy; By += sy * dy; By += dy; // We need to just compute inv(A).Bx and inv(A).By for the solutions. 
// for (j = 0; j < SAMPLES_PER_NEIGHBOR && n < LEAST_SQUARES_SAMPLES_MAX; ++j) { for (i = j; i < np && n < LEAST_SQUARES_SAMPLES_MAX; i += SAMPLES_PER_NEIGHBOR) { const int dx = pts2[i * 2] - ux; const int dy = pts2[i * 2 + 1] - uy; const int sx = pts1[i * 2] - ux; const int sy = pts1[i * 2 + 1] - uy; if (abs(sx - dx) >= LEAST_SQUARES_MV_MAX || abs(sy - dy) >= LEAST_SQUARES_MV_MAX) continue; A += sx * sx; A += sx * sy; A += sx; A += sy * sy; A += sy; A += 1; Bx += sx * dx; Bx += sy * dx; Bx += dx; By += sx * dy; By += sy * dy; By += dy; n++; } } // Compute Cofactors of A C = A * A - A * A; C = A * A - A * A; C = A * A - A * A; C = A * A - A * A; C = A * A - A * A; C = A * A - A * A; // Compute Determinant of A Det = C * A + C * A + C * A; // These are the least squares solutions but need scaling. Px = C * Bx + C * Bx + C * Bx; Px = C * Bx + C * Bx + C * Bx; Px = C * Bx + C * Bx + C * Bx; Py = C * By + C * By + C * By; Py = C * By + C * By + C * By; Py = C * By + C * By + C * By; C00 = (int64_t)A * A - (int64_t)A * A; C01 = (int64_t)A * A - (int64_t)A * A; C02 = (int64_t)A * A - (int64_t)A * A; C11 = (int64_t)A * A - (int64_t)A * A; C12 = (int64_t)A * A - (int64_t)A * A; C22 = (int64_t)A * A - (int64_t)A * A; // Scale by 1/16 Px = ROUND_POWER_OF_TWO_SIGNED(Px, 4); Px = ROUND_POWER_OF_TWO_SIGNED(Px, 4); Px = ROUND_POWER_OF_TWO_SIGNED(Px, 4); Py = ROUND_POWER_OF_TWO_SIGNED(Py, 4); Py = ROUND_POWER_OF_TWO_SIGNED(Py, 4); Py = ROUND_POWER_OF_TWO_SIGNED(Py, 4); Det = ROUND_POWER_OF_TWO_SIGNED(Det, 4); C00 = ROUND_POWER_OF_TWO_SIGNED(C00, 6); C01 = ROUND_POWER_OF_TWO_SIGNED(C01, 6); C02 = ROUND_POWER_OF_TWO_SIGNED(C02, 6); C11 = ROUND_POWER_OF_TWO_SIGNED(C11, 6); C12 = ROUND_POWER_OF_TWO_SIGNED(C12, 6); C22 = ROUND_POWER_OF_TWO_SIGNED(C22, 6); // Compute Determinant of A Det = C00 * A + C01 * A + C02 * A; if (Det == 0) return 1; // These divided by the Det, are the least squares solutions Px = C00 * Bx + C01 * Bx + C02 * Bx; Px = C01 * Bx + C11 * Bx + C12 * Bx; Px = C02 * 
Bx + C12 * Bx + C22 * Bx; Py = C00 * By + C01 * By + C02 * By; Py = C01 * By + C11 * By + C12 * By; Py = C02 * By + C12 * By + C22 * By; // Compute inverse of the Determinant // TODO(debargha, yuec): Try to remove this only division // TODO(debargha, yuec): Try to remove this only division if possible iDet = ((int64_t)1 << IDET_PREC_BITS) / Det; wm->wmmat = ((int64_t)Px * (int64_t)iDet + ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >> IDET_WARPEDMODEL_DIFF_BITS; wm->wmmat = ((int64_t)Px * (int64_t)iDet + ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >> IDET_WARPEDMODEL_DIFF_BITS; wm->wmmat = ((int64_t)Px * (int64_t)iDet + ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS + 2))) >> (IDET_WARPEDMODEL_DIFF_BITS + 3); v = Px * iDet; wm->wmmat = ROUND_POWER_OF_TWO_SIGNED_64(v, IDET_WARPEDMODEL_DIFF_BITS); v = Px * iDet; wm->wmmat = ROUND_POWER_OF_TWO_SIGNED_64(v, IDET_WARPEDMODEL_DIFF_BITS); v = Px * iDet; wm->wmmat = ROUND_POWER_OF_TWO_SIGNED_64(v, IDET_WARPEDMODEL_DIFF_BITS + 3); // Adjust x displacement for the offset off = (ux << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat - uy * wm->wmmat; wm->wmmat += ROUND_POWER_OF_TWO_SIGNED(off, 3); wm->wmmat = ((int64_t)Py * (int64_t)iDet + ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >> IDET_WARPEDMODEL_DIFF_BITS; wm->wmmat = ((int64_t)Py * (int64_t)iDet + ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >> IDET_WARPEDMODEL_DIFF_BITS; wm->wmmat = ((int64_t)Py * (int64_t)iDet + ((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS + 2))) >> (IDET_WARPEDMODEL_DIFF_BITS + 3); v = Py * iDet; wm->wmmat = ROUND_POWER_OF_TWO_SIGNED_64(v, IDET_WARPEDMODEL_DIFF_BITS); v = Py * iDet; wm->wmmat = ROUND_POWER_OF_TWO_SIGNED_64(v, IDET_WARPEDMODEL_DIFF_BITS); v = Py * iDet; wm->wmmat = ROUND_POWER_OF_TWO_SIGNED_64(v, IDET_WARPEDMODEL_DIFF_BITS + 3); // Adjust y displacement for the offset off = (uy << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat - uy * wm->wmmat; wm->wmmat += ROUND_POWER_OF_TWO_SIGNED(off, 3); ... ...
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!