Commit 3a0df186 authored by David Barker, committed by Debargha Mukherjee
Browse files

Simplify buffer management for self-guided restoration filter

* Remove some unused variables
* Reduce need for casts by typing intermediate buffers appropriately
* Avoid copying data which is never modified; use the original data
  instead.
* Reduce number of intermediate buffers required, saving allocations
  of 576KiB in the decoder and ~1MiB in the encoder

No effect on performance

Change-Id: I55243904dd8e818fb6d43fa431903736475d23ff
parent 2cc057cf
...@@ -91,7 +91,7 @@ void av1_free_ref_frame_buffers(BufferPool *pool) { ...@@ -91,7 +91,7 @@ void av1_free_ref_frame_buffers(BufferPool *pool) {
void av1_alloc_restoration_buffers(AV1_COMMON *cm) { void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
av1_alloc_restoration_struct(&cm->rst_info, cm->width, cm->height); av1_alloc_restoration_struct(&cm->rst_info, cm->width, cm->height);
cm->rst_internal.tmpbuf = cm->rst_internal.tmpbuf =
(uint8_t *)aom_realloc(cm->rst_internal.tmpbuf, RESTORATION_TMPBUF_SIZE); (int32_t *)aom_realloc(cm->rst_internal.tmpbuf, RESTORATION_TMPBUF_SIZE);
if (cm->rst_internal.tmpbuf == NULL) if (cm->rst_internal.tmpbuf == NULL)
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate internal tmpbuf for restoration"); "Failed to allocate internal tmpbuf for restoration");
......
...@@ -328,8 +328,9 @@ void decode_xq(int *xqd, int *xq) { ...@@ -328,8 +328,9 @@ void decode_xq(int *xqd, int *xq) {
#define APPROXIMATE_SGR 1 #define APPROXIMATE_SGR 1
void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride, void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride,
int bit_depth, int r, int eps, void *tmpbuf) { int bit_depth, int r, int eps,
int32_t *A = (int32_t *)tmpbuf; int32_t *tmpbuf) {
int32_t *A = tmpbuf;
int32_t *B = A + RESTORATION_TILEPELS_MAX; int32_t *B = A + RESTORATION_TILEPELS_MAX;
int32_t *T = B + RESTORATION_TILEPELS_MAX; int32_t *T = B + RESTORATION_TILEPELS_MAX;
int8_t num[RESTORATION_TILEPELS_MAX]; int8_t num[RESTORATION_TILEPELS_MAX];
...@@ -498,15 +499,15 @@ void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride, ...@@ -498,15 +499,15 @@ void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride,
static void apply_selfguided_restoration(uint8_t *dat, int width, int height, static void apply_selfguided_restoration(uint8_t *dat, int width, int height,
int stride, int bit_depth, int eps, int stride, int bit_depth, int eps,
int *xqd, uint8_t *dst, int dst_stride, int *xqd, uint8_t *dst, int dst_stride,
void *tmpbuf) { int32_t *tmpbuf) {
int xq[2]; int xq[2];
int32_t *flt1 = (int32_t *)tmpbuf; int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX); int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j; int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
for (i = 0; i < height; ++i) { for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) { for (j = 0; j < width; ++j) {
assert(i * width + j < RESTORATION_TILEPELS_MAX);
flt1[i * width + j] = dat[i * stride + j]; flt1[i * width + j] = dat[i * stride + j];
flt2[i * width + j] = dat[i * stride + j]; flt2[i * width + j] = dat[i * stride + j];
} }
...@@ -540,9 +541,6 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width, ...@@ -540,9 +541,6 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
const int tile_height = rst->tile_height >> rst->subsampling_y; const int tile_height = rst->tile_height >> rst->subsampling_y;
int h_start, h_end, v_start, v_end; int h_start, h_end, v_start, v_end;
uint8_t *data_p, *dst_p; uint8_t *data_p, *dst_p;
uint8_t *dat = (uint8_t *)rst->tmpbuf;
uint8_t *tmpbuf =
(uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat);
if (rst->rsi->sgrproj_info[tile_idx].level == 0) { if (rst->rsi->sgrproj_info[tile_idx].level == 0) {
loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst, loop_copy_tile(data, tile_idx, 0, 0, width, height, stride, rst, dst,
...@@ -557,7 +555,7 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width, ...@@ -557,7 +555,7 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride, apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride,
8, rst->rsi->sgrproj_info[tile_idx].ep, 8, rst->rsi->sgrproj_info[tile_idx].ep,
rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, rst->rsi->sgrproj_info[tile_idx].xqd, dst_p,
dst_stride, tmpbuf); dst_stride, rst->tmpbuf);
} }
static void loop_sgrproj_filter(uint8_t *data, int width, int height, static void loop_sgrproj_filter(uint8_t *data, int width, int height,
...@@ -814,15 +812,13 @@ static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height, ...@@ -814,15 +812,13 @@ static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
} }
} }
static void apply_selfguided_restoration_highbd(uint16_t *dat, int width, static void apply_selfguided_restoration_highbd(
int height, int stride, uint16_t *dat, int width, int height, int stride, int bit_depth, int eps,
int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf) {
int *xqd, uint16_t *dst,
int dst_stride, void *tmpbuf) {
int xq[2]; int xq[2];
int32_t *flt1 = (int32_t *)tmpbuf; int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX); int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j; int i, j;
for (i = 0; i < height; ++i) { for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) { for (j = 0; j < width; ++j) {
...@@ -861,9 +857,6 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx, ...@@ -861,9 +857,6 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
const int tile_height = rst->tile_height >> rst->subsampling_y; const int tile_height = rst->tile_height >> rst->subsampling_y;
int h_start, h_end, v_start, v_end; int h_start, h_end, v_start, v_end;
uint16_t *data_p, *dst_p; uint16_t *data_p, *dst_p;
uint16_t *dat = (uint16_t *)rst->tmpbuf;
uint8_t *tmpbuf =
(uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat);
if (rst->rsi->sgrproj_info[tile_idx].level == 0) { if (rst->rsi->sgrproj_info[tile_idx].level == 0) {
loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst, loop_copy_tile_highbd(data, tile_idx, 0, 0, width, height, stride, rst, dst,
...@@ -878,7 +871,7 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx, ...@@ -878,7 +871,7 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
apply_selfguided_restoration_highbd( apply_selfguided_restoration_highbd(
data_p, h_end - h_start, v_end - v_start, stride, bit_depth, data_p, h_end - h_start, v_end - v_start, stride, bit_depth,
rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd, rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd,
dst_p, dst_stride, tmpbuf); dst_p, dst_stride, rst->tmpbuf);
} }
static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height, static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
......
...@@ -40,12 +40,20 @@ extern "C" { ...@@ -40,12 +40,20 @@ extern "C" {
sqrt(((1 << (DOMAINTXFMRF_ITERS * 2)) - 1) * 2.0 / 3.0) sqrt(((1 << (DOMAINTXFMRF_ITERS * 2)) - 1) * 2.0 / 3.0)
// A single 32 bit buffer needed for the filter // A single 32 bit buffer needed for the filter
#define DOMAINTXFMRF_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * sizeof(int32_t)) #define DOMAINTXFMRF_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * sizeof(int32_t))
// One extra buffer needed in encoder, which is either 8-bit or 16-bit
// depending on the video bit depth.
#if CONFIG_AOM_HIGHBITDEPTH
#define DOMAINTXFMRF_EXTBUF_SIZE (RESTORATION_TILEPELS_MAX * sizeof(uint16_t))
#else
#define DOMAINTXFMRF_EXTBUF_SIZE (RESTORATION_TILEPELS_MAX * sizeof(uint8_t))
#endif
#define DOMAINTXFMRF_BITS (DOMAINTXFMRF_PARAMS_BITS) #define DOMAINTXFMRF_BITS (DOMAINTXFMRF_PARAMS_BITS)
// 6 highprecision buffers needed for the filter: // 5 32-bit buffers needed for the filter:
// 1 for the degraded frame, 2 for the restored versions and // 2 for the restored versions of the frame and
// 3 for each restoration operation // 3 for each restoration operation
#define SGRPROJ_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * 6 * sizeof(int32_t)) #define SGRPROJ_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * 5 * sizeof(int32_t))
#define SGRPROJ_EXTBUF_SIZE (0)
#define SGRPROJ_PARAMS_BITS 3 #define SGRPROJ_PARAMS_BITS 3
#define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS) #define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS)
...@@ -69,6 +77,7 @@ extern "C" { ...@@ -69,6 +77,7 @@ extern "C" {
#define WIENER_WIN (2 * WIENER_HALFWIN + 1) #define WIENER_WIN (2 * WIENER_HALFWIN + 1)
#define WIENER_WIN2 ((WIENER_WIN) * (WIENER_WIN)) #define WIENER_WIN2 ((WIENER_WIN) * (WIENER_WIN))
#define WIENER_TMPBUF_SIZE (0) #define WIENER_TMPBUF_SIZE (0)
#define WIENER_EXTBUF_SIZE (0)
#define WIENER_FILT_PREC_BITS 7 #define WIENER_FILT_PREC_BITS 7
#define WIENER_FILT_STEP (1 << WIENER_FILT_PREC_BITS) #define WIENER_FILT_STEP (1 << WIENER_FILT_PREC_BITS)
...@@ -101,6 +110,8 @@ extern "C" { ...@@ -101,6 +110,8 @@ extern "C" {
// Max of SGRPROJ_TMPBUF_SIZE, DOMAINTXFMRF_TMPBUF_SIZE, WIENER_TMPBUF_SIZE // Max of SGRPROJ_TMPBUF_SIZE, DOMAINTXFMRF_TMPBUF_SIZE, WIENER_TMPBUF_SIZE
#define RESTORATION_TMPBUF_SIZE (SGRPROJ_TMPBUF_SIZE) #define RESTORATION_TMPBUF_SIZE (SGRPROJ_TMPBUF_SIZE)
// Max of SGRPROJ_EXTBUF_SIZE, DOMAINTXFMRF_EXTBUF_SIZE, WIENER_EXTBUF_SIZE
#define RESTORATION_EXTBUF_SIZE (DOMAINTXFMRF_EXTBUF_SIZE)
typedef struct { typedef struct {
int level; int level;
...@@ -144,7 +155,7 @@ typedef struct { ...@@ -144,7 +155,7 @@ typedef struct {
int ntiles; int ntiles;
int tile_width, tile_height; int tile_width, tile_height;
int nhtiles, nvtiles; int nhtiles, nvtiles;
uint8_t *tmpbuf; int32_t *tmpbuf;
} RestorationInternal; } RestorationInternal;
static INLINE int get_rest_tilesize(int width, int height) { static INLINE int get_rest_tilesize(int width, int height) {
...@@ -211,7 +222,7 @@ int av1_alloc_restoration_struct(RestorationInfo *rst_info, int width, ...@@ -211,7 +222,7 @@ int av1_alloc_restoration_struct(RestorationInfo *rst_info, int width,
void av1_free_restoration_struct(RestorationInfo *rst_info); void av1_free_restoration_struct(RestorationInfo *rst_info);
void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride, void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride,
int bit_depth, int r, int eps, void *tmpbuf); int bit_depth, int r, int eps, int32_t *tmpbuf);
void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height, void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
int stride, int param, uint8_t *dst, int stride, int param, uint8_t *dst,
int dst_stride, int32_t *tmpbuf); int dst_stride, int32_t *tmpbuf);
......
...@@ -742,8 +742,8 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) { ...@@ -742,8 +742,8 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) {
NULL, NULL)) NULL, NULL))
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate trial restored frame buffer"); "Failed to allocate trial restored frame buffer");
cpi->extra_rstbuf = (uint8_t *)aom_realloc( cpi->extra_rstbuf =
cpi->extra_rstbuf, RESTORATION_TILEPELS_MAX * sizeof(int32_t)); (uint8_t *)aom_realloc(cpi->extra_rstbuf, RESTORATION_EXTBUF_SIZE);
if (!cpi->extra_rstbuf) if (!cpi->extra_rstbuf)
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate extra rstbuf for restoration"); "Failed to allocate extra rstbuf for restoration");
......
...@@ -404,7 +404,7 @@ typedef struct AV1_COMP { ...@@ -404,7 +404,7 @@ typedef struct AV1_COMP {
#if CONFIG_LOOP_RESTORATION #if CONFIG_LOOP_RESTORATION
YV12_BUFFER_CONFIG last_frame_db; YV12_BUFFER_CONFIG last_frame_db;
YV12_BUFFER_CONFIG trial_frame_rst; YV12_BUFFER_CONFIG trial_frame_rst;
uint8_t *extra_rstbuf; // Size RESTORATION_TILEPELS_MAX at highest precision uint8_t *extra_rstbuf; // Extra buffers used in restoration search
RestorationInfo rst_search; // Used for encoder side search RestorationInfo rst_search; // Used for encoder side search
#endif // CONFIG_LOOP_RESTORATION #endif // CONFIG_LOOP_RESTORATION
......
...@@ -121,35 +121,55 @@ static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src, ...@@ -121,35 +121,55 @@ static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src,
return filt_err; return filt_err;
} }
static int64_t get_pixel_proj_error(int32_t *src, int width, int height, static int64_t get_pixel_proj_error(uint8_t *src8, int width, int height,
int src_stride, int32_t *dgd, int src_stride, uint8_t *dat8,
int dgd_stride, int32_t *flt1, int dat_stride, int bit_depth,
int flt1_stride, int32_t *flt2, int32_t *flt1, int flt1_stride,
int flt2_stride, int *xqd) { int32_t *flt2, int flt2_stride, int *xqd) {
int i, j; int i, j;
int64_t err = 0; int64_t err = 0;
int xq[2]; int xq[2];
decode_xq(xqd, xq); decode_xq(xqd, xq);
for (i = 0; i < height; ++i) { if (bit_depth == 8) {
for (j = 0; j < width; ++j) { const uint8_t *src = src8;
const int32_t s = (int32_t)src[i * src_stride + j]; const uint8_t *dat = dat8;
const int32_t u = (int32_t)dgd[i * dgd_stride + j]; for (i = 0; i < height; ++i) {
const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u; for (j = 0; j < width; ++j) {
const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u; const int32_t u =
const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
const int32_t e = const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
ROUND_POWER_OF_TWO(s, SGRPROJ_RST_BITS); const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
err += e * e; const int32_t e =
ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
src[i * src_stride + j];
err += e * e;
}
}
} else {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
const int32_t u =
(int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
const int32_t e =
ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
src[i * src_stride + j];
err += e * e;
}
} }
} }
return err; return err;
} }
static void get_proj_subspace(int32_t *src, int width, int height, static void get_proj_subspace(uint8_t *src8, int width, int height,
int src_stride, int32_t *dgd, int dgd_stride, int src_stride, uint8_t *dat8, int dat_stride,
int32_t *flt1, int flt1_stride, int32_t *flt2, int bit_depth, int32_t *flt1, int flt1_stride,
int flt2_stride, int *xq) { int32_t *flt2, int flt2_stride, int *xq) {
int i, j; int i, j;
double H[2][2] = { { 0, 0 }, { 0, 0 } }; double H[2][2] = { { 0, 0 }, { 0, 0 } };
double C[2] = { 0, 0 }; double C[2] = { 0, 0 };
...@@ -159,17 +179,39 @@ static void get_proj_subspace(int32_t *src, int width, int height, ...@@ -159,17 +179,39 @@ static void get_proj_subspace(int32_t *src, int width, int height,
xq[0] = -(1 << SGRPROJ_PRJ_BITS) / 4; xq[0] = -(1 << SGRPROJ_PRJ_BITS) / 4;
xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0]; xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0];
for (i = 0; i < height; ++i) { if (bit_depth == 8) {
for (j = 0; j < width; ++j) { const uint8_t *src = src8;
const double u = (double)dgd[i * dgd_stride + j]; const uint8_t *dat = dat8;
const double s = (double)src[i * src_stride + j] - u; for (i = 0; i < height; ++i) {
const double f1 = (double)flt1[i * flt1_stride + j] - u; for (j = 0; j < width; ++j) {
const double f2 = (double)flt2[i * flt2_stride + j] - u; const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
H[0][0] += f1 * f1; const double s =
H[1][1] += f2 * f2; (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
H[0][1] += f1 * f2; const double f1 = (double)flt1[i * flt1_stride + j] - u;
C[0] += f1 * s; const double f2 = (double)flt2[i * flt2_stride + j] - u;
C[1] += f2 * s; H[0][0] += f1 * f1;
H[1][1] += f2 * f2;
H[0][1] += f1 * f2;
C[0] += f1 * s;
C[1] += f2 * s;
}
}
} else {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
const double s =
(double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u;
const double f1 = (double)flt1[i * flt1_stride + j] - u;
const double f2 = (double)flt2[i * flt2_stride + j] - u;
H[0][0] += f1 * f1;
H[1][1] += f2 * f2;
H[0][1] += f1 * f2;
C[0] += f1 * s;
C[1] += f2 * s;
}
} }
} }
H[0][0] /= size; H[0][0] /= size;
...@@ -196,33 +238,25 @@ void encode_xq(int *xq, int *xqd) { ...@@ -196,33 +238,25 @@ void encode_xq(int *xq, int *xqd) {
static void search_selfguided_restoration(uint8_t *dat8, int width, int height, static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
int dat_stride, uint8_t *src8, int dat_stride, uint8_t *src8,
int src_stride, int bit_depth, int src_stride, int bit_depth,
int *eps, int *xqd, void *srcbuf, int *eps, int *xqd, int32_t *rstbuf) {
void *rstbuf) { int32_t *flt1 = rstbuf;
int32_t *srd = (int32_t *)srcbuf;
int32_t *dgd = (int32_t *)rstbuf;
int32_t *flt1 = dgd + RESTORATION_TILEPELS_MAX;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX); int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j, ep, bestep = 0; int i, j, ep, bestep = 0;
int64_t err, besterr = -1; int64_t err, besterr = -1;
int exqd[2], bestxqd[2] = { 0, 0 }; int exqd[2], bestxqd[2] = { 0, 0 };
for (ep = 0; ep < SGRPROJ_PARAMS; ep++) { for (ep = 0; ep < SGRPROJ_PARAMS; ep++) {
int exq[2]; int exq[2];
if (bit_depth > 8) { if (bit_depth > 8) {
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dat = CONVERT_TO_SHORTPTR(dat8); uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
for (i = 0; i < height; ++i) { for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) { for (j = 0; j < width; ++j) {
flt1[i * width + j] = (int32_t)dat[i * dat_stride + j]; flt1[i * width + j] = (int32_t)dat[i * dat_stride + j];
flt2[i * width + j] = (int32_t)dat[i * dat_stride + j]; flt2[i * width + j] = (int32_t)dat[i * dat_stride + j];
dgd[i * width + j] = (int32_t)dat[i * dat_stride + j]
<< SGRPROJ_RST_BITS;
srd[i * width + j] = (int32_t)src[i * src_stride + j]
<< SGRPROJ_RST_BITS;
} }
} }
} else { } else {
uint8_t *src = src8;
uint8_t *dat = dat8; uint8_t *dat = dat8;
for (i = 0; i < height; ++i) { for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) { for (j = 0; j < width; ++j) {
...@@ -230,8 +264,6 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height, ...@@ -230,8 +264,6 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
const int l = i * dat_stride + j; const int l = i * dat_stride + j;
flt1[k] = (int32_t)dat[l]; flt1[k] = (int32_t)dat[l];
flt2[k] = (int32_t)dat[l]; flt2[k] = (int32_t)dat[l];
dgd[k] = (int32_t)dat[l] << SGRPROJ_RST_BITS;
srd[k] = (int32_t)src[i * src_stride + j] << SGRPROJ_RST_BITS;
} }
} }
} }
...@@ -239,11 +271,12 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height, ...@@ -239,11 +271,12 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
sgr_params[ep].r1, sgr_params[ep].e1, tmpbuf2); sgr_params[ep].r1, sgr_params[ep].e1, tmpbuf2);
av1_selfguided_restoration(flt2, width, height, width, bit_depth, av1_selfguided_restoration(flt2, width, height, width, bit_depth,
sgr_params[ep].r2, sgr_params[ep].e2, tmpbuf2); sgr_params[ep].r2, sgr_params[ep].e2, tmpbuf2);
get_proj_subspace(srd, width, height, width, dgd, width, flt1, width, flt2, get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride,
width, exq); bit_depth, flt1, width, flt2, width, exq);
encode_xq(exq, exqd); encode_xq(exq, exqd);
err = get_pixel_proj_error(srd, width, height, width, dgd, width, flt1, err =
width, flt2, width, exqd); get_pixel_proj_error(src8, width, height, src_stride, dat8, dat_stride,
bit_depth, flt1, width, flt2, width, exqd);
if (besterr == -1 || err < besterr) { if (besterr == -1 || err < besterr) {
bestep = ep; bestep = ep;
besterr = err; besterr = err;
...@@ -303,7 +336,7 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, ...@@ -303,7 +336,7 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
8, 8,
#endif // CONFIG_AOM_HIGHBITDEPTH #endif // CONFIG_AOM_HIGHBITDEPTH
&rsi->sgrproj_info[tile_idx].ep, rsi->sgrproj_info[tile_idx].xqd, &rsi->sgrproj_info[tile_idx].ep, rsi->sgrproj_info[tile_idx].xqd,
cpi->extra_rstbuf, cm->rst_internal.tmpbuf); cm->rst_internal.tmpbuf);
rsi->sgrproj_info[tile_idx].level = 1; rsi->sgrproj_info[tile_idx].level = 1;
err = try_restoration_tile(src, cpi, rsi, 1, partial_frame, tile_idx, 0, 0, err = try_restoration_tile(src, cpi, rsi, 1, partial_frame, tile_idx, 0, 0,
dst_frame); dst_frame);
...@@ -376,7 +409,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -376,7 +409,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
int height, int dgd_stride, int height, int dgd_stride,
uint8_t *src8, int src_stride, uint8_t *src8, int src_stride,
int bit_depth, int *sigma_r, int bit_depth, int *sigma_r,
void *fltbuf, void *rstbuf) { uint8_t *fltbuf, int32_t *tmpbuf) {
const int first_p_step = 8; const int first_p_step = 8;
const int second_p_range = first_p_step >> 1; const int second_p_range = first_p_step >> 1;
const int second_p_step = 2; const int second_p_step = 2;
...@@ -385,8 +418,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -385,8 +418,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
int p, best_p0, best_p = -1; int p, best_p0, best_p = -1;
int64_t best_sse = INT64_MAX, sse; int64_t best_sse = INT64_MAX, sse;
if (bit_depth == 8) { if (bit_depth == 8) {
uint8_t *flt = (uint8_t *)fltbuf; uint8_t *flt = fltbuf;
int32_t *tmpbuf = (int32_t *)rstbuf;
uint8_t *dgd = dgd8; uint8_t *dgd = dgd8;
uint8_t *src = src8; uint8_t *src = src8;
// First phase // First phase
...@@ -428,7 +460,6 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -428,7 +460,6 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
} else { } else {
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
uint16_t *flt = (uint16_t *)fltbuf; uint16_t *flt = (uint16_t *)fltbuf;
int32_t *tmpbuf = (int32_t *)rstbuf;
uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8); uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *src = CONVERT_TO_SHORTPTR(src8);
// First phase // First phase
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment