Commit bf1d62dd authored by Yaowu Xu
Browse files

Move large buffers from stack to heap

This commit moves several large buffers from the stack to the heap to fix
crashes caused by stack overflow.

Change-Id: I9d1592e4f6dbfa18a475d0fc5674f6d3632f39ed
parent 67b9921b
...@@ -641,8 +641,8 @@ static void apply_domaintxfmrf_reduce_prec(int32_t *dat, int width, int height, ...@@ -641,8 +641,8 @@ static void apply_domaintxfmrf_reduce_prec(int32_t *dat, int width, int height,
void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height, void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
int stride, int param, uint8_t *dst, int stride, int param, uint8_t *dst,
int dst_stride) { int dst_stride, int32_t *tmpbuf) {
int32_t dat[RESTORATION_TILEPELS_MAX]; int32_t *dat = tmpbuf;
int i, j, t; int i, j, t;
for (i = 0; i < height; ++i) { for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) { for (j = 0; j < width; ++j) {
...@@ -664,7 +664,8 @@ void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height, ...@@ -664,7 +664,8 @@ void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx, static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx,
int width, int height, int stride, int width, int height, int stride,
RestorationInternal *rst, RestorationInternal *rst,
uint8_t *dst, int dst_stride) { uint8_t *dst, int dst_stride,
int32_t *tmpbuf) {
const int tile_width = rst->tile_width >> rst->subsampling_x; const int tile_width = rst->tile_width >> rst->subsampling_x;
const int tile_height = rst->tile_height >> rst->subsampling_y; const int tile_height = rst->tile_height >> rst->subsampling_y;
int h_start, h_end, v_start, v_end; int h_start, h_end, v_start, v_end;
...@@ -680,17 +681,21 @@ static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx, ...@@ -680,17 +681,21 @@ static void loop_domaintxfmrf_filter_tile(uint8_t *data, int tile_idx,
av1_domaintxfmrf_restoration( av1_domaintxfmrf_restoration(
data + h_start + v_start * stride, h_end - h_start, v_end - v_start, data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r,
dst + h_start + v_start * dst_stride, dst_stride); dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
} }
static void loop_domaintxfmrf_filter(uint8_t *data, int width, int height, static void loop_domaintxfmrf_filter(uint8_t *data, int width, int height,
int stride, RestorationInternal *rst, int stride, RestorationInternal *rst,
uint8_t *dst, int dst_stride) { uint8_t *dst, int dst_stride) {
int tile_idx; int tile_idx;
int32_t *tmpbuf =
(int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst, loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
dst, dst_stride); dst, dst_stride, tmpbuf);
} }
aom_free(tmpbuf);
} }
static void loop_switchable_filter(uint8_t *data, int width, int height, static void loop_switchable_filter(uint8_t *data, int width, int height,
...@@ -698,6 +703,8 @@ static void loop_switchable_filter(uint8_t *data, int width, int height, ...@@ -698,6 +703,8 @@ static void loop_switchable_filter(uint8_t *data, int width, int height,
uint8_t *dst, int dst_stride) { uint8_t *dst, int dst_stride) {
int tile_idx; int tile_idx;
uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE); uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
int32_t *tmpbuf32 =
(int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf32));
extend_frame(data, width, height, stride); extend_frame(data, width, height, stride);
copy_border(data, width, height, stride, dst, dst_stride); copy_border(data, width, height, stride, dst, dst_stride);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
...@@ -712,10 +719,11 @@ static void loop_switchable_filter(uint8_t *data, int width, int height, ...@@ -712,10 +719,11 @@ static void loop_switchable_filter(uint8_t *data, int width, int height,
tmpbuf, dst, dst_stride); tmpbuf, dst, dst_stride);
} else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) { } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst, loop_domaintxfmrf_filter_tile(data, tile_idx, width, height, stride, rst,
dst, dst_stride); dst, dst_stride, tmpbuf32);
} }
} }
aom_free(tmpbuf); aom_free(tmpbuf);
aom_free(tmpbuf32);
} }
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
...@@ -955,8 +963,9 @@ static void apply_domaintxfmrf_ver_highbd(int iter, int param, uint16_t *img, ...@@ -955,8 +963,9 @@ static void apply_domaintxfmrf_ver_highbd(int iter, int param, uint16_t *img,
void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height, void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
int stride, int param, int bit_depth, int stride, int param, int bit_depth,
uint16_t *dst, int dst_stride) { uint16_t *dst, int dst_stride,
int32_t dat[RESTORATION_TILEPELS_MAX]; int32_t *tmpbuf) {
int32_t *dat = tmpbuf;
int i, j, t; int i, j, t;
for (i = 0; i < height; ++i) { for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) { for (j = 0; j < width; ++j) {
...@@ -980,7 +989,8 @@ void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height, ...@@ -980,7 +989,8 @@ void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
static void loop_domaintxfmrf_filter_tile_highbd( static void loop_domaintxfmrf_filter_tile_highbd(
uint16_t *data, int tile_idx, int width, int height, int stride, uint16_t *data, int tile_idx, int width, int height, int stride,
RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride) { RestorationInternal *rst, int bit_depth, uint16_t *dst, int dst_stride,
int32_t *tmpbuf) {
const int tile_width = rst->tile_width >> rst->subsampling_x; const int tile_width = rst->tile_width >> rst->subsampling_x;
const int tile_height = rst->tile_height >> rst->subsampling_y; const int tile_height = rst->tile_height >> rst->subsampling_y;
int h_start, h_end, v_start, v_end; int h_start, h_end, v_start, v_end;
...@@ -996,7 +1006,7 @@ static void loop_domaintxfmrf_filter_tile_highbd( ...@@ -996,7 +1006,7 @@ static void loop_domaintxfmrf_filter_tile_highbd(
av1_domaintxfmrf_restoration_highbd( av1_domaintxfmrf_restoration_highbd(
data + h_start + v_start * stride, h_end - h_start, v_end - v_start, data + h_start + v_start * stride, h_end - h_start, v_end - v_start,
stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, bit_depth, stride, rst->rsi->domaintxfmrf_info[tile_idx].sigma_r, bit_depth,
dst + h_start + v_start * dst_stride, dst_stride); dst + h_start + v_start * dst_stride, dst_stride, tmpbuf);
} }
static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width, static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width,
...@@ -1005,12 +1015,16 @@ static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width, ...@@ -1005,12 +1015,16 @@ static void loop_domaintxfmrf_filter_highbd(uint8_t *data8, int width,
int bit_depth, uint8_t *dst8, int bit_depth, uint8_t *dst8,
int dst_stride) { int dst_stride) {
int tile_idx; int tile_idx;
int32_t *tmpbuf =
(int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
uint16_t *data = CONVERT_TO_SHORTPTR(data8); uint16_t *data = CONVERT_TO_SHORTPTR(data8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) { for (tile_idx = 0; tile_idx < rst->ntiles; ++tile_idx) {
loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, stride, loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, stride,
rst, bit_depth, dst, dst_stride); rst, bit_depth, dst, dst_stride,
tmpbuf);
} }
aom_free(tmpbuf);
} }
static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height, static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
...@@ -1019,6 +1033,8 @@ static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height, ...@@ -1019,6 +1033,8 @@ static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
int dst_stride) { int dst_stride) {
uint16_t *data = CONVERT_TO_SHORTPTR(data8); uint16_t *data = CONVERT_TO_SHORTPTR(data8);
uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE); uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE);
int32_t *tmpbuf32 =
(int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf32));
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
int i, tile_idx; int i, tile_idx;
copy_border_highbd(data, width, height, stride, dst, dst_stride); copy_border_highbd(data, width, height, stride, dst, dst_stride);
...@@ -1036,10 +1052,11 @@ static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height, ...@@ -1036,10 +1052,11 @@ static void loop_switchable_filter_highbd(uint8_t *data8, int width, int height,
} else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) { } else if (rst->rsi->restoration_type[tile_idx] == RESTORE_DOMAINTXFMRF) {
loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height, loop_domaintxfmrf_filter_tile_highbd(data, tile_idx, width, height,
stride, rst, bit_depth, dst, stride, rst, bit_depth, dst,
dst_stride); dst_stride, tmpbuf32);
} }
} }
aom_free(tmpbuf); aom_free(tmpbuf);
aom_free(tmpbuf32);
} }
#endif // CONFIG_AOM_HIGHBITDEPTH #endif // CONFIG_AOM_HIGHBITDEPTH
......
...@@ -193,11 +193,12 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, ...@@ -193,11 +193,12 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
int bit_depth, int r, int eps, void *tmpbuf); int bit_depth, int r, int eps, void *tmpbuf);
void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height, void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
int stride, int param, uint8_t *dst, int stride, int param, uint8_t *dst,
int dst_stride); int dst_stride, int32_t *tmpbuf);
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height, void av1_domaintxfmrf_restoration_highbd(uint16_t *dgd, int width, int height,
int stride, int param, int bit_depth, int stride, int param, int bit_depth,
uint16_t *dst, int dst_stride); uint16_t *dst, int dst_stride,
int32_t *tmpbuf);
#endif // CONFIG_AOM_HIGHBITDEPTH #endif // CONFIG_AOM_HIGHBITDEPTH
void decode_xq(int *xqd, int *xq); void decode_xq(int *xqd, int *xq);
void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi, void av1_loop_restoration_init(RestorationInternal *rst, RestorationInfo *rsi,
......
...@@ -178,11 +178,11 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height, ...@@ -178,11 +178,11 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
int dat_stride, uint8_t *src8, int dat_stride, uint8_t *src8,
int src_stride, int bit_depth, int src_stride, int bit_depth,
int *eps, int *xqd, void *tmpbuf) { int *eps, int *xqd, void *tmpbuf) {
int64_t *flt1 = (int64_t *)tmpbuf; int64_t *srd = (int64_t *)tmpbuf;
int64_t *dgd = srd + RESTORATION_TILEPELS_MAX;
int64_t *flt1 = dgd + RESTORATION_TILEPELS_MAX;
int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX); uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
int64_t srd[RESTORATION_TILEPELS_MAX];
int64_t dgd[RESTORATION_TILEPELS_MAX];
int i, j, ep, bestep = 0; int i, j, ep, bestep = 0;
int64_t err, besterr = -1; int64_t err, besterr = -1;
int exqd[2], bestxqd[2] = { 0, 0 }; int exqd[2], bestxqd[2] = { 0, 0 };
...@@ -249,7 +249,8 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, ...@@ -249,7 +249,8 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi,
RestorationInfo rsi; RestorationInfo rsi;
int tile_idx, tile_width, tile_height, nhtiles, nvtiles; int tile_idx, tile_width, tile_height, nhtiles, nvtiles;
int h_start, h_end, v_start, v_end; int h_start, h_end, v_start, v_end;
uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE); uint8_t *tmpbuf = aom_malloc(SGRPROJ_TMPBUF_SIZE +
RESTORATION_TILEPELS_MAX * sizeof(int64_t) * 2);
const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width, const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, &tile_width,
&tile_height, &nhtiles, &nvtiles); &tile_height, &nhtiles, &nvtiles);
// Make a copy of the unfiltered / processed recon buffer // Make a copy of the unfiltered / processed recon buffer
...@@ -370,12 +371,14 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -370,12 +371,14 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
int64_t best_sse = INT64_MAX, sse; int64_t best_sse = INT64_MAX, sse;
if (bit_depth == 8) { if (bit_depth == 8) {
uint8_t *tmp = (uint8_t *)aom_malloc(width * height * sizeof(*tmp)); uint8_t *tmp = (uint8_t *)aom_malloc(width * height * sizeof(*tmp));
int32_t *tmpbuf =
(int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
uint8_t *dgd = dgd8; uint8_t *dgd = dgd8;
uint8_t *src = src8; uint8_t *src = src8;
// First phase // First phase
for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) { for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) {
av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp, av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
width); width, tmpbuf);
sse = compute_sse(tmp, width, height, width, src, src_stride); sse = compute_sse(tmp, width, height, width, src, src_stride);
if (sse < best_sse || best_p == -1) { if (sse < best_sse || best_p == -1) {
best_p = p; best_p = p;
...@@ -388,7 +391,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -388,7 +391,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
p += second_p_step) { p += second_p_step) {
if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue; if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp, av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
width); width, tmpbuf);
sse = compute_sse(tmp, width, height, width, src, src_stride); sse = compute_sse(tmp, width, height, width, src, src_stride);
if (sse < best_sse) { if (sse < best_sse) {
best_p = p; best_p = p;
...@@ -401,7 +404,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -401,7 +404,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
p += third_p_step) { p += third_p_step) {
if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue; if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp, av1_domaintxfmrf_restoration(dgd, width, height, dgd_stride, p, tmp,
width); width, tmpbuf);
sse = compute_sse(tmp, width, height, width, src, src_stride); sse = compute_sse(tmp, width, height, width, src, src_stride);
if (sse < best_sse) { if (sse < best_sse) {
best_p = p; best_p = p;
...@@ -412,12 +415,14 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -412,12 +415,14 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
} else { } else {
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
uint16_t *tmp = (uint16_t *)aom_malloc(width * height * sizeof(*tmp)); uint16_t *tmp = (uint16_t *)aom_malloc(width * height * sizeof(*tmp));
int32_t *tmpbuf =
(int32_t *)aom_malloc(RESTORATION_TILEPELS_MAX * sizeof(*tmpbuf));
uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8); uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8);
uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *src = CONVERT_TO_SHORTPTR(src8);
// First phase // First phase
for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) { for (p = first_p_step / 2; p < DOMAINTXFMRF_PARAMS; p += first_p_step) {
av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p, av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
bit_depth, tmp, width); bit_depth, tmp, width, tmpbuf);
sse = compute_sse_highbd(tmp, width, height, width, src, src_stride); sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
if (sse < best_sse || best_p == -1) { if (sse < best_sse || best_p == -1) {
best_p = p; best_p = p;
...@@ -430,7 +435,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -430,7 +435,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
p += second_p_step) { p += second_p_step) {
if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue; if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p, av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
bit_depth, tmp, width); bit_depth, tmp, width, tmpbuf);
sse = compute_sse_highbd(tmp, width, height, width, src, src_stride); sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
if (sse < best_sse) { if (sse < best_sse) {
best_p = p; best_p = p;
...@@ -443,7 +448,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width, ...@@ -443,7 +448,7 @@ static void search_domaintxfmrf_restoration(uint8_t *dgd8, int width,
p += third_p_step) { p += third_p_step) {
if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue; if (p < 0 || p == best_p || p >= DOMAINTXFMRF_PARAMS) continue;
av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p, av1_domaintxfmrf_restoration_highbd(dgd, width, height, dgd_stride, p,
bit_depth, tmp, width); bit_depth, tmp, width, tmpbuf);
sse = compute_sse_highbd(tmp, width, height, width, src, src_stride); sse = compute_sse_highbd(tmp, width, height, width, src, src_stride);
if (sse < best_sse) { if (sse < best_sse) {
best_p = p; best_p = p;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment