Commit 5d108a36 authored by Debargha Mukherjee

Avoid large stack allocations

When ext-partition and ncobmc-adapt-weight are on, avoid overly large
stack allocations.

Change-Id: I8db74e45cac80c4e5dfd9e20cfc73d9978d1578e
parent bbcd8f76
...@@ -94,6 +94,35 @@ static void free_seg_map(AV1_COMMON *cm) { ...@@ -94,6 +94,35 @@ static void free_seg_map(AV1_COMMON *cm) {
cm->seg_map_alloc_size = 0; cm->seg_map_alloc_size = 0;
} }
// Releases every scratch buffer held in cm->ncobmcaw_buf and resets each slot
// to NULL, so a later alloc_scratch_buffers() call sees them as unallocated.
// Compiles to a no-op unless both CONFIG_NCOBMC and
// CONFIG_NCOBMC_ADAPT_WEIGHT are enabled.
static void free_scratch_buffers(AV1_COMMON *cm) {
  (void)cm;  // Parameter is otherwise unused when the block below is compiled out.
#if CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
  // Derive the slot count from the array itself so it cannot drift from the
  // declaration in AV1_COMMON.
  const int num_bufs =
      (int)(sizeof(cm->ncobmcaw_buf) / sizeof(cm->ncobmcaw_buf[0]));
  for (int i = 0; i < num_bufs; ++i) {
    // No NULL guard, matching how above_context[] is released in
    // av1_free_context_buffers(). NOTE(review): assumes aom_free(NULL) is a
    // no-op -- confirm against aom_mem.
    aom_free(cm->ncobmcaw_buf[i]);
    cm->ncobmcaw_buf[i] = NULL;
  }
#endif  // CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
}
// Ensures every slot of cm->ncobmcaw_buf points at a 16-byte-aligned scratch
// buffer of (1 + CONFIG_HIGHBITDEPTH) * MAX_MB_PLANE * MAX_SB_SQUARE bytes.
// Slots that are already non-NULL are left untouched, so repeated calls do not
// reallocate. Compiles to a no-op unless both CONFIG_NCOBMC and
// CONFIG_NCOBMC_ADAPT_WEIGHT are enabled.
//
// Always returns 0. Allocation failure is handed to CHECK_MEM_ERROR.
// NOTE(review): presumably that macro reports through cm's error state and
// does not return here -- confirm against its definition.
static int alloc_scratch_buffers(AV1_COMMON *cm) {
  (void)cm;  // Parameter is otherwise unused when the block below is compiled out.
#if CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
  const int num_bufs =
      (int)(sizeof(cm->ncobmcaw_buf) / sizeof(cm->ncobmcaw_buf[0]));
  for (int i = 0; i < num_bufs; ++i) {
    // Check each slot on its own: if an earlier call was cut short after
    // filling only some slots, the missing ones are allocated now instead of
    // staying NULL for the prediction code that later reads them.
    if (!cm->ncobmcaw_buf[i]) {
      CHECK_MEM_ERROR(
          cm, cm->ncobmcaw_buf[i],
          (uint8_t *)aom_memalign(
              16, (1 + CONFIG_HIGHBITDEPTH) * MAX_MB_PLANE * MAX_SB_SQUARE));
    }
  }
#endif  // CONFIG_NCOBMC && CONFIG_NCOBMC_ADAPT_WEIGHT
  return 0;
}
void av1_free_ref_frame_buffers(BufferPool *pool) { void av1_free_ref_frame_buffers(BufferPool *pool) {
int i; int i;
...@@ -177,6 +206,7 @@ void av1_free_context_buffers(AV1_COMMON *cm) { ...@@ -177,6 +206,7 @@ void av1_free_context_buffers(AV1_COMMON *cm) {
int i; int i;
cm->free_mi(cm); cm->free_mi(cm);
free_seg_map(cm); free_seg_map(cm);
free_scratch_buffers(cm);
for (i = 0; i < MAX_MB_PLANE; i++) { for (i = 0; i < MAX_MB_PLANE; i++) {
aom_free(cm->above_context[i]); aom_free(cm->above_context[i]);
cm->above_context[i] = NULL; cm->above_context[i] = NULL;
...@@ -210,6 +240,7 @@ int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) { ...@@ -210,6 +240,7 @@ int av1_alloc_context_buffers(AV1_COMMON *cm, int width, int height) {
free_seg_map(cm); free_seg_map(cm);
if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail; if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail;
} }
if (alloc_scratch_buffers(cm)) goto fail;
if (cm->above_context_alloc_cols < cm->mi_cols) { if (cm->above_context_alloc_cols < cm->mi_cols) {
// TODO(geza.lore): These are bigger than they need to be. // TODO(geza.lore): These are bigger than they need to be.
......
...@@ -534,6 +534,7 @@ typedef struct AV1Common { ...@@ -534,6 +534,7 @@ typedef struct AV1Common {
#endif #endif
#if CONFIG_NCOBMC_ADAPT_WEIGHT #if CONFIG_NCOBMC_ADAPT_WEIGHT
NCOBMC_KERNELS ncobmc_kernels[ADAPT_OVERLAP_BLOCKS][ALL_NCOBMC_MODES]; NCOBMC_KERNELS ncobmc_kernels[ADAPT_OVERLAP_BLOCKS][ALL_NCOBMC_MODES];
uint8_t *ncobmcaw_buf[4];
#endif #endif
#if CONFIG_LV_MAP #if CONFIG_LV_MAP
LV_MAP_CTX_TABLE coeff_ctx_table; LV_MAP_CTX_TABLE coeff_ctx_table;
......
...@@ -1664,17 +1664,6 @@ static void set_mode_info_offsets(AV1_COMMON *const cm, MACROBLOCKD *const xd, ...@@ -1664,17 +1664,6 @@ static void set_mode_info_offsets(AV1_COMMON *const cm, MACROBLOCKD *const xd,
static void get_ncobmc_recon(AV1_COMMON *const cm, MACROBLOCKD *xd, int mi_row, static void get_ncobmc_recon(AV1_COMMON *const cm, MACROBLOCKD *xd, int mi_row,
int mi_col, int bsize, int mode) { int mi_col, int bsize, int mode) {
#if CONFIG_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, tmp_buf_0[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_3[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
#else
DECLARE_ALIGNED(16, uint8_t, tmp_buf_0[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_2[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_3[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif
uint8_t *pred_buf[4][MAX_MB_PLANE]; uint8_t *pred_buf[4][MAX_MB_PLANE];
int pred_stride[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int pred_stride[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
// target block in pxl // target block in pxl
...@@ -1685,16 +1674,20 @@ static void get_ncobmc_recon(AV1_COMMON *const cm, MACROBLOCKD *xd, int mi_row, ...@@ -1685,16 +1674,20 @@ static void get_ncobmc_recon(AV1_COMMON *const cm, MACROBLOCKD *xd, int mi_row,
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t); int len = sizeof(uint16_t);
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[0], tmp_buf_0, MAX_SB_SQUARE, len); ASSIGN_ALIGNED_PTRS_HBD(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE,
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[1], tmp_buf_0, MAX_SB_SQUARE, len); len);
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[2], tmp_buf_0, MAX_SB_SQUARE, len); ASSIGN_ALIGNED_PTRS_HBD(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE,
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[3], tmp_buf_0, MAX_SB_SQUARE, len); len);
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE,
len);
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE,
len);
} else { } else {
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
ASSIGN_ALIGNED_PTRS(pred_buf[0], tmp_buf_0, MAX_SB_SQUARE); ASSIGN_ALIGNED_PTRS(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE);
ASSIGN_ALIGNED_PTRS(pred_buf[1], tmp_buf_1, MAX_SB_SQUARE); ASSIGN_ALIGNED_PTRS(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE);
ASSIGN_ALIGNED_PTRS(pred_buf[2], tmp_buf_2, MAX_SB_SQUARE); ASSIGN_ALIGNED_PTRS(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE);
ASSIGN_ALIGNED_PTRS(pred_buf[3], tmp_buf_3, MAX_SB_SQUARE); ASSIGN_ALIGNED_PTRS(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE);
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
} }
#endif #endif
......
...@@ -12637,17 +12637,6 @@ int64_t get_ncobmc_error(MACROBLOCKD *xd, int pxl_row, int pxl_col, ...@@ -12637,17 +12637,6 @@ int64_t get_ncobmc_error(MACROBLOCKD *xd, int pxl_row, int pxl_col,
int get_ncobmc_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, int get_ncobmc_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
MACROBLOCKD *xd, int mi_row, int mi_col, int bsize) { MACROBLOCKD *xd, int mi_row, int mi_col, int bsize) {
const AV1_COMMON *const cm = &cpi->common; const AV1_COMMON *const cm = &cpi->common;
#if CONFIG_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint8_t, tmp_buf_0[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_3[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
#else
DECLARE_ALIGNED(16, uint8_t, tmp_buf_0[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_2[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf_3[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif
uint8_t *pred_buf[4][MAX_MB_PLANE]; uint8_t *pred_buf[4][MAX_MB_PLANE];
// TODO(weitinglin): stride size needs to be fixed for high-bit depth // TODO(weitinglin): stride size needs to be fixed for high-bit depth
...@@ -12661,16 +12650,20 @@ int get_ncobmc_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, ...@@ -12661,16 +12650,20 @@ int get_ncobmc_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int len = sizeof(uint16_t); int len = sizeof(uint16_t);
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[0], tmp_buf_0, MAX_SB_SQUARE, len); ASSIGN_ALIGNED_PTRS_HBD(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE,
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[1], tmp_buf_0, MAX_SB_SQUARE, len); len);
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[2], tmp_buf_0, MAX_SB_SQUARE, len); ASSIGN_ALIGNED_PTRS_HBD(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE,
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[3], tmp_buf_0, MAX_SB_SQUARE, len); len);
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE,
len);
ASSIGN_ALIGNED_PTRS_HBD(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE,
len);
} else { } else {
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
ASSIGN_ALIGNED_PTRS(pred_buf[0], tmp_buf_0, MAX_SB_SQUARE); ASSIGN_ALIGNED_PTRS(pred_buf[0], cm->ncobmcaw_buf[0], MAX_SB_SQUARE);
ASSIGN_ALIGNED_PTRS(pred_buf[1], tmp_buf_1, MAX_SB_SQUARE); ASSIGN_ALIGNED_PTRS(pred_buf[1], cm->ncobmcaw_buf[1], MAX_SB_SQUARE);
ASSIGN_ALIGNED_PTRS(pred_buf[2], tmp_buf_2, MAX_SB_SQUARE); ASSIGN_ALIGNED_PTRS(pred_buf[2], cm->ncobmcaw_buf[2], MAX_SB_SQUARE);
ASSIGN_ALIGNED_PTRS(pred_buf[3], tmp_buf_3, MAX_SB_SQUARE); ASSIGN_ALIGNED_PTRS(pred_buf[3], cm->ncobmcaw_buf[3], MAX_SB_SQUARE);
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
} }
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment