OSUOSL/Nero are experiencing Internet connectivity problems. This affects us as we're hosted with OSUOSL. We apologize for the inconvenience.

Commit 99bac01b authored by Yaowu Xu's avatar Yaowu Xu

change intermediate buffer type to be of int16_t

This aligns with the intention of data size, reduces stack size.

Change-Id: Ib772143c20c860c871a1e4399f4a3d1024499680
parent 40ef0c7f
......@@ -384,7 +384,7 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst0,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
......@@ -405,12 +405,12 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst0,
}
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
ROUND_POWER_OF_TWO(sum, conv_params->round_0);
(int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
// vertical filter
int32_t *src_vert = im_block + fo_vert * im_stride;
int16_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
......@@ -534,7 +534,7 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
......@@ -555,12 +555,12 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
}
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
ROUND_POWER_OF_TWO(sum, conv_params->round_0);
(int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
// vertical filter
int32_t *src_vert = im_block + fo_vert * im_stride;
int16_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
......@@ -660,7 +660,7 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst0,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
......@@ -681,12 +681,12 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst0,
}
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
ROUND_POWER_OF_TWO(sum, conv_params->round_0);
(int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
// vertical filter
int32_t *src_vert = im_block + fo_vert * im_stride;
int16_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
......@@ -844,7 +844,7 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
const int subpel_x_qn, const int x_step_qn,
const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params) {
int32_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_params_y->taps;
int im_stride = w;
......@@ -868,18 +868,18 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
}
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
ROUND_POWER_OF_TWO(sum, conv_params->round_0);
(int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
src_horiz += src_stride;
}
// vertical filter
int32_t *src_vert = im_block + fo_vert * im_stride;
int16_t *src_vert = im_block + fo_vert * im_stride;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
for (int x = 0; x < w; ++x) {
int y_qn = subpel_y_qn;
for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
const int32_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(y_filter_idx < SUBPEL_SHIFTS);
const int16_t *y_filter =
......@@ -990,7 +990,7 @@ void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
......@@ -1009,12 +1009,12 @@ void av1_highbd_convolve_2d_c(const uint16_t *src, int src_stride,
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
(void)bd;
im_block[y * im_stride + x] =
ROUND_POWER_OF_TWO(sum, conv_params->round_0);
(int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
// vertical filter
int32_t *src_vert = im_block + fo_vert * im_stride;
int16_t *src_vert = im_block + fo_vert * im_stride;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
......@@ -1044,7 +1044,7 @@ void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
int x, y, k;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
......@@ -1063,12 +1063,12 @@ void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
(void)bd;
im_block[y * im_stride + x] =
ROUND_POWER_OF_TWO(sum, conv_params->round_0);
(int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
}
// vertical filter
int32_t *src_vert = im_block + fo_vert * im_stride;
int16_t *src_vert = im_block + fo_vert * im_stride;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
......@@ -1106,7 +1106,7 @@ void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
const int subpel_x_qn, const int x_step_qn,
const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params, int bd) {
int32_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
int16_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_params_y->taps;
int im_stride = w;
......@@ -1129,18 +1129,18 @@ void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
}
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
im_block[y * im_stride + x] =
ROUND_POWER_OF_TWO(sum, conv_params->round_0);
(int16_t)ROUND_POWER_OF_TWO(sum, conv_params->round_0);
}
src_horiz += src_stride;
}
// vertical filter
int32_t *src_vert = im_block + fo_vert * im_stride;
int16_t *src_vert = im_block + fo_vert * im_stride;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
for (int x = 0; x < w; ++x) {
int y_qn = subpel_y_qn;
for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
const int32_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
const int16_t *src_y = &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(y_filter_idx < SUBPEL_SHIFTS);
const int16_t *y_filter =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment