Commit 185575a7, authored by David Barker, committed by Urvang Joshi

Fix some UBSan warnings

* Make intermediate arrays in av1(_highbd)_warp_affine_c signed,
  to avoid integer overflow when multiplying an 'unsigned int'
  by a negative 'int' value.

* Pad out arrays in masked_variance_test.cc so that the array
  stride is a multiple of 16 bytes.
  This fixes some UBSan errors in masked_variance_intrin_ssse3.c
  related to unaligned loads of 32-bit values.

BUG=aomedia:572

Change-Id: I0cf786c94870ff128c883bed8e900b0686afc3f7
parent c2d500aa
......@@ -954,7 +954,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
int subsampling_y, int bd, int comp_avg,
int16_t alpha, int16_t beta, int16_t gamma,
int16_t delta) {
uint32_t tmp[15 * 8];
int32_t tmp[15 * 8];
int i, j, k, l, m;
for (i = p_row; i < p_row + p_height; i += 8) {
......@@ -1250,7 +1250,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
int subsampling_x, int subsampling_y, int comp_avg,
int16_t alpha, int16_t beta, int16_t gamma,
int16_t delta) {
uint16_t tmp[15 * 8];
int32_t tmp[15 * 8];
int i, j, k, l, m;
const int bd = 8;
......
......@@ -59,30 +59,25 @@ TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
// Note: We pad the input arrays out with 15 extra elements, since the SSE
// implementations can read up to 15 elements off the end of the main data.
// The extra data is never actually used, but it simplifies the code
// if we can do this.
// Note: We pad out the input array to a multiple of 16 bytes wide, so that
// consecutive rows keep the 16-byte alignment.
DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
DECLARE_ALIGNED(16, uint8_t,
src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
DECLARE_ALIGNED(16, uint8_t,
ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
DECLARE_ALIGNED(16, uint8_t,
second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
DECLARE_ALIGNED(16, uint8_t,
msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
int err_count = 0;
int first_failure = -1;
int src_stride = (MAX_SB_SIZE + 1);
int ref_stride = (MAX_SB_SIZE + 1);
int msk_stride = (MAX_SB_SIZE + 1);
int src_stride = (MAX_SB_SIZE + 16);
int ref_stride = (MAX_SB_SIZE + 16);
int msk_stride = (MAX_SB_SIZE + 16);
int xoffset;
int yoffset;
for (int i = 0; i < number_of_iterations; ++i) {
int xoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) };
int yoffsets[] = { 0, 4, rnd(BIL_SUBPEL_SHIFTS) };
for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1); j++) {
for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16); j++) {
src_ptr[j] = rnd.Rand8();
ref_ptr[j] = rnd.Rand8();
second_pred_ptr[j] = rnd.Rand8();
......@@ -120,33 +115,30 @@ TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint8_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
DECLARE_ALIGNED(16, uint8_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
DECLARE_ALIGNED(16, uint8_t,
src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
DECLARE_ALIGNED(16, uint8_t,
ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
DECLARE_ALIGNED(16, uint8_t,
second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
DECLARE_ALIGNED(16, uint8_t,
msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 15]);
second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16)]);
int first_failure_x = -1;
int first_failure_y = -1;
int err_count = 0;
int first_failure = -1;
int src_stride = (MAX_SB_SIZE + 1);
int ref_stride = (MAX_SB_SIZE + 1);
int msk_stride = (MAX_SB_SIZE + 1);
int src_stride = (MAX_SB_SIZE + 16);
int ref_stride = (MAX_SB_SIZE + 16);
int msk_stride = (MAX_SB_SIZE + 16);
for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
for (int i = 0; i < 16; ++i) {
memset(src_ptr, (i & 0x1) ? 255 : 0,
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
memset(ref_ptr, (i & 0x2) ? 255 : 0,
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
memset(second_pred_ptr, (i & 0x4) ? 255 : 0,
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
memset(msk_ptr, (i & 0x8) ? 64 : 0,
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 16));
for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
ref_ret = ref_func_(src_ptr, src_stride, xoffset, yoffset, ref_ptr,
......@@ -204,18 +196,11 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
// Note: We pad the input arrays out with 7 extra elements, since the SSE
// implementations can read up to 7 elements off the end of the main data.
// The extra data is never actually used, but it simplifies the code
// if we can do this.
DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
DECLARE_ALIGNED(16, uint16_t,
src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
DECLARE_ALIGNED(16, uint16_t,
ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
DECLARE_ALIGNED(16, uint16_t,
second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
DECLARE_ALIGNED(16, uint8_t,
msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
......@@ -223,13 +208,13 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
int first_failure = -1;
int first_failure_x = -1;
int first_failure_y = -1;
int src_stride = (MAX_SB_SIZE + 1);
int ref_stride = (MAX_SB_SIZE + 1);
int msk_stride = (MAX_SB_SIZE + 1);
int src_stride = (MAX_SB_SIZE + 8);
int ref_stride = (MAX_SB_SIZE + 8);
int msk_stride = (MAX_SB_SIZE + 8);
int xoffset, yoffset;
for (int i = 0; i < number_of_iterations; ++i) {
for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1); j++) {
for (int j = 0; j < (MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8); j++) {
src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
second_pred_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
......@@ -270,14 +255,11 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
unsigned int ref_ret, opt_ret;
unsigned int ref_sse, opt_sse;
ACMRandom rnd(ACMRandom::DeterministicSeed());
DECLARE_ALIGNED(16, uint16_t, src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
DECLARE_ALIGNED(16, uint16_t, ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
DECLARE_ALIGNED(16, uint8_t, msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
DECLARE_ALIGNED(16, uint16_t,
src_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
DECLARE_ALIGNED(16, uint16_t,
ref_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
DECLARE_ALIGNED(16, uint8_t,
msk_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
DECLARE_ALIGNED(16, uint16_t,
second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1) + 7]);
second_pred_ptr[(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8)]);
uint8_t *src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);
uint8_t *ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
uint8_t *second_pred8_ptr = CONVERT_TO_BYTEPTR(second_pred_ptr);
......@@ -285,21 +267,21 @@ TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
int first_failure_y = -1;
int err_count = 0;
int first_failure = -1;
int src_stride = (MAX_SB_SIZE + 1);
int ref_stride = (MAX_SB_SIZE + 1);
int msk_stride = (MAX_SB_SIZE + 1);
int src_stride = (MAX_SB_SIZE + 8);
int ref_stride = (MAX_SB_SIZE + 8);
int msk_stride = (MAX_SB_SIZE + 8);
for (int xoffset = 0; xoffset < BIL_SUBPEL_SHIFTS; xoffset++) {
for (int yoffset = 0; yoffset < BIL_SUBPEL_SHIFTS; yoffset++) {
for (int i = 0; i < 16; ++i) {
aom_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
aom_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
aom_memset16(second_pred_ptr, (i & 0x4) ? ((1 << bit_depth_) - 1) : 0,
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
memset(msk_ptr, (i & 0x8) ? 64 : 0,
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 1));
(MAX_SB_SIZE + 1) * (MAX_SB_SIZE + 8));
for (int invert_mask = 0; invert_mask < 2; ++invert_mask) {
ref_ret = ref_func_(src8_ptr, src_stride, xoffset, yoffset, ref8_ptr,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment