diff --git a/aom_dsp/x86/variance_sse2.c b/aom_dsp/x86/variance_sse2.c
index 823e016caa109f39dfc0324f2715c7a2cf660f9b..18bd02cab86b2634b7b85673b84725af078692d1 100644
--- a/aom_dsp/x86/variance_sse2.c
+++ b/aom_dsp/x86/variance_sse2.c
@@ -9,6 +9,7 @@
  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
  */
 
+#include <assert.h>
 #include <emmintrin.h>  // SSE2
 
 #include "./aom_config.h"
@@ -166,12 +167,14 @@ static void variance_sse2(const unsigned char *src, int src_stride,
   }
 }
 
-unsigned int aom_variance4x4_sse2(const unsigned char *src, int src_stride,
-                                  const unsigned char *ref, int ref_stride,
+unsigned int aom_variance4x4_sse2(const uint8_t *src, int src_stride,
+                                  const uint8_t *ref, int ref_stride,
                                   unsigned int *sse) {
   int sum;
   get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
-  return *sse - (((unsigned int)sum * sum) >> 4);
+  assert(sum <= 255 * 4 * 4);
+  assert(sum >= -255 * 4 * 4);
+  return *sse - ((sum * sum) >> 4);
 }
 
 unsigned int aom_variance8x4_sse2(const uint8_t *src, int src_stride,
@@ -180,7 +183,9 @@ unsigned int aom_variance8x4_sse2(const uint8_t *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 8, 4, sse, &sum,
                 get4x4var_sse2, 4);
-  return *sse - (((unsigned int)sum * sum) >> 5);
+  assert(sum <= 255 * 8 * 4);
+  assert(sum >= -255 * 8 * 4);
+  return *sse - ((sum * sum) >> 5);
 }
 
 unsigned int aom_variance4x8_sse2(const uint8_t *src, int src_stride,
@@ -189,7 +194,9 @@ unsigned int aom_variance4x8_sse2(const uint8_t *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 4, 8, sse, &sum,
                 get4x4var_sse2, 4);
-  return *sse - (((unsigned int)sum * sum) >> 5);
+  assert(sum <= 255 * 8 * 4);
+  assert(sum >= -255 * 8 * 4);
+  return *sse - ((sum * sum) >> 5);
 }
 
 unsigned int aom_variance8x8_sse2(const unsigned char *src, int src_stride,
@@ -197,7 +204,9 @@ unsigned int aom_variance8x8_sse2(const unsigned char *src, int src_stride,
                                   unsigned int *sse) {
   int sum;
   aom_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
-  return *sse - (((unsigned int)sum * sum) >> 6);
+  assert(sum <= 255 * 8 * 8);
+  assert(sum >= -255 * 8 * 8);
+  return *sse - ((sum * sum) >> 6);
 }
 
 unsigned int aom_variance16x8_sse2(const unsigned char *src, int src_stride,
@@ -206,7 +215,9 @@ unsigned int aom_variance16x8_sse2(const unsigned char *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 16, 8, sse, &sum,
                 aom_get8x8var_sse2, 8);
-  return *sse - (((unsigned int)sum * sum) >> 7);
+  assert(sum <= 255 * 16 * 8);
+  assert(sum >= -255 * 16 * 8);
+  return *sse - ((sum * sum) >> 7);
 }
 
 unsigned int aom_variance8x16_sse2(const unsigned char *src, int src_stride,
@@ -215,7 +226,9 @@ unsigned int aom_variance8x16_sse2(const unsigned char *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 8, 16, sse, &sum,
                 aom_get8x8var_sse2, 8);
-  return *sse - (((unsigned int)sum * sum) >> 7);
+  assert(sum <= 255 * 16 * 8);
+  assert(sum >= -255 * 16 * 8);
+  return *sse - ((sum * sum) >> 7);
 }
 
 unsigned int aom_variance16x16_sse2(const unsigned char *src, int src_stride,
@@ -223,7 +236,9 @@ unsigned int aom_variance16x16_sse2(const unsigned char *src, int src_stride,
                                     unsigned int *sse) {
   int sum;
   aom_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
-  return *sse - (((unsigned int)sum * sum) >> 8);
+  assert(sum <= 255 * 16 * 16);
+  assert(sum >= -255 * 16 * 16);
+  return *sse - ((uint32_t)((int64_t)sum * sum) >> 8);
 }
 
 unsigned int aom_variance32x32_sse2(const uint8_t *src, int src_stride,
@@ -232,6 +247,8 @@ unsigned int aom_variance32x32_sse2(const uint8_t *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum,
                 aom_get16x16var_sse2, 16);
+  assert(sum <= 255 * 32 * 32);
+  assert(sum >= -255 * 32 * 32);
   return *sse - (((int64_t)sum * sum) >> 10);
 }
 
@@ -241,6 +258,8 @@ unsigned int aom_variance32x16_sse2(const uint8_t *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum,
                 aom_get16x16var_sse2, 16);
+  assert(sum <= 255 * 32 * 16);
+  assert(sum >= -255 * 32 * 16);
   return *sse - (((int64_t)sum * sum) >> 9);
 }
 
@@ -250,6 +269,8 @@ unsigned int aom_variance16x32_sse2(const uint8_t *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 16, 32, sse, &sum,
                 aom_get16x16var_sse2, 16);
+  assert(sum <= 255 * 32 * 16);
+  assert(sum >= -255 * 32 * 16);
   return *sse - (((int64_t)sum * sum) >> 9);
 }
 
@@ -259,6 +280,8 @@ unsigned int aom_variance64x64_sse2(const uint8_t *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum,
                 aom_get16x16var_sse2, 16);
+  assert(sum <= 255 * 64 * 64);
+  assert(sum >= -255 * 64 * 64);
   return *sse - (((int64_t)sum * sum) >> 12);
 }
 
@@ -268,6 +291,8 @@ unsigned int aom_variance64x32_sse2(const uint8_t *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum,
                 aom_get16x16var_sse2, 16);
+  assert(sum <= 255 * 64 * 32);
+  assert(sum >= -255 * 64 * 32);
   return *sse - (((int64_t)sum * sum) >> 11);
 }
 
@@ -277,6 +302,8 @@ unsigned int aom_variance32x64_sse2(const uint8_t *src, int src_stride,
   int sum;
   variance_sse2(src, src_stride, ref, ref_stride, 32, 64, sse, &sum,
                 aom_get16x16var_sse2, 16);
+  assert(sum <= 255 * 64 * 32);
+  assert(sum >= -255 * 64 * 32);
   return *sse - (((int64_t)sum * sum) >> 11);
 }
 