From 81624caf9ceb452fa8f909dcd8ad14511136da87 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <jmvalin@jmvalin.ca>
Date: Sat, 7 Oct 2023 17:45:39 -0400
Subject: [PATCH] Silence alignment warnings on x86 intrinsics

Those intrinsics are unaligned loads/stores, so they don't actually require
alignment and the casts are safe. Casting through (void*) first keeps the
compiler from warning that the cast increases the required alignment of the
target type.
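
For illustration (a minimal sketch, not part of this change, assuming a
compiler with a cast-align warning such as gcc's -Wcast-align=strict):

    #include <emmintrin.h>

    __m128i load4(const int *p)
    {
        /* Direct cast would warn: cast from 'const int *' to '__m128i *'
           increases required alignment of target type. */
        /* return _mm_loadu_si128((__m128i *)p); */

        /* Going through (void*) silences the warning. _mm_loadu_si128()
           performs an unaligned load, so no alignment is actually needed. */
        return _mm_loadu_si128((__m128i *)(void*)p);
    }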
---
 celt/x86/pitch_avx.c          |  2 +-
 celt/x86/vq_sse2.c            |  8 +++----
 celt/x86/x86cpu.h             |  2 +-
 dnn/vec_avx.h                 | 42 +++++++++++++++++------------------
 silk/x86/NSQ_del_dec_sse4_1.c | 16 ++++++-------
 silk/x86/NSQ_sse4_1.c         | 36 +++++++++++++++---------------
 silk/x86/VAD_sse4_1.c         |  2 +-
 silk/x86/VQ_WMat_EC_sse4_1.c  |  2 +-
 8 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/celt/x86/pitch_avx.c b/celt/x86/pitch_avx.c
index 1a667dd74..63dea97f1 100644
--- a/celt/x86/pitch_avx.c
+++ b/celt/x86/pitch_avx.c
@@ -56,7 +56,7 @@ static void xcorr_kernel_avx(const float *x, const float *y, float sum[8], int l
     if (i != len) {
         static const int mask[15] = {-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
         __m256i m;
-        m = _mm256_loadu_si256((__m256i*)(mask + 7+i-len));
+        m = _mm256_loadu_si256((__m256i*)(void*)(mask + 7+i-len));
         x0 = _mm256_maskload_ps(x+i, m);
         xsum0 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i  , m), xsum0);
         xsum1 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+1, m), xsum1);
diff --git a/celt/x86/vq_sse2.c b/celt/x86/vq_sse2.c
index 775042860..4c4ebf8e2 100644
--- a/celt/x86/vq_sse2.c
+++ b/celt/x86/vq_sse2.c
@@ -75,7 +75,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
       sums = _mm_add_ps(sums, x4);
       /* Clear y and iy in case we don't do the projection. */
       _mm_storeu_ps(&y[j], _mm_setzero_ps());
-      _mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
+      _mm_storeu_si128((__m128i*)(void*)&iy[j], _mm_setzero_si128());
       _mm_storeu_ps(&X[j], x4);
       _mm_storeu_ps(&signy[j], s4);
    }
@@ -116,7 +116,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
          rx4 = _mm_mul_ps(x4, rcp4);
          iy4 = _mm_cvttps_epi32(rx4);
          pulses_sum = _mm_add_epi32(pulses_sum, iy4);
-         _mm_storeu_si128((__m128i*)&iy[j], iy4);
+         _mm_storeu_si128((__m128i*)(void*)&iy[j], iy4);
          y4 = _mm_cvtepi32_ps(iy4);
          xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
          yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
@@ -205,10 +205,10 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
    {
       __m128i y4;
       __m128i s4;
-      y4 = _mm_loadu_si128((__m128i*)&iy[j]);
+      y4 = _mm_loadu_si128((__m128i*)(void*)&iy[j]);
       s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
       y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
-      _mm_storeu_si128((__m128i*)&iy[j], y4);
+      _mm_storeu_si128((__m128i*)(void*)&iy[j], y4);
    }
    RESTORE_STACK;
    return yy;
diff --git a/celt/x86/x86cpu.h b/celt/x86/x86cpu.h
index 8bd69551b..fe46d1d94 100644
--- a/celt/x86/x86cpu.h
+++ b/celt/x86/x86cpu.h
@@ -68,6 +68,6 @@ int opus_select_arch(void);
  (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(OP_LOADU_EPI32(x))))
 
 #define OP_CVTEPI16_EPI32_M64(x) \
- (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
+ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(void*)(x))))
 
 #endif
diff --git a/dnn/vec_avx.h b/dnn/vec_avx.h
index bc9331301..39002b110 100644
--- a/dnn/vec_avx.h
+++ b/dnn/vec_avx.h
@@ -335,7 +335,7 @@ static inline void vector_ps_to_epi8(unsigned char *x, const float *_x, int len)
        xi = _mm256_permute4x64_epi64(xi, 0xD8);
        xi = _mm256_packus_epi16(xi, _mm256_setzero_si256());
        xi = _mm256_permutevar8x32_epi32(xi, _mm256_setr_epi32(0,1, 0,0, 0,0, 0,0));
-       _mm256_storeu_si256 ((__m256i *)&x[i], xi);
+       _mm256_storeu_si256 ((__m256i *)(void*)&x[i], xi);
    }
 }
 
@@ -794,20 +794,20 @@ static inline void sparse_cgemv8x4(float *_out, const opus_int8 *w, const int *i
       {
          __m256i vxj;
          __m256i vw;
-         vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[*idx++]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
-         vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[*idx++]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
-         vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[*idx++]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
-         vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[*idx++]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
       }
@@ -818,8 +818,8 @@ static inline void sparse_cgemv8x4(float *_out, const opus_int8 *w, const int *i
          __m256i vw;
          int pos;
          pos = (*idx++);
-         vxj = _mm256_set1_epi32(*(int*)&x[pos]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[pos]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
       }
@@ -845,20 +845,20 @@ static inline void cgemv8x4(float *_out, const opus_int8 *w, const float *scale,
       {
          __m256i vxj;
          __m256i vw;
-         vxj = _mm256_set1_epi32(*(int*)&x[j]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[j]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
-         vxj = _mm256_set1_epi32(*(int*)&x[j+4]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[j+4]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
-         vxj = _mm256_set1_epi32(*(int*)&x[j+8]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[j+8]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
-         vxj = _mm256_set1_epi32(*(int*)&x[j+12]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[j+12]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
       }
@@ -867,8 +867,8 @@ static inline void cgemv8x4(float *_out, const opus_int8 *w, const float *scale,
       {
          __m256i vxj;
          __m256i vw;
-         vxj = _mm256_set1_epi32(*(int*)&x[j]);
-         vw = _mm256_loadu_si256((const __m256i *)w);
+         vxj = _mm256_set1_epi32(*(int*)(void*)&x[j]);
+         vw = _mm256_loadu_si256((const __m256i *)(void*)w);
          vy0 = opus_mm256_dpbusds_epi32(vy0, vxj, vw);
          w += 32;
       }
diff --git a/silk/x86/NSQ_del_dec_sse4_1.c b/silk/x86/NSQ_del_dec_sse4_1.c
index a58a76cd8..c7d47e5ea 100644
--- a/silk/x86/NSQ_del_dec_sse4_1.c
+++ b/silk/x86/NSQ_del_dec_sse4_1.c
@@ -428,7 +428,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
             LTP_pred_Q14 = 2;
             {
                 __m128i tmpa, tmpb, pred_lag_ptr_tmp;
-                pred_lag_ptr_tmp    = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
+                pred_lag_ptr_tmp    = _mm_loadu_si128( (__m128i *)(void*)(&pred_lag_ptr[ -3 ] ) );
                 pred_lag_ptr_tmp    = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B );
                 tmpa                = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 );
                 tmpa                = _mm_srli_si128( tmpa, 2 );
@@ -483,7 +483,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
                 tmpb = _mm_setzero_si128();
 
                 /* step 1 */
-                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */
+                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */
                 psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );      /* 0, -1, -2, -3 */
                 tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 );    /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */
 
@@ -497,7 +497,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
                 tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
 
                 /* step 2 */
-                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) );
+                psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -7 ] ) );
                 psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
                 tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 );
                 tmpa            = _mm_srli_epi64( tmpa, 16 );
@@ -512,7 +512,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
                 if ( opus_likely( predictLPCOrder == 16 ) )
                 {
                     /* step 3 */
-                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) );
+                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -11 ] ) );
                     psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
                     tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB );
                     tmpa            = _mm_srli_epi64( tmpa, 16 );
@@ -525,7 +525,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
                     tmpb            = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
 
                     /* step 4 */
-                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) );
+                    psLPC_Q14_tmp   = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -15 ] ) );
                     psLPC_Q14_tmp   = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
                     tmpa            = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF );
                     tmpa            = _mm_srli_epi64( tmpa, 16 );
@@ -830,7 +830,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
 
         xmm_x16_x2x0 = _mm_blend_epi16( xmm_x16_x2x0, xmm_x16_x3x1, 0xCC );
 
-        _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 );
+        _mm_storeu_si128( (__m128i *)(void*)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 );
     }
 
     for( ; i < psEncC->subfr_length; i++ ) {
@@ -862,7 +862,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
 
             for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 )
             {
-                xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
+                xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(void*)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
                 /* equal shift right 4 bytes*/
                 xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
 
@@ -874,7 +874,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
 
                 xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
 
-                _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
+                _mm_storeu_si128( (__m128i *)(void*)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
             }
 
             for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
diff --git a/silk/x86/NSQ_sse4_1.c b/silk/x86/NSQ_sse4_1.c
index d5ae1d3b1..51f5957c0 100644
--- a/silk/x86/NSQ_sse4_1.c
+++ b/silk/x86/NSQ_sse4_1.c
@@ -338,21 +338,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
     xmm_one = _mm_set_epi8( 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 );
 
     /* load a_Q12[0] - a_Q12[7] */
-    a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(&a_Q12[ 0 ] ) );
+    a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(void*)(&a_Q12[ 0 ] ) );
     /* load a_Q12[ 8 ] - a_Q12[ 15 ] */
-    a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(&a_Q12[ 8 ] ) );
+    a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(void*)(&a_Q12[ 8 ] ) );
 
     a_Q12_01234567 = _mm_shuffle_epi8( a_Q12_01234567, xmm_one );
     a_Q12_89ABCDEF = _mm_shuffle_epi8( a_Q12_89ABCDEF, xmm_one );
 
     /* load AR_shp_Q13 */
-    AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(&AR_shp_Q13[0] ) );
+    AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(void*)(&AR_shp_Q13[0] ) );
 
     /* load psLPC_Q14 */
     xmm_one = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0 );
 
-    xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-16]) );
-    xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-12]) );
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[-16]) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[-12]) );
 
     xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
     xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
@@ -360,8 +360,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
     psLPC_Q14_hi_89ABCDEF = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb );
     psLPC_Q14_lo_89ABCDEF = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
 
-    xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -8 ]) );
-    xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -4 ]) );
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -8 ]) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -4 ]) );
 
     xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
     xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
@@ -370,8 +370,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
     psLPC_Q14_lo_01234567 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
 
     /* load sAR2_Q14 */
-    xmm_tempa = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 0 ]) ) );
-    xmm_tempb = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 4 ]) ) );
+    xmm_tempa = _mm_loadu_si128( (__m128i *)(void*)(&(NSQ->sAR2_Q14[ 0 ]) ) );
+    xmm_tempb = _mm_loadu_si128( (__m128i *)(void*)(&(NSQ->sAR2_Q14[ 4 ]) ) );
 
     xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
     xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
@@ -443,7 +443,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
                 b_Q14_0123 = _mm_shuffle_epi32( b_Q14_3210, 0x1B );
 
                 /* loaded: [0] [-1] [-2] [-3] */
-                pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
+                pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(void*)(&pred_lag_ptr[ -3 ] ) );
                 /* shuffle to [-3] [-2] [-1] [0] and to new xmm */
                 xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, 0x1B );
                 /*64-bit multiply, a[2] * b[-2], a[0] * b[0] */
@@ -595,8 +595,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
     /* write back sAR2_Q14 */
     xmm_tempa = _mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 );
     xmm_tempb = _mm_unpacklo_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 );
-    _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa );
-    _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb );
+    _mm_storeu_si128( (__m128i *)(void*)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa );
+    _mm_storeu_si128( (__m128i *)(void*)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb );
 
     /* xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); */
     {
@@ -612,8 +612,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
         /* process xq */
         for (i = 0; i < length - 7; i += 8)
         {
-            xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 0 ] ) ) );
-            xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 4 ] ) ) );
+            xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(void*)(&(psLPC_Q14[ i + 0 ] ) ) );
+            xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(void*)(&(psLPC_Q14[ i + 4 ] ) ) );
 
             /* equal shift right 4 bytes*/
             xmm_xq_Q14_x3x1 = _mm_shuffle_epi32( xmm_xq_Q14_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) );
@@ -644,7 +644,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
             xmm_xq_Q14_3210 = _mm_packs_epi32( xmm_xq_Q14_3210, xmm_xq_Q14_7654 );
 
             /* save to xq */
-            _mm_storeu_si128( (__m128i *)(&xq[ i ] ), xmm_xq_Q14_3210 );
+            _mm_storeu_si128( (__m128i *)(void*)(&xq[ i ] ), xmm_xq_Q14_3210 );
         }
     }
     for ( ; i < length; i++)
@@ -698,7 +698,7 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
 
         xmm_x16_x2x0 = _mm_blend_epi16( xmm_x16_x2x0, xmm_x16_x3x1, 0xCC );
 
-        _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 );
+        _mm_storeu_si128( (__m128i *)(void*)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 );
     }
 
     for( ; i < psEncC->subfr_length; i++ ) {
@@ -729,7 +729,7 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
 
         for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 )
         {
-            xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
+            xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(void*)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
             /* equal shift right 4 bytes*/
             xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
 
@@ -741,7 +741,7 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
 
             xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
 
-            _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
+            _mm_storeu_si128( (__m128i *)(void*)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
         }
 
         for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
diff --git a/silk/x86/VAD_sse4_1.c b/silk/x86/VAD_sse4_1.c
index e7eaf9714..9e06bc79d 100644
--- a/silk/x86/VAD_sse4_1.c
+++ b/silk/x86/VAD_sse4_1.c
@@ -144,7 +144,7 @@ opus_int silk_VAD_GetSA_Q8_sse4_1(                  /* O    Return value, 0 if s
 
             for( i = 0; i < dec_subframe_length - 7; i += 8 )
             {
-                xmm_X   = _mm_loadu_si128( (__m128i *)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
+                xmm_X   = _mm_loadu_si128( (__m128i *)(void*)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
                 xmm_X   = _mm_srai_epi16( xmm_X, 3 );
                 xmm_X   = _mm_madd_epi16( xmm_X, xmm_X );
                 xmm_acc = _mm_add_epi32( xmm_acc, xmm_X );
diff --git a/silk/x86/VQ_WMat_EC_sse4_1.c b/silk/x86/VQ_WMat_EC_sse4_1.c
index 2c7d18d05..df4626b60 100644
--- a/silk/x86/VQ_WMat_EC_sse4_1.c
+++ b/silk/x86/VQ_WMat_EC_sse4_1.c
@@ -65,7 +65,7 @@ void silk_VQ_WMat_EC_sse4_1(
     neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 );
     neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 );
 
-    v_XX_31_Q17 = _mm_loadu_si128( (__m128i *)(&XX_Q17[ 1 ] ) );
+    v_XX_31_Q17 = _mm_loadu_si128( (__m128i *)(void*)(&XX_Q17[ 1 ] ) );
     v_XX_42_Q17 = _mm_shuffle_epi32( v_XX_31_Q17, _MM_SHUFFLE( 0, 3, 2, 1 ) );
 
     /* Loop over codebook */
-- 
GitLab