Commit 014fa452, authored by Jingning Han and committed by Gerrit Code Review
Browse files

Use aligned copy in 8x8 Hadamard transform SSE2

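Replace the unaligned stores (_mm_storeu_si128) of the output coefficients
with aligned stores (_mm_store_si128). This reduces the cycle count of the
8x8 Hadamard transform by 20%. Note that the aligned store requires the
coeff buffer to be 16-byte aligned.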

Change-Id: If34c5e02f3afa42244c6efabe121f7cf5d2df41b
parent ebe1be91
...@@ -148,21 +148,21 @@ void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride, ...@@ -148,21 +148,21 @@ void vp9_hadamard_8x8_sse2(int16_t const *src_diff, int src_stride,
hadamard_col8_sse2(src, 0); hadamard_col8_sse2(src, 0);
hadamard_col8_sse2(src, 1); hadamard_col8_sse2(src, 1);
_mm_storeu_si128((__m128i *)coeff, src[0]); _mm_store_si128((__m128i *)coeff, src[0]);
coeff += 8; coeff += 8;
_mm_storeu_si128((__m128i *)coeff, src[1]); _mm_store_si128((__m128i *)coeff, src[1]);
coeff += 8; coeff += 8;
_mm_storeu_si128((__m128i *)coeff, src[2]); _mm_store_si128((__m128i *)coeff, src[2]);
coeff += 8; coeff += 8;
_mm_storeu_si128((__m128i *)coeff, src[3]); _mm_store_si128((__m128i *)coeff, src[3]);
coeff += 8; coeff += 8;
_mm_storeu_si128((__m128i *)coeff, src[4]); _mm_store_si128((__m128i *)coeff, src[4]);
coeff += 8; coeff += 8;
_mm_storeu_si128((__m128i *)coeff, src[5]); _mm_store_si128((__m128i *)coeff, src[5]);
coeff += 8; coeff += 8;
_mm_storeu_si128((__m128i *)coeff, src[6]); _mm_store_si128((__m128i *)coeff, src[6]);
coeff += 8; coeff += 8;
_mm_storeu_si128((__m128i *)coeff, src[7]); _mm_store_si128((__m128i *)coeff, src[7]);
} }
void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride, void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment