Commit 9c1d2440 authored by Steinar Midtskogen

Improve od_dering_simd_sse4_1

Change-Id: I6173dfb181777343d254aef880a8c9d379206556
parent 9b501e19
......@@ -179,18 +179,11 @@ int SIMD_FUNC(od_dir_find8)(const od_dering_in *img, int stride, int32_t *var,
__m128i max = _mm_max_epi32(dir03, dir47);
max = _mm_max_epi32(max, _mm_shuffle_epi32(max, _MM_SHUFFLE(1, 0, 3, 2)));
max = _mm_max_epi32(max, _mm_shuffle_epi32(max, _MM_SHUFFLE(2, 3, 0, 1)));
dir03 = _mm_and_si128(_mm_cmpeq_epi32(max, dir03),
_mm_setr_epi32(-1, -2, -3, -4));
dir47 = _mm_and_si128(_mm_cmpeq_epi32(max, dir47),
_mm_setr_epi32(-5, -6, -7, -8));
dir03 = _mm_max_epu32(dir03, dir47);
dir03 = _mm_max_epu32(dir03, _mm_unpackhi_epi64(dir03, dir03));
dir03 =
_mm_max_epu32(dir03, _mm_shufflelo_epi16(dir03, _MM_SHUFFLE(1, 0, 3, 2)));
dir03 = _mm_xor_si128(dir03, _mm_set1_epi32(0xFFFFFFFF));
best_dir = _mm_cvtsi128_si32(dir03);
best_cost = _mm_cvtsi128_si32(max);
__m128i t =
_mm_packs_epi32(_mm_cmpeq_epi32(max, dir03), _mm_cmpeq_epi32(max, dir47));
best_dir = _mm_movemask_epi8(_mm_packs_epi16(t, t));
best_dir = get_msb(best_dir ^ (best_dir - 1)); // Count trailing zeros
#else
/* Compute "mostly vertical" directions. */
compute_directions(lines, cost + 4);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment