Commit 8a42dd72 authored by Debargha Mukherjee

Remove CONFIG_CB4X4 from aom_dsp

Change-Id: I1f385e61029e6f8a2a5841f07799d4a220f34937
parent d0b77ac7
...@@ -170,7 +170,7 @@ void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */, ...@@ -170,7 +170,7 @@ void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit, const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh) { const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -205,7 +205,7 @@ void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0, ...@@ -205,7 +205,7 @@ void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -280,7 +280,7 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat, ...@@ -280,7 +280,7 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit, void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -305,7 +305,7 @@ void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit, ...@@ -305,7 +305,7 @@ void aom_lpf_horizontal_6_c(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit, void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -338,7 +338,7 @@ void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0, ...@@ -338,7 +338,7 @@ void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit, void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -359,7 +359,7 @@ void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit, ...@@ -359,7 +359,7 @@ void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit,
void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -548,7 +548,7 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit, ...@@ -548,7 +548,7 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *limit,
const uint8_t *thresh, int count) { const uint8_t *thresh, int count) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int step = 4; int step = 4;
#else #else
int step = 8; int step = 8;
...@@ -608,7 +608,7 @@ void aom_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit, ...@@ -608,7 +608,7 @@ void aom_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit, void aom_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1); mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
#else #else
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2); mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
...@@ -661,7 +661,7 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, ...@@ -661,7 +661,7 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
void aom_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit, void aom_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) { const uint8_t *limit, const uint8_t *thresh) {
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4); mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4);
#else #else
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8); mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
...@@ -798,7 +798,7 @@ void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */, ...@@ -798,7 +798,7 @@ void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit, const uint8_t *blimit, const uint8_t *limit,
const uint8_t *thresh, int bd) { const uint8_t *thresh, int bd) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -843,7 +843,7 @@ void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, ...@@ -843,7 +843,7 @@ void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh, const uint8_t *limit, const uint8_t *thresh,
int bd) { int bd) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -921,7 +921,7 @@ void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit, ...@@ -921,7 +921,7 @@ void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh, const uint8_t *limit, const uint8_t *thresh,
int bd) { int bd) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -948,7 +948,7 @@ void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p, const uint8_t *blimit, ...@@ -948,7 +948,7 @@ void aom_highbd_lpf_horizontal_6_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh, const uint8_t *limit, const uint8_t *thresh,
int bd) { int bd) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -983,7 +983,7 @@ void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit, ...@@ -983,7 +983,7 @@ void aom_highbd_lpf_vertical_6_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh, const uint8_t *limit, const uint8_t *thresh,
int bd) { int bd) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -1006,7 +1006,7 @@ void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, ...@@ -1006,7 +1006,7 @@ void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh, const uint8_t *limit, const uint8_t *thresh,
int bd) { int bd) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int count = 4; int count = 4;
#else #else
int count = 8; int count = 8;
...@@ -1168,7 +1168,7 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p, ...@@ -1168,7 +1168,7 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
const uint8_t *thresh, int count, const uint8_t *thresh, int count,
int bd) { int bd) {
int i; int i;
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
int step = 4; int step = 4;
#else #else
int step = 8; int step = 8;
...@@ -1223,7 +1223,7 @@ void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p, ...@@ -1223,7 +1223,7 @@ void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
const uint8_t *blimit, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *limit,
const uint8_t *thresh, int bd) { const uint8_t *thresh, int bd) {
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd); highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
#else #else
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd); highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd);
...@@ -1272,7 +1272,7 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p, ...@@ -1272,7 +1272,7 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
void aom_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit, void aom_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh, const uint8_t *limit, const uint8_t *thresh,
int bd) { int bd) {
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4, bd); highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4, bd);
#else #else
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd); highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
...@@ -1283,7 +1283,7 @@ void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p, ...@@ -1283,7 +1283,7 @@ void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
const uint8_t *blimit, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *limit,
const uint8_t *thresh, int bd) { const uint8_t *thresh, int bd) {
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd); highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
#else #else
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd); highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd);
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include "aom_dsp/x86/lpf_common_sse2.h" #include "aom_dsp/x86/lpf_common_sse2.h"
#include "aom/aom_integer.h" #include "aom/aom_integer.h"
#if !CONFIG_PARALLEL_DEBLOCKING || !CONFIG_CB4X4 #if !CONFIG_PARALLEL_DEBLOCKING
static INLINE void get_limit(const uint8_t *bl, const uint8_t *l, static INLINE void get_limit(const uint8_t *bl, const uint8_t *l,
const uint8_t *t, int bd, __m256i *blt, const uint8_t *t, int bd, __m256i *blt,
__m256i *lt, __m256i *thr) { __m256i *lt, __m256i *thr) {
...@@ -201,9 +201,9 @@ static INLINE void highbd_filter4(__m256i *p, __m256i *q, const __m256i *mask, ...@@ -201,9 +201,9 @@ static INLINE void highbd_filter4(__m256i *p, __m256i *q, const __m256i *mask,
qs[1] = _mm256_adds_epi16(qs1, t80); qs[1] = _mm256_adds_epi16(qs1, t80);
ps[1] = _mm256_adds_epi16(ps1, t80); ps[1] = _mm256_adds_epi16(ps1, t80);
} }
#endif // #if !CONFIG_PARALLEL_DEBLOCKING || !CONFIG_CB4X4 #endif // #if !CONFIG_PARALLEL_DEBLOCKING
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int p, void aom_highbd_lpf_horizontal_edge_16_avx2(uint16_t *s, int p,
const uint8_t *blt, const uint8_t *blt,
const uint8_t *lt, const uint8_t *lt,
...@@ -870,4 +870,4 @@ void aom_highbd_lpf_vertical_8_dual_avx2( ...@@ -870,4 +870,4 @@ void aom_highbd_lpf_vertical_8_dual_avx2(
// Transpose back // Transpose back
highbd_transpose(src, 16, dst, p, 2); highbd_transpose(src, 16, dst, p, 2);
} }
#endif // CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #endif // CONFIG_PARALLEL_DEBLOCKING
...@@ -367,14 +367,14 @@ static INLINE void highbd_lpf_horz_edge_8_internal(uint16_t *s, int pitch, ...@@ -367,14 +367,14 @@ static INLINE void highbd_lpf_horz_edge_8_internal(uint16_t *s, int pitch,
// Note: // Note:
// highbd_lpf_horz_edge_8_8p() output 8 pixels per register // highbd_lpf_horz_edge_8_8p() output 8 pixels per register
// highbd_lpf_horz_edge_8_4p() output 4 pixels per register // highbd_lpf_horz_edge_8_4p() output 4 pixels per register
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
static INLINE void highbd_lpf_horz_edge_8_4p(uint16_t *s, int pitch, static INLINE void highbd_lpf_horz_edge_8_4p(uint16_t *s, int pitch,
const uint8_t *blt, const uint8_t *blt,
const uint8_t *lt, const uint8_t *lt,
const uint8_t *thr, int bd) { const uint8_t *thr, int bd) {
highbd_lpf_horz_edge_8_internal(s, pitch, blt, lt, thr, bd, FOUR_PIXELS); highbd_lpf_horz_edge_8_internal(s, pitch, blt, lt, thr, bd, FOUR_PIXELS);
} }
#endif // #if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #endif // #if CONFIG_PARALLEL_DEBLOCKING
static INLINE void highbd_lpf_horz_edge_8_8p(uint16_t *s, int pitch, static INLINE void highbd_lpf_horz_edge_8_8p(uint16_t *s, int pitch,
const uint8_t *blt, const uint8_t *blt,
...@@ -387,7 +387,7 @@ void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p, ...@@ -387,7 +387,7 @@ void aom_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
const uint8_t *_blimit, const uint8_t *_blimit,
const uint8_t *_limit, const uint8_t *_limit,
const uint8_t *_thresh, int bd) { const uint8_t *_thresh, int bd) {
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
highbd_lpf_horz_edge_8_4p(s, p, _blimit, _limit, _thresh, bd); highbd_lpf_horz_edge_8_4p(s, p, _blimit, _limit, _thresh, bd);
#else #else
highbd_lpf_horz_edge_8_8p(s, p, _blimit, _limit, _thresh, bd); highbd_lpf_horz_edge_8_8p(s, p, _blimit, _limit, _thresh, bd);
...@@ -398,7 +398,7 @@ void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p, ...@@ -398,7 +398,7 @@ void aom_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p,
const uint8_t *_blimit, const uint8_t *_blimit,
const uint8_t *_limit, const uint8_t *_limit,
const uint8_t *_thresh, int bd) { const uint8_t *_thresh, int bd) {
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
highbd_lpf_horz_edge_8_4p(s, p, _blimit, _limit, _thresh, bd); highbd_lpf_horz_edge_8_4p(s, p, _blimit, _limit, _thresh, bd);
#else #else
highbd_lpf_horz_edge_8_8p(s, p, _blimit, _limit, _thresh, bd); highbd_lpf_horz_edge_8_8p(s, p, _blimit, _limit, _thresh, bd);
...@@ -410,7 +410,7 @@ static INLINE void store_horizontal_8(const __m128i *p2, const __m128i *p1, ...@@ -410,7 +410,7 @@ static INLINE void store_horizontal_8(const __m128i *p2, const __m128i *p1,
const __m128i *p0, const __m128i *q0, const __m128i *p0, const __m128i *q0,
const __m128i *q1, const __m128i *q2, const __m128i *q1, const __m128i *q2,
int p, uint16_t *s) { int p, uint16_t *s) {
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
_mm_storel_epi64((__m128i *)(s - 3 * p), *p2); _mm_storel_epi64((__m128i *)(s - 3 * p), *p2);
_mm_storel_epi64((__m128i *)(s - 2 * p), *p1); _mm_storel_epi64((__m128i *)(s - 2 * p), *p1);
_mm_storel_epi64((__m128i *)(s - 1 * p), *p0); _mm_storel_epi64((__m128i *)(s - 1 * p), *p0);
...@@ -672,7 +672,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, ...@@ -672,7 +672,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
const __m128i zero = _mm_set1_epi16(0); const __m128i zero = _mm_set1_epi16(0);
__m128i blimit, limit, thresh; __m128i blimit, limit, thresh;
__m128i mask, hev, flat; __m128i mask, hev, flat;
#if !(CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4) #if !(CONFIG_PARALLEL_DEBLOCKING)
__m128i p3 = _mm_loadu_si128((__m128i *)(s - 4 * p)); __m128i p3 = _mm_loadu_si128((__m128i *)(s - 4 * p));
__m128i p2 = _mm_loadu_si128((__m128i *)(s - 3 * p)); __m128i p2 = _mm_loadu_si128((__m128i *)(s - 3 * p));
#endif #endif
...@@ -680,7 +680,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, ...@@ -680,7 +680,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
__m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * p)); __m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * p));
__m128i q0 = _mm_loadu_si128((__m128i *)(s - 0 * p)); __m128i q0 = _mm_loadu_si128((__m128i *)(s - 0 * p));
__m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 * p)); __m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 * p));
#if !(CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4) #if !(CONFIG_PARALLEL_DEBLOCKING)
__m128i q2 = _mm_loadu_si128((__m128i *)(s + 2 * p)); __m128i q2 = _mm_loadu_si128((__m128i *)(s + 2 * p));
__m128i q3 = _mm_loadu_si128((__m128i *)(s + 3 * p)); __m128i q3 = _mm_loadu_si128((__m128i *)(s + 3 * p));
#endif #endif
...@@ -766,7 +766,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, ...@@ -766,7 +766,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one)); mask = _mm_and_si128(mask, _mm_adds_epu16(limit, one));
mask = _mm_max_epi16(flat, mask); mask = _mm_max_epi16(flat, mask);
#if !(CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4) #if !(CONFIG_PARALLEL_DEBLOCKING)
__m128i work = _mm_max_epi16( __m128i work = _mm_max_epi16(
_mm_or_si128(_mm_subs_epu16(p2, p1), _mm_subs_epu16(p1, p2)), _mm_or_si128(_mm_subs_epu16(p2, p1), _mm_subs_epu16(p1, p2)),
_mm_or_si128(_mm_subs_epu16(p3, p2), _mm_subs_epu16(p2, p3))); _mm_or_si128(_mm_subs_epu16(p3, p2), _mm_subs_epu16(p2, p3)));
...@@ -841,7 +841,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p, ...@@ -841,7 +841,7 @@ void aom_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
p1 = _mm_adds_epi16(ps1, filt); p1 = _mm_adds_epi16(ps1, filt);
pixel_clamp(&pmin, &pmax, &p1); pixel_clamp(&pmin, &pmax, &p1);
p1 = _mm_adds_epi16(p1, t80); p1 = _mm_adds_epi16(p1, t80);
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
_mm_storel_epi64((__m128i *)(s - 2 * p), p1); _mm_storel_epi64((__m128i *)(s - 2 * p), p1);
_mm_storel_epi64((__m128i *)(s - 1 * p), p0); _mm_storel_epi64((__m128i *)(s - 1 * p), p0);
_mm_storel_epi64((__m128i *)(s + 0 * p), q0); _mm_storel_epi64((__m128i *)(s + 0 * p), q0);
...@@ -992,7 +992,7 @@ void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p, ...@@ -992,7 +992,7 @@ void aom_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int p,
highbd_transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16); highbd_transpose8x16(s - 8, s - 8 + 8 * p, p, t_dst, 16);
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16); highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4 #if CONFIG_PARALLEL_DEBLOCKING
highbd_lpf_horz_edge_8_8p(t_dst + 8 * 16, 16, blimit, limit, thresh, bd); highbd_lpf_horz_edge_8_8p(t_dst + 8 * 16, 16, blimit, limit, thresh, bd);
#else #else
aom_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, aom_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment