Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
5b87f1bb
Commit
5b87f1bb
authored
Feb 24, 2015
by
Jingning Han
Browse files
Fix high bit-depth loop-filter sse2 compiling issue - part 4
Change-Id: I39f56f60425836f2e1ec07da71edd4810a4c78bb
parent
a28a8cb7
Changes
1
Hide whitespace changes
Inline
Side-by-side
vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
View file @
5b87f1bb
...
...
@@ -706,15 +706,7 @@ void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
const
uint8_t
*
_thresh
,
int
count
,
int
bd
)
{
const
__m128i
zero
=
_mm_set1_epi16
(
0
);
const
__m128i
blimit
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_blimit
),
zero
),
bd
-
8
);
const
__m128i
limit
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_limit
),
zero
),
bd
-
8
);
const
__m128i
thresh
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_thresh
),
zero
),
bd
-
8
);
__m128i
blimit
,
limit
,
thresh
;
__m128i
mask
,
hev
,
flat
;
__m128i
p3
=
_mm_loadu_si128
((
__m128i
*
)(
s
-
4
*
p
));
__m128i
p2
=
_mm_loadu_si128
((
__m128i
*
)(
s
-
3
*
p
));
...
...
@@ -737,30 +729,63 @@ void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
__m128i
work
;
const
__m128i
t4
=
_mm_set1_epi16
(
4
);
const
__m128i
t3
=
_mm_set1_epi16
(
3
);
const
__m128i
t80
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0x80
),
bd
-
8
)
;
const
__m128i
tff80
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0xff80
),
bd
-
8
)
;
const
__m128i
tffe0
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0xffe0
),
bd
-
8
)
;
const
__m128i
t1f
=
_mm_srli_epi16
(
_mm_set1_epi16
(
0x1fff
),
16
-
bd
)
;
__m128i
t80
;
__m128i
tff80
;
__m128i
tffe0
;
__m128i
t1f
;
// equivalent to shifting 0x1f left by bitdepth - 8
// and setting new bits to 1
const
__m128i
t1
=
_mm_set1_epi16
(
0x1
);
const
__m128i
t7f
=
_mm_srli_epi16
(
_mm_set1_epi16
(
0x7fff
),
16
-
bd
)
;
__m128i
t7f
;
// equivalent to shifting 0x7f left by bitdepth - 8
// and setting new bits to 1
const
__m128i
ps1
=
_mm_subs_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
s
-
2
*
p
)),
t80
);
const
__m128i
ps0
=
_mm_subs_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
s
-
1
*
p
)),
t80
);
const
__m128i
qs0
=
_mm_subs_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
s
+
0
*
p
)),
t80
);
const
__m128i
qs1
=
_mm_subs_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
s
+
1
*
p
)),
t80
);
__m128i
ps1
,
ps0
,
qs0
,
qs1
;
__m128i
filt
;
__m128i
work_a
;
__m128i
filter1
,
filter2
;
(
void
)
count
;
if
(
bd
==
8
)
{
blimit
=
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_blimit
),
zero
);
limit
=
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_limit
),
zero
);
thresh
=
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_thresh
),
zero
);
t80
=
_mm_set1_epi16
(
0x80
);
tff80
=
_mm_set1_epi16
(
0xff80
);
tffe0
=
_mm_set1_epi16
(
0xffe0
);
t1f
=
_mm_srli_epi16
(
_mm_set1_epi16
(
0x1fff
),
8
);
t7f
=
_mm_srli_epi16
(
_mm_set1_epi16
(
0x7fff
),
8
);
}
else
if
(
bd
==
10
)
{
blimit
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_blimit
),
zero
),
2
);
limit
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_limit
),
zero
),
2
);
thresh
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_thresh
),
zero
),
2
);
t80
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0x80
),
2
);
tff80
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0xff80
),
2
);
tffe0
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0xffe0
),
2
);
t1f
=
_mm_srli_epi16
(
_mm_set1_epi16
(
0x1fff
),
6
);
t7f
=
_mm_srli_epi16
(
_mm_set1_epi16
(
0x7fff
),
6
);
}
else
{
// bd == 12
blimit
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_blimit
),
zero
),
4
);
limit
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_limit
),
zero
),
4
);
thresh
=
_mm_slli_epi16
(
_mm_unpacklo_epi8
(
_mm_load_si128
((
const
__m128i
*
)
_thresh
),
zero
),
4
);
t80
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0x80
),
4
);
tff80
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0xff80
),
4
);
tffe0
=
_mm_slli_epi16
(
_mm_set1_epi16
(
0xffe0
),
4
);
t1f
=
_mm_srli_epi16
(
_mm_set1_epi16
(
0x1fff
),
4
);
t7f
=
_mm_srli_epi16
(
_mm_set1_epi16
(
0x7fff
),
4
);
}
ps1
=
_mm_subs_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
s
-
2
*
p
)),
t80
);
ps0
=
_mm_subs_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
s
-
1
*
p
)),
t80
);
qs0
=
_mm_subs_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
s
+
0
*
p
)),
t80
);
qs1
=
_mm_subs_epi16
(
_mm_loadu_si128
((
__m128i
*
)(
s
+
1
*
p
)),
t80
);
// filter_mask and hev_mask
flat
=
_mm_max_epi16
(
abs_p1p0
,
abs_q1q0
);
hev
=
_mm_subs_epu16
(
flat
,
thresh
);
...
...
@@ -796,6 +821,7 @@ void vp9_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
filt
=
_mm_adds_epi16
(
filt
,
work_a
);
filt
=
_mm_adds_epi16
(
filt
,
work_a
);
filt
=
signed_char_clamp_bd_sse2
(
_mm_adds_epi16
(
filt
,
work_a
),
bd
);
// (vp9_filter + 3 * (qs0 - ps0)) & mask
filt
=
_mm_and_si128
(
filt
,
mask
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment