Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
e2a90c0b
Commit
e2a90c0b
authored
Sep 23, 2014
by
Deb Mukherjee
Committed by
Gerrit Code Review
Sep 23, 2014
Browse files
Merge "High bit-depth loop/arf/postproc filter functions"
parents
6c6213d9
931ed516
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
test/lpf_8_test.cc
0 → 100644
View file @
e2a90c0b
This diff is collapsed.
Click to expand it.
test/test.mk
View file @
e2a90c0b
...
...
@@ -128,6 +128,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER)
+=
fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER)
+=
variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER)
+=
vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER)
+=
lpf_8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9)
+=
vp9_intrapred_test.cc
ifeq
($(CONFIG_VP9_ENCODER),yes)
...
...
vp9/common/vp9_loopfilter.c
View file @
e2a90c0b
This diff is collapsed.
Click to expand it.
vp9/common/vp9_loopfilter_filters.c
View file @
e2a90c0b
...
...
@@ -17,6 +17,20 @@ static INLINE int8_t signed_char_clamp(int t) {
return
(
int8_t
)
clamp
(
t
,
-
128
,
127
);
}
#if CONFIG_VP9_HIGHBITDEPTH
// Clamps t to the signed sample range for the given bit depth:
// [-512, 511] when bd is 10, [-2048, 2047] when bd is 12, and
// [-128, 127] when bd is 8 or any other value.
static INLINE int16_t signed_char_clamp_high(int t, int bd) {
  // Multiplier applied to the 8-bit range [-128, 127].
  int scale = 1;

  if (bd == 10) {
    scale = 4;
  } else if (bd == 12) {
    scale = 16;
  }
  return (int16_t)clamp(t, -128 * scale, 128 * scale - 1);
}
#endif
// should we apply any filter at all: 11111111 yes, 00000000 no
static
INLINE
int8_t
filter_mask
(
uint8_t
limit
,
uint8_t
blimit
,
uint8_t
p3
,
uint8_t
p2
,
...
...
@@ -337,3 +351,390 @@ void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
const
uint8_t
*
limit
,
const
uint8_t
*
thresh
)
{
mb_lpf_vertical_edge_w
(
s
,
p
,
blimit
,
limit
,
thresh
,
16
);
}
#if CONFIG_VP9_HIGHBITDEPTH
// Decides whether the edge should be filtered at all.
// Returns -1 (all bits set) when every difference between neighbouring
// samples on each side of the edge is within limit AND the combined step
// across the edge (2 * |p0 - q0| + |p1 - q1| / 2) is within blimit;
// returns 0 otherwise. Both 8-bit thresholds are shifted left by (bd - 8)
// before being compared with the samples.
static INLINE int8_t high_filter_mask(uint8_t limit, uint8_t blimit,
                                      uint16_t p3, uint16_t p2,
                                      uint16_t p1, uint16_t p0,
                                      uint16_t q0, uint16_t q1,
                                      uint16_t q2, uint16_t q3, int bd) {
  const int16_t limit16 = (uint16_t)limit << (bd - 8);
  const int16_t blimit16 = (uint16_t)blimit << (bd - 8);
  const int out_of_range =
      abs(p3 - p2) > limit16 || abs(p2 - p1) > limit16 ||
      abs(p1 - p0) > limit16 || abs(q1 - q0) > limit16 ||
      abs(q2 - q1) > limit16 || abs(q3 - q2) > limit16 ||
      abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16;

  return out_of_range ? 0 : -1;
}
// Flatness test over the four samples on each side of an edge.
// Returns -1 (all bits set) when p1..p3 each differ from p0, and q1..q3
// each differ from q0, by no more than thresh << (bd - 8); returns 0
// otherwise.
static INLINE int8_t high_flat_mask4(uint8_t thresh,
                                     uint16_t p3, uint16_t p2,
                                     uint16_t p1, uint16_t p0,
                                     uint16_t q0, uint16_t q1,
                                     uint16_t q2, uint16_t q3, int bd) {
  const int16_t thresh16 = (uint16_t)thresh << (bd - 8);
  const int not_flat =
      abs(p1 - p0) > thresh16 || abs(q1 - q0) > thresh16 ||
      abs(p2 - p0) > thresh16 || abs(q2 - q0) > thresh16 ||
      abs(p3 - p0) > thresh16 || abs(q3 - q0) > thresh16;

  return not_flat ? 0 : -1;
}
// Extends high_flat_mask4() by one extra sample per side.
// The result is the complement of: (complement of the high_flat_mask4()
// result for p3..q3), forced to all ones when p4 or q4 differs from
// p0 / q0 respectively by more than thresh << (bd - 8).
static INLINE int8_t high_flat_mask5(uint8_t thresh,
                                     uint16_t p4, uint16_t p3,
                                     uint16_t p2, uint16_t p1,
                                     uint16_t p0, uint16_t q0,
                                     uint16_t q1, uint16_t q2,
                                     uint16_t q3, uint16_t q4, int bd) {
  // Inverted inner result: non-zero bits mean "not flat".
  int8_t not_flat =
      (int8_t)~high_flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3, bd);
  const int16_t thresh16 = (uint16_t)thresh << (bd - 8);

  if (abs(p4 - p0) > thresh16 || abs(q4 - q0) > thresh16) {
    not_flat = -1;
  }
  return ~not_flat;
}
// High edge variance test for the samples next to an edge.
// Returns -1 (all 16 bits set) when |p1 - p0| or |q1 - q0| exceeds
// thresh << (bd - 8), and 0 otherwise.
static INLINE int16_t high_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0,
                                    uint16_t q0, uint16_t q1, int bd) {
  const int16_t thresh16 = (uint16_t)thresh << (bd - 8);
  const int p_side = abs(p1 - p0) > thresh16;
  const int q_side = abs(q1 - q0) > thresh16;

  return (p_side || q_side) ? -1 : 0;
}
// Applies the 4-tap loop filter to the two samples on each side of an edge.
//   mask   - value ANDed into the filter term; 0 disables the adjustment.
//   thresh - high-edge-variance threshold handed to high_hev_mask().
//   op1, op0, oq0, oq1 - the samples p1, p0, q0, q1; updated in place.
//   bd     - bit depth of the samples.
static INLINE void high_filter4(int8_t mask, uint8_t thresh, uint16_t *op1,
                                uint16_t *op0, uint16_t *oq0, uint16_t *oq1,
                                int bd) {
  int16_t filter1, filter2;
  // Subtracting 0x80 << shift turns the unsigned samples into signed values
  // centred on zero (-128..127 for 8 bits, scaled up for higher bit depths).
  const int shift = bd - 8;
  const int16_t ps1 = (int16_t)*op1 - (0x80 << shift);
  const int16_t ps0 = (int16_t)*op0 - (0x80 << shift);
  const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift);
  const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift);
  // Kept in the signed type that high_hev_mask() returns: after integer
  // promotion the mask is then exactly 0 or -1, so "& hev" and "& ~hev"
  // never produce a value outside the int16_t range.
  const int16_t hev = high_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd);

  // Add outer taps if we have high edge variance.
  int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev;

  // Inner taps.
  filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask;

  // The q side is adjusted with +4 rounding and the p side with +3 rounding
  // before the shift by 3.
  filter1 = signed_char_clamp_high(filter + 4, bd) >> 3;
  filter2 = signed_char_clamp_high(filter + 3, bd) >> 3;

  *oq0 = signed_char_clamp_high(qs0 - filter1, bd) + (0x80 << shift);
  *op0 = signed_char_clamp_high(ps0 + filter2, bd) + (0x80 << shift);

  // Outer tap adjustments: applied only when there is no high edge variance.
  filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev;

  *oq1 = signed_char_clamp_high(qs1 - filter, bd) + (0x80 << shift);
  *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift);
}
// 4-tap loop filter across a horizontal edge.
// s points at the q0 sample of the first column; the rows above and below
// the edge are reached at multiples of the pitch p (in samples).
// 8 * count consecutive columns are processed with the same thresholds.
void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
                                   const uint8_t *blimit,
                                   const uint8_t *limit,
                                   const uint8_t *thresh,
                                   int count, int bd) {
  int col;

  for (col = 0; col < 8 * count; ++col, ++s) {
    // The mask is computed from the samples before any of them is modified.
    const int8_t mask = high_filter_mask(*limit, *blimit,
                                         s[-4 * p], s[-3 * p],
                                         s[-2 * p], s[-p],
                                         s[0], s[p],
                                         s[2 * p], s[3 * p], bd);
    high_filter4(mask, *thresh, s - 2 * p, s - p, s, s + p, bd);
  }
}
// Runs the 4-tap horizontal-edge filter on two adjacent 8-column groups:
// columns 0..7 use blimit0/limit0/thresh0 and columns 8..15 use
// blimit1/limit1/thresh1.
void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
                                        const uint8_t *blimit0,
                                        const uint8_t *limit0,
                                        const uint8_t *thresh0,
                                        const uint8_t *blimit1,
                                        const uint8_t *limit1,
                                        const uint8_t *thresh1,
                                        int bd) {
  uint16_t *const second_group = s + 8;

  vp9_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
  vp9_highbd_lpf_horizontal_4_c(second_group, p, blimit1, limit1, thresh1, 1,
                                bd);
}
// 4-tap loop filter across a vertical edge.
// s points at the q0 sample of the first row; p3..p0 are the four samples
// to its left and q0..q3 the four starting at s. 8 * count consecutive rows
// (pitch samples apart) are processed with the same thresholds.
void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch,
                                 const uint8_t *blimit,
                                 const uint8_t *limit,
                                 const uint8_t *thresh,
                                 int count, int bd) {
  int row;

  for (row = 0; row < 8 * count; ++row, s += pitch) {
    // The mask is computed from the samples before any of them is modified.
    const int8_t mask = high_filter_mask(*limit, *blimit,
                                         s[-4], s[-3], s[-2], s[-1],
                                         s[0], s[1], s[2], s[3], bd);
    high_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd);
  }
}
// Runs the 4-tap vertical-edge filter on two stacked 8-row groups:
// rows 0..7 use blimit0/limit0/thresh0 and rows 8..15 use
// blimit1/limit1/thresh1.
void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
                                      const uint8_t *blimit0,
                                      const uint8_t *limit0,
                                      const uint8_t *thresh0,
                                      const uint8_t *blimit1,
                                      const uint8_t *limit1,
                                      const uint8_t *thresh1,
                                      int bd) {
  uint16_t *const second_group = s + 8 * pitch;

  vp9_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
  vp9_highbd_lpf_vertical_4_c(second_group, pitch, blimit1, limit1, thresh1,
                              1, bd);
}
// Filters the samples around an edge with either the 7-tap smoothing filter
// or the 4-tap filter.
// When both flat and mask are non-zero, p2..p0 and q0..q2 are replaced by
// the rounded 7-tap average [1, 1, 1, 2, 1, 1, 1] / 8, with p3 / q3
// repeated where the window runs past the outermost sample (p3 and q3
// themselves are only read). Otherwise the call falls through to
// high_filter4() on p1, p0, q0, q1.
static INLINE void high_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
                                uint16_t *op3, uint16_t *op2,
                                uint16_t *op1, uint16_t *op0,
                                uint16_t *oq0, uint16_t *oq1,
                                uint16_t *oq2, uint16_t *oq3, int bd) {
  if (!flat || !mask) {
    high_filter4(mask, thresh, op1, op0, oq0, oq1, bd);
  } else {
    // Snapshot all inputs before the first store.
    const int p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
    const int q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
    // Sum of the seven samples in the current window (edge samples
    // repeated); the centre sample is added once more for each output.
    int window = 3 * p3 + p2 + p1 + p0 + q0;

    *op2 = ROUND_POWER_OF_TWO(window + p2, 3);
    window += q1 - p3;
    *op1 = ROUND_POWER_OF_TWO(window + p1, 3);
    window += q2 - p3;
    *op0 = ROUND_POWER_OF_TWO(window + p0, 3);
    window += q3 - p3;
    *oq0 = ROUND_POWER_OF_TWO(window + q0, 3);
    window += q3 - p2;
    *oq1 = ROUND_POWER_OF_TWO(window + q1, 3);
    window += q3 - p1;
    *oq2 = ROUND_POWER_OF_TWO(window + q2, 3);
  }
}
// 8-sample loop filter across a horizontal edge.
// s points at the q0 sample of the first column; rows above and below the
// edge are reached at multiples of the pitch p. For each of the 8 * count
// columns the filter mask and the flatness mask (threshold 1) are computed
// from the unmodified samples and passed to high_filter8().
void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p,
                                   const uint8_t *blimit,
                                   const uint8_t *limit,
                                   const uint8_t *thresh,
                                   int count, int bd) {
  int col;

  for (col = 0; col < 8 * count; ++col, ++s) {
    const uint16_t p3 = s[-4 * p];
    const uint16_t p2 = s[-3 * p];
    const uint16_t p1 = s[-2 * p];
    const uint16_t p0 = s[-p];
    const uint16_t q0 = s[0];
    const uint16_t q1 = s[p];
    const uint16_t q2 = s[2 * p];
    const uint16_t q3 = s[3 * p];
    const int8_t mask = high_filter_mask(*limit, *blimit,
                                         p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat = high_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3,
                                        bd);

    high_filter8(mask, *thresh, flat,
                 s - 4 * p, s - 3 * p, s - 2 * p, s - p,
                 s, s + p, s + 2 * p, s + 3 * p, bd);
  }
}
// Runs the 8-sample horizontal-edge filter on two adjacent 8-column groups:
// columns 0..7 use blimit0/limit0/thresh0 and columns 8..15 use
// blimit1/limit1/thresh1.
void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
                                        const uint8_t *blimit0,
                                        const uint8_t *limit0,
                                        const uint8_t *thresh0,
                                        const uint8_t *blimit1,
                                        const uint8_t *limit1,
                                        const uint8_t *thresh1,
                                        int bd) {
  uint16_t *const second_group = s + 8;

  vp9_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
  vp9_highbd_lpf_horizontal_8_c(second_group, p, blimit1, limit1, thresh1, 1,
                                bd);
}
// 8-sample loop filter across a vertical edge.
// s points at the q0 sample of the first row; p3..p0 are the four samples
// to its left. For each of the 8 * count rows (pitch samples apart) the
// filter mask and the flatness mask (threshold 1) are computed from the
// unmodified samples and passed to high_filter8().
void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch,
                                 const uint8_t *blimit,
                                 const uint8_t *limit,
                                 const uint8_t *thresh,
                                 int count, int bd) {
  int row;

  for (row = 0; row < 8 * count; ++row, s += pitch) {
    const uint16_t p3 = s[-4];
    const uint16_t p2 = s[-3];
    const uint16_t p1 = s[-2];
    const uint16_t p0 = s[-1];
    const uint16_t q0 = s[0];
    const uint16_t q1 = s[1];
    const uint16_t q2 = s[2];
    const uint16_t q3 = s[3];
    const int8_t mask = high_filter_mask(*limit, *blimit,
                                         p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat = high_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3,
                                        bd);

    high_filter8(mask, *thresh, flat,
                 s - 4, s - 3, s - 2, s - 1,
                 s, s + 1, s + 2, s + 3, bd);
  }
}
// Runs the 8-sample vertical-edge filter on two stacked 8-row groups:
// rows 0..7 use blimit0/limit0/thresh0 and rows 8..15 use
// blimit1/limit1/thresh1.
void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
                                      const uint8_t *blimit0,
                                      const uint8_t *limit0,
                                      const uint8_t *thresh0,
                                      const uint8_t *blimit1,
                                      const uint8_t *limit1,
                                      const uint8_t *thresh1,
                                      int bd) {
  uint16_t *const second_group = s + 8 * pitch;

  vp9_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
  vp9_highbd_lpf_vertical_8_c(second_group, pitch, blimit1, limit1, thresh1,
                              1, bd);
}
// Filters the samples around an edge with either the 15-tap smoothing
// filter or high_filter8().
// When flat2, flat and mask are all non-zero, p6..p0 and q0..q6 are replaced
// by the rounded 15-tap average
// [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] / 16, with p7 / q7 repeated
// where the window runs past the outermost sample (p7 and q7 themselves are
// only read). Otherwise the call falls through to high_filter8() on p3..q3.
static INLINE void high_filter16(int8_t mask, uint8_t thresh,
                                 uint8_t flat, uint8_t flat2,
                                 uint16_t *op7, uint16_t *op6,
                                 uint16_t *op5, uint16_t *op4,
                                 uint16_t *op3, uint16_t *op2,
                                 uint16_t *op1, uint16_t *op0,
                                 uint16_t *oq0, uint16_t *oq1,
                                 uint16_t *oq2, uint16_t *oq3,
                                 uint16_t *oq4, uint16_t *oq5,
                                 uint16_t *oq6, uint16_t *oq7, int bd) {
  if (!flat2 || !flat || !mask) {
    high_filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3,
                 bd);
  } else {
    // Snapshot all inputs before the first store.
    const int p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
    const int p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
    const int q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
    const int q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7;
    // Sum of the fifteen samples in the current window (edge samples
    // repeated); the centre sample is added once more for each output.
    int window = 7 * p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0;

    *op6 = ROUND_POWER_OF_TWO(window + p6, 4);
    window += q1 - p7;
    *op5 = ROUND_POWER_OF_TWO(window + p5, 4);
    window += q2 - p7;
    *op4 = ROUND_POWER_OF_TWO(window + p4, 4);
    window += q3 - p7;
    *op3 = ROUND_POWER_OF_TWO(window + p3, 4);
    window += q4 - p7;
    *op2 = ROUND_POWER_OF_TWO(window + p2, 4);
    window += q5 - p7;
    *op1 = ROUND_POWER_OF_TWO(window + p1, 4);
    window += q6 - p7;
    *op0 = ROUND_POWER_OF_TWO(window + p0, 4);
    window += q7 - p7;
    *oq0 = ROUND_POWER_OF_TWO(window + q0, 4);
    window += q7 - p6;
    *oq1 = ROUND_POWER_OF_TWO(window + q1, 4);
    window += q7 - p5;
    *oq2 = ROUND_POWER_OF_TWO(window + q2, 4);
    window += q7 - p4;
    *oq3 = ROUND_POWER_OF_TWO(window + q3, 4);
    window += q7 - p3;
    *oq4 = ROUND_POWER_OF_TWO(window + q4, 4);
    window += q7 - p2;
    *oq5 = ROUND_POWER_OF_TWO(window + q5, 4);
    window += q7 - p1;
    *oq6 = ROUND_POWER_OF_TWO(window + q6, 4);
  }
}
// 16-sample loop filter across a horizontal edge.
// s points at the q0 sample of the first column; the eight rows above and
// the eight rows from s downward are reached at multiples of the pitch p.
// For each of the 8 * count columns three masks are computed from the
// unmodified samples: the filter mask and the inner flatness mask over
// p3..q3, and the outer flatness mask over the samples 4..7 rows away from
// the edge relative to p0 / q0 (both flatness thresholds are 1). They are
// then passed to high_filter16().
void vp9_highbd_lpf_horizontal_16_c(uint16_t *s, int p,
                                    const uint8_t *blimit,
                                    const uint8_t *limit,
                                    const uint8_t *thresh,
                                    int count, int bd) {
  int col;

  for (col = 0; col < 8 * count; ++col, ++s) {
    const uint16_t p3 = s[-4 * p], p2 = s[-3 * p];
    const uint16_t p1 = s[-2 * p], p0 = s[-p];
    const uint16_t q0 = s[0], q1 = s[p];
    const uint16_t q2 = s[2 * p], q3 = s[3 * p];
    const int8_t mask = high_filter_mask(*limit, *blimit,
                                         p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat = high_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3,
                                        bd);
    const int8_t flat2 = high_flat_mask5(1,
                                         s[-8 * p], s[-7 * p],
                                         s[-6 * p], s[-5 * p], p0,
                                         q0, s[4 * p], s[5 * p],
                                         s[6 * p], s[7 * p], bd);

    high_filter16(mask, *thresh, flat, flat2,
                  s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p,
                  s - 4 * p, s - 3 * p, s - 2 * p, s - p,
                  s, s + p, s + 2 * p, s + 3 * p,
                  s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p, bd);
  }
}
// 16-sample loop filter across a vertical edge, one row at a time.
// s points at the q0 sample of the first row; the eight samples to its left
// and the eight starting at s are used. Exactly count rows (p samples apart)
// are processed. For each row the filter mask, the inner flatness mask over
// p3..q3 and the outer flatness mask over the samples 4..7 positions from
// the edge are computed from the unmodified samples (both flatness
// thresholds are 1) and passed to high_filter16().
static void high_mb_lpf_vertical_edge_w(uint16_t *s, int p,
                                        const uint8_t *blimit,
                                        const uint8_t *limit,
                                        const uint8_t *thresh,
                                        int count, int bd) {
  int row;

  for (row = 0; row < count; ++row, s += p) {
    const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
    const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
    const int8_t mask = high_filter_mask(*limit, *blimit,
                                         p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat = high_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3,
                                        bd);
    const int8_t flat2 = high_flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0,
                                         q0, s[4], s[5], s[6], s[7], bd);

    high_filter16(mask, *thresh, flat, flat2,
                  s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1,
                  s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7, bd);
  }
}
// 16-sample vertical-edge loop filter over 8 rows; forwards to
// high_mb_lpf_vertical_edge_w().
void vp9_highbd_lpf_vertical_16_c(uint16_t *s, int p,
                                  const uint8_t *blimit,
                                  const uint8_t *limit,
                                  const uint8_t *thresh, int bd) {
  const int num_rows = 8;

  high_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, num_rows, bd);
}
// 16-sample vertical-edge loop filter over 16 rows, all using the same
// thresholds; forwards to high_mb_lpf_vertical_edge_w().
void vp9_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
                                       const uint8_t *blimit,
                                       const uint8_t *limit,
                                       const uint8_t *thresh, int bd) {
  const int num_rows = 16;

  high_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, num_rows, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
vp9/common/vp9_postproc.c
View file @
e2a90c0b
...
...
@@ -19,6 +19,9 @@
#include
"vpx_scale/vpx_scale.h"
#include
"vpx_scale/yv12config.h"
#if CONFIG_VP9_HIGHBITDEPTH
#include
"vp9/common/vp9_common.h"
#endif
#include
"vp9/common/vp9_onyxc_int.h"
#include
"vp9/common/vp9_postproc.h"
#include
"vp9/common/vp9_systemdependent.h"
...
...
@@ -152,6 +155,84 @@ void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr,
}
}
#if CONFIG_VP9_HIGHBITDEPTH
void
vp9_highbd_post_proc_down_and_across_c
(
const
uint16_t
*
src_ptr
,
uint16_t
*
dst_ptr
,
int
src_pixels_per_line
,
int
dst_pixels_per_line
,
int
rows
,
int
cols
,
int
flimit
)
{
uint16_t
const
*
p_src
;
uint16_t
*
p_dst
;
int
row
;
int
col
;
int
i
;
int
v
;
int
pitch
=
src_pixels_per_line
;
uint16_t
d
[
8
];
for
(
row
=
0
;
row
<
rows
;
row
++
)
{
// post_proc_down for one row.
p_src
=
src_ptr
;
p_dst
=
dst_ptr
;
for
(
col
=
0
;
col
<
cols
;
col
++
)
{
int
kernel
=
4
;
int
v
=
p_src
[
col
];
for
(
i
=
-
2
;
i
<=
2
;
i
++
)
{
if
(
abs
(
v
-
p_src
[
col
+
i
*
pitch
])
>
flimit
)
goto
down_skip_convolve
;
kernel
+=
kernel5
[
2
+
i
]
*
p_src
[
col
+
i
*
pitch
];
}
v
=
(
kernel
>>
3
);
down_skip_convolve:
p_dst
[
col
]
=
v
;
}
/* now post_proc_across */
p_src
=
dst_ptr
;
p_dst
=
dst_ptr
;
for
(
i
=
0
;
i
<
8
;
i
++
)
d
[
i
]
=
p_src
[
i
];
for
(
col
=
0
;
col
<
cols
;
col
++
)
{
int
kernel
=
4
;
v
=
p_src
[
col
];
d
[
col
&
7
]
=
v
;
for
(
i
=
-
2
;
i
<=
2
;
i
++
)
{
if
(
abs
(
v
-
p_src
[
col
+
i
])
>
flimit
)
goto
across_skip_convolve
;
kernel
+=
kernel5
[
2
+
i
]
*
p_src
[
col
+
i
];
}
d
[
col
&
7
]
=
(
kernel
>>
3
);
across_skip_convolve:
if
(
col
>=
2
)
p_dst
[
col
-
2
]
=
d
[(
col
-
2
)
&
7
];
}
/* handle the last two pixels */
p_dst
[
col
-
2
]
=
d
[(
col
-
2
)
&
7
];
p_dst
[
col
-
1
]
=
d
[(
col
-
1
)
&
7
];
/* next row */
src_ptr
+=
pitch
;
dst_ptr
+=
dst_pixels_per_line
;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
static
int
q2mbl
(
int
x
)
{
if
(
x
<
20
)
x
=
20
;
...
...
@@ -162,10 +243,46 @@ static int q2mbl(int x) {
// In-place horizontal smoothing of low-variance areas, one row at a time.
// For each sample a running sum and sum of squares over a 15-sample window
// (s[c - 7] .. s[c + 7]) are maintained; when 15 * sumsq - sum * sum is
// below flimit the sample is replaced by (8 + sum + s[c]) >> 4, otherwise it
// is kept.
//   src   - first sample of the first row.
//   pitch - distance between rows, in samples.
//   rows, cols - size of the area to process.
// Each row reads s[-8] .. s[cols + 14] and writes s[-8] .. s[cols - 1]; the
// eight samples left of the row are overwritten with zero.
void vp9_mbpost_proc_across_ip_c(uint8_t *src, int pitch,
                                 int rows, int cols, int flimit) {
  int r, c, i;
  uint8_t *s = src;

  for (r = 0; r < rows; r++) {
    // Delay buffer: outputs are written back 8 columns late so the window
    // still sees unfiltered input. All 16 entries are cleared for every row;
    // entry 15 is read at c == 7 before anything has been stored in it.
    uint8_t d[16] = { 0 };
    int sumsq = 0;
    int sum = 0;

    for (i = -8; i <= 6; i++) {
      sumsq += s[i] * s[i];
      sum += s[i];
    }

    for (c = 0; c < cols + 8; c++) {
      // Slide the window: s[c + 7] enters, s[c - 8] leaves.
      // x * y == s[c + 7]^2 - s[c - 8]^2.
      int x = s[c + 7] - s[c - 8];
      int y = s[c + 7] + s[c - 8];

      sum += x;
      sumsq += x * y;

      d[c & 15] = s[c];

      if (sumsq * 15 - sum * sum < flimit) {
        d[c & 15] = (8 + sum + s[c]) >> 4;
      }

      s[c - 8] = d[(c - 8) & 15];
    }

    s += pitch;
  }
}
#if CONFIG_VP9_HIGHBITDEPTH
void
vp9_highbd_mbpost_proc_across_ip_c
(
uint16_t
*
src
,
int
pitch
,
int
rows
,
int
cols
,
int
flimit
)
{
int
r
,
c
,
i
;
uint16_t
*
s
=
src
;
uint16_t
d
[
16
];
for
(
r
=
0
;
r
<
rows
;
r
++
)
{
int
sumsq
=
0
;
...
...
@@ -196,6 +313,7 @@ void vp9_mbpost_proc_across_ip_c(uint8_t *src, int pitch,
s
+=
pitch
;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
void
vp9_mbpost_proc_down_c
(
uint8_t
*
dst
,
int
pitch
,