Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
1a709944
Commit
1a709944
authored
Feb 02, 2018
by
Debargha Mukherjee
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Remove code for CONFIG_FAST_SGR=2 and cleanup
Change-Id: I01cecc829e2d57517427a1de6387e91ba3c64312
parent
d051e560
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
193 deletions
+5
-193
av1/common/restoration.c
av1/common/restoration.c
+2
-178
av1/encoder/pickrst.c
av1/encoder/pickrst.c
+3
-15
No files found.
av1/common/restoration.c
View file @
1a709944
...
...
@@ -770,164 +770,7 @@ const int32_t one_by_x[MAX_NELEM] = {
293
,
273
,
256
,
241
,
228
,
216
,
205
,
195
,
186
,
178
,
171
,
164
,
};
#if CONFIG_FAST_SGR == 2
// Fast variant ("fast2") of the self-guided restoration filter.
//
// Computes the intermediate A[] (blend factor) and B[] (offset) arrays only
// on every other column (the j += 2 loop below starts at j = -1, so odd
// columns plus a 1-pixel border), then reconstructs the full-resolution
// filtered output: even output columns are interpolated entirely from the
// neighbouring odd-column A/B values, while odd output columns combine their
// own A/B value with horizontal/vertical neighbours.
//
// Parameters:
//   dgd        - pointer to the (bit-depth-extended) degraded pixels; must
//                have at least SGRPROJ_BORDER_VERT/HORZ rows/cols of valid
//                border around the width x height unit (asserted below).
//   width,
//   height     - dimensions of the processing unit.
//   dgd_stride - row stride of dgd.
//   dst        - output buffer for the filtered values, in SGRPROJ_RST_BITS
//                precision.
//   dst_stride - row stride of dst.
//   bit_depth  - pixel bit depth; the 2*(bit_depth - 8) / (bit_depth - 8)
//                down-shifts below assume bit_depth >= 8.
//   r          - box-filter radius (r <= MAX_RADIUS, asserted).
//   eps        - noise parameter selecting a row of sgrproj_mtable.
static void av1_selfguided_restoration_fast2_internal(
    int32_t *dgd, int width, int height, int dgd_stride, int32_t *dst,
    int dst_stride, int bit_depth, int r, int eps) {
  const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
  const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
  // Adjusting the stride of A and B here appears to avoid bad cache effects,
  // leading to a significant speed improvement.
  // We also align the stride to a multiple of 16 bytes, for consistency
  // with the SIMD version of this function.
  int buf_stride = ((width_ext + 3) & ~3) + 16;
  int32_t A_[RESTORATION_PROC_UNIT_PELS];
  int32_t B_[RESTORATION_PROC_UNIT_PELS];
  int32_t *A = A_;
  int32_t *B = B_;
  int i, j;

  assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
  assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 &&
         "Need SGRPROJ_BORDER_* >= r+1");

  // B accumulates box sums of pixel values (boxsum arg 0), A accumulates box
  // sums of squared values (boxsum arg 1), over the border-extended unit.
  boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
         width_ext, height_ext, dgd_stride, r, 0, B, buf_stride);
  boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
         width_ext, height_ext, dgd_stride, r, 1, A, buf_stride);
  // Re-base A and B so that index 0 is the top-left pixel of the unit proper.
  A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
  B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;

  // Calculate the eventual A[] and B[] arrays. Include a 1-pixel border - ie,
  // for a 64x64 processing unit, we calculate 66x66 pixels of A[] and B[].
  // Note j steps by 2 starting at -1: A/B are only produced on odd columns
  // (and the border); even columns are interpolated in the output pass below.
  for (i = -1; i < height + 1; ++i) {
    for (j = -1; j < width + 1; j += 2) {
      const int k = i * buf_stride + j;
      // n = number of pixels in the (2r+1)x(2r+1) box.
      const int n = (2 * r + 1) * (2 * r + 1);

      // a < 2^16 * n < 2^22 regardless of bit depth
      uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
      // b < 2^8 * n < 2^14 regardless of bit depth
      uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8);

      // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
      // and p itself satisfies p < 2^14 * n^2 < 2^26.
      // This bound on p is due to:
      // https://en.wikipedia.org/wiki/Popoviciu's_inequality_on_variances
      //
      // Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
      // This is an artefact of rounding, and can only happen if all pixels
      // are (almost) identical, so in this case we saturate to p=0.
      uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;

      // Note: If MAX_RADIUS <= 2, then this 's' is a function only of
      // r and eps. Further, this is the only place we use 'eps', so we could
      // pre-calculate 's' for each parameter set and store that in place of
      // 'eps'.
      uint32_t s = sgrproj_mtable[eps - 1][n - 1];

      // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
      // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
      // (this holds even after accounting for the rounding in s)
      const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);

      // Note: We have to be quite careful about the value of A[k].
      // This is used as a blend factor between individual pixel values and the
      // local mean. So it logically has a range of [0, 256], including both
      // endpoints.
      //
      // This is a pain for hardware, as we'd like something which can be stored
      // in exactly 8 bits.
      // Further, in the calculation of B[k] below, if z == 0 and r == 2,
      // then A[k] "should be" 0. But then we can end up setting B[k] to a value
      // slightly above 2^(8 + bit depth), due to rounding in the value of
      // one_by_x[25-1].
      //
      // Thus we saturate so that, when z == 0, A[k] is set to 1 instead of 0.
      // This fixes the above issues (256 - A[k] fits in a uint8, and we can't
      // overflow), without significantly affecting the final result: z == 0
      // implies that the image is essentially "flat", so the local mean and
      // individual pixel values are very similar.
      //
      // Note that saturating on the other side, ie. requring A[k] <= 255,
      // would be a bad idea, as that corresponds to the case where the image
      // is very variable, when we want to preserve the local pixel value as
      // much as possible.
      A[k] = x_by_xplus1[AOMMIN(z, 255)];  // in range [1, 256]

      // SGRPROJ_SGR - A[k] < 2^8 (from above), B[k] < 2^(bit_depth) * n,
      // one_by_x[n - 1] = round(2^12 / n)
      // => the product here is < 2^(20 + bit_depth) <= 2^32,
      // and B[k] is set to a value < 2^(8 + bit depth)
      // This holds even with the rounding in one_by_x and in the overall
      // result, as long as SGRPROJ_SGR - A[k] is strictly less than 2^8.
      B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
                                             (uint32_t)B[k] *
                                             (uint32_t)one_by_x[n - 1],
                                         SGRPROJ_RECIP_BITS);
    }
  }

  // Use the A[] and B[] arrays to calculate the filtered image
  for (i = 0; i < height; ++i) {
    // Round the loop bound up so that the trailing odd column (if any) is
    // handled inside the same j += 2 iteration structure.
    const int width2 = width + (width & 1);
    for (j = 0; j < width2; j += 2) {
      {  // even col
        // Even columns have no A/B of their own: blend from the adjacent
        // odd columns (k-1, k+1) and their vertical neighbours.
        // Weights 6 (horizontal) + 5 (diagonal) sum to 2 * 6 + 4 * 5 = 32
        // = 2^nb, matching nb = 5 in the final shift.
        const int k = i * buf_stride + j;
        const int l = i * dgd_stride + j;
        const int m = i * dst_stride + j;
        const int nb = 5;
        const int32_t a =
            (A[k - 1] + A[k + 1]) * 6 +
            (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
             A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
                5;
        const int32_t b =
            (B[k - 1] + B[k + 1]) * 6 +
            (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
             B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
                5;
        const int32_t v = a * dgd[l] + b;
        dst[m] =
            ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
      }
      if (j + 1 < width - 1) {  // odd col and not last
        // Odd columns have their own A/B; combine with the odd columns two
        // away (k-2, k+2) and vertical neighbours. Weights
        // 16 + 2*14 + 2*4 + 4*3 = 64 = 2^nb, matching nb = 6.
        const int k = i * buf_stride + j + 1;
        const int l = i * dgd_stride + j + 1;
        const int m = i * dst_stride + j + 1;
        const int nb = 6;
        const int32_t a =
            A[k] * 16 + (A[k - buf_stride] + A[k + buf_stride]) * 14 +
            (A[k - 2] + A[k + 2]) * 4 +
            (A[k - 2 - buf_stride] + A[k - 2 + buf_stride] +
             A[k + 2 - buf_stride] + A[k + 2 + buf_stride]) *
                3;
        const int32_t b =
            B[k] * 16 + (B[k - buf_stride] + B[k + buf_stride]) * 14 +
            (B[k - 2] + B[k + 2]) * 4 +
            (B[k - 2 - buf_stride] + B[k - 2 + buf_stride] +
             B[k + 2 - buf_stride] + B[k + 2 + buf_stride]) *
                3;
        const int32_t v = a * dgd[l] + b;
        dst[m] =
            ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
      } else if (j + 1 < width) {  // odd col and last
        // Last column has no valid right neighbour at k+2: reweight using
        // only the left side. Weights 18 + 2*16 + 6 + 2*4 = 64 = 2^nb.
        const int k = i * buf_stride + j + 1;
        const int l = i * dgd_stride + j + 1;
        const int m = i * dst_stride + j + 1;
        const int nb = 6;
        const int32_t a =
            A[k] * 18 + (A[k - buf_stride] + A[k + buf_stride]) * 16 +
            A[k - 2] * 6 +
            (A[k - 2 - buf_stride] + A[k - 2 + buf_stride]) * 4;
        const int32_t b =
            B[k] * 18 + (B[k - buf_stride] + B[k + buf_stride]) * 16 +
            B[k - 2] * 6 +
            (B[k - 2 - buf_stride] + B[k - 2 + buf_stride]) * 4;
        const int32_t v = a * dgd[l] + b;
        dst[m] =
            ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
      }
    }
  }
}
#elif CONFIG_FAST_SGR == 1
#if CONFIG_FAST_SGR
static
void
av1_selfguided_restoration_fast_internal
(
int32_t
*
dgd
,
int
width
,
int
height
,
int
dgd_stride
,
int32_t
*
dst
,
int
dst_stride
,
int
bit_depth
,
int
r
,
int
eps
)
{
...
...
@@ -1267,14 +1110,7 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
}
}
#if CONFIG_FAST_SGR == 2
av1_selfguided_restoration_fast2_internal
(
dgd32
,
width
,
height
,
dgd32_stride
,
flt1
,
flt_stride
,
bit_depth
,
params
->
r1
,
params
->
e1
);
av1_selfguided_restoration_fast2_internal
(
dgd32
,
width
,
height
,
dgd32_stride
,
flt2
,
flt_stride
,
bit_depth
,
params
->
r2
,
params
->
e2
);
#elif CONFIG_FAST_SGR == 1
#if CONFIG_FAST_SGR
// r == 2 filter
av1_selfguided_restoration_fast_internal
(
dgd32
,
width
,
height
,
dgd32_stride
,
flt1
,
flt_stride
,
bit_depth
,
...
...
@@ -1338,15 +1174,9 @@ static void sgrproj_filter_stripe(const RestorationUnitInfo *rui,
for
(
int
j
=
0
;
j
<
stripe_width
;
j
+=
procunit_width
)
{
int
w
=
AOMMIN
(
procunit_width
,
stripe_width
-
j
);
#if CONFIG_FAST_SGR == 2
apply_selfguided_restoration_c
(
src
+
j
,
w
,
stripe_height
,
src_stride
,
rui
->
sgrproj_info
.
ep
,
rui
->
sgrproj_info
.
xqd
,
dst
+
j
,
dst_stride
,
tmpbuf
,
bit_depth
,
0
);
#else
apply_selfguided_restoration
(
src
+
j
,
w
,
stripe_height
,
src_stride
,
rui
->
sgrproj_info
.
ep
,
rui
->
sgrproj_info
.
xqd
,
dst
+
j
,
dst_stride
,
tmpbuf
,
bit_depth
,
0
);
#endif // CONFIG_FAST_SGR == 2
}
}
...
...
@@ -1382,15 +1212,9 @@ static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui,
int32_t
*
tmpbuf
,
int
bit_depth
)
{
for
(
int
j
=
0
;
j
<
stripe_width
;
j
+=
procunit_width
)
{
int
w
=
AOMMIN
(
procunit_width
,
stripe_width
-
j
);
#if CONFIG_FAST_SGR == 2
apply_selfguided_restoration_c
(
src8
+
j
,
w
,
stripe_height
,
src_stride
,
rui
->
sgrproj_info
.
ep
,
rui
->
sgrproj_info
.
xqd
,
dst8
+
j
,
dst_stride
,
tmpbuf
,
bit_depth
,
1
);
#else
apply_selfguided_restoration
(
src8
+
j
,
w
,
stripe_height
,
src_stride
,
rui
->
sgrproj_info
.
ep
,
rui
->
sgrproj_info
.
xqd
,
dst8
+
j
,
dst_stride
,
tmpbuf
,
bit_depth
,
1
);
#endif // CONFIG_FAST_SGR == 2
}
}
...
...
av1/encoder/pickrst.c
View file @
1a709944
...
...
@@ -349,19 +349,6 @@ void encode_xq(int *xq, int *xqd) {
xqd
[
1
]
=
clamp
(
xqd
[
1
],
SGRPROJ_PRJ_MIN1
,
SGRPROJ_PRJ_MAX1
);
}
// Select the self-guided restoration implementation once, at preprocessor
// level, instead of branching at each call site: with CONFIG_FAST_SGR == 2
// the plain C path is forced, otherwise the (possibly SIMD-dispatched)
// av1_selfguided_restoration entry point is used.
#if CONFIG_FAST_SGR == 2
#define SGR_BLOCK_RESTORE_FN av1_selfguided_restoration_c
#else
#define SGR_BLOCK_RESTORE_FN av1_selfguided_restoration
#endif  // CONFIG_FAST_SGR == 2

// Run self-guided restoration on a single width x height block of dat8,
// writing the two intermediate filtered planes into flt1 and flt2
// (both with row stride flt_stride). use_highbd selects 8-bit vs
// high-bit-depth pixel access; bit_depth is the pixel bit depth.
static void sgr_filter_block(const sgr_params_type *params, const uint8_t *dat8,
                             int width, int height, int dat_stride,
                             int use_highbd, int bit_depth, int32_t *flt1,
                             int32_t *flt2, int flt_stride) {
  SGR_BLOCK_RESTORE_FN(dat8, width, height, dat_stride, flt1, flt2, flt_stride,
                       params, bit_depth, use_highbd);
}

#undef SGR_BLOCK_RESTORE_FN
// Apply the self-guided filter across an entire restoration unit.
static
void
apply_sgr
(
const
sgr_params_type
*
params
,
const
uint8_t
*
dat8
,
int
width
,
int
height
,
int
dat_stride
,
int
use_highbd
,
...
...
@@ -376,8 +363,9 @@ static void apply_sgr(const sgr_params_type *params, const uint8_t *dat8,
// Iterate over the stripe in blocks of width pu_width
for
(
int
j
=
0
;
j
<
width
;
j
+=
pu_width
)
{
const
int
w
=
AOMMIN
(
pu_width
,
width
-
j
);
sgr_filter_block
(
params
,
dat8_row
+
j
,
w
,
h
,
dat_stride
,
use_highbd
,
bit_depth
,
flt1_row
+
j
,
flt2_row
+
j
,
flt_stride
);
av1_selfguided_restoration
(
dat8_row
+
j
,
w
,
h
,
dat_stride
,
flt1_row
+
j
,
flt2_row
+
j
,
flt_stride
,
params
,
bit_depth
,
use_highbd
);
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment