Commit 064c1d47 authored by Rupert Swarbrick

A working rewrite of the sgr sse code

This fixes some Valgrind errors caused by reads from x_by_xplus1 that
used tainted data as an address (see the comments in selfguided_sse4.c
for what's going on).

It also rewrites the algorithm to use an integral image approach
instead of the handwritten filters that the code was using. The end
result is roughly the same efficiency: I think that there's one more
memory load per group of pixels, but this seems not to be measurable.
I've done some performance optimisation with perf too. Several
32-bit multiplications have been replaced by madd instructions, which
do 16-bit multiplications and add adjacent lanes. This is equivalent
to a 32-bit multiplication when the 32-bit lanes contain numbers below
2^15, but runs significantly faster.

Change-Id: I3d0f3043c7861707a56e2fd1849574dc73897d6c
parent 70f9a1f9
......@@ -552,16 +552,20 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf";
specialize qw/apply_selfguided_restoration sse4_1/;
add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
specialize qw/av1_selfguided_restoration sse4_1/;
add_proto qw/void av1_highpass_filter/, "const uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
specialize qw/av1_highpass_filter sse4_1/;
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void apply_selfguided_restoration_highbd/, "const uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, const int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf";
specialize qw/apply_selfguided_restoration_highbd sse4_1/;
add_proto qw/void av1_selfguided_restoration_highbd/, "const uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps";
specialize qw/av1_selfguided_restoration_highbd sse4_1/;
add_proto qw/void av1_highpass_filter_highbd/, "const uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps";
specialize qw/av1_highpass_filter_highbd sse4_1/;
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment