Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
abe0484c
Commit
abe0484c
authored
Oct 10, 2016
by
Yaowu Xu
Committed by
Gerrit Code Review
Oct 10, 2016
Browse files
Merge "New CLPF: New kernel and RDO for strength and block size" into nextgenv2
parents
3a8217f2
d06588ab
Changes
10
Hide whitespace changes
Inline
Side-by-side
av1/av1_common.mk
View file @
abe0484c
...
...
@@ -86,8 +86,10 @@ ifeq (yes,$(filter $(CONFIG_GLOBAL_MOTION) $(CONFIG_WARPED_MOTION),yes))
AV1_COMMON_SRCS-yes
+=
common/warped_motion.h
AV1_COMMON_SRCS-yes
+=
common/warped_motion.c
endif
ifeq
($(CONFIG_CLPF),yes)
AV1_COMMON_SRCS-yes
+=
common/clpf.c
AV1_COMMON_SRCS-yes
+=
common/clpf.h
endif
ifeq
($(CONFIG_DERING),yes)
AV1_COMMON_SRCS-yes
+=
common/od_dering.c
AV1_COMMON_SRCS-yes
+=
common/od_dering.h
...
...
av1/av1_cx.mk
View file @
abe0484c
...
...
@@ -101,6 +101,10 @@ AV1_CX_SRCS-yes += encoder/mbgraph.h
ifeq
($(CONFIG_DERING),yes)
AV1_CX_SRCS-yes
+=
encoder/pickdering.c
endif
ifeq
($(CONFIG_CLPF),yes)
AV1_CX_SRCS-yes
+=
encoder/clpf_rdo.c
AV1_CX_SRCS-yes
+=
encoder/clpf_rdo.h
endif
AV1_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/temporal_filter_apply_sse2.asm
AV1_CX_SRCS-$(HAVE_SSE2)
+=
encoder/x86/quantize_sse2.c
ifeq
($(CONFIG_AOM_HIGHBITDEPTH),yes)
...
...
av1/common/clpf.c
View file @
abe0484c
...
...
@@ -9,96 +9,119 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include
"av1/common/clpf.h"
#include
"aom_dsp/aom_dsp_common.h"
// Apply the filter on a single block
static
void
clpf_block
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
sstride
,
int
dstride
,
int
has_top
,
int
has_left
,
int
has_bottom
,
int
has_right
,
int
width
,
int
height
)
{
int
x
,
y
;
// Returns the number of bits used to code cm->clpf_numblocks in the frame
// header (the bitstream reader and writer both pass this value as the
// literal width).
//
// The frame dimensions (mi_cols/mi_rows scaled by MAX_MIB_SIZE) are rounded
// up to a multiple of the filter block size, 1 << (clpf_size + 4); their
// product shifted right by 2 * (clpf_size + 4) is then the number of filter
// blocks in the frame, and get_msb() + 1 is the bit width of that count.
int av1_clpf_maxbits(const AV1_COMMON *cm) {
  const int fb_log2 = cm->clpf_size + 4;
  const int padded_cols =
      ALIGN_POWER_OF_TWO(cm->mi_cols * MAX_MIB_SIZE, fb_log2);
  const int padded_rows =
      ALIGN_POWER_OF_TWO(cm->mi_rows * MAX_MIB_SIZE, fb_log2);
  const int fb_count = (padded_cols * padded_rows) >> (cm->clpf_size * 2 + 8);

  return get_msb(fb_count) + 1;
}
// Computes the CLPF correction to add to sample X.
//
//   X            the sample being filtered
//   A, B, C,     six neighbouring samples; in clpf_block() these are the
//   D, E, F      sample above, two left, one left, one right, two right and
//                below, respectively
//   b            strength: each neighbour difference is clamped to [-b, b]
//
// The clamped differences are combined with weights 4, 1, 3, 3, 1, 4 (which
// sum to 16) and the total is rounded and scaled down by 16. The
// "- (acc < 0)" term adjusts the rounding offset for negative totals; the
// final >> 4 is applied to a possibly negative value and therefore relies on
// an arithmetic right shift.
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) {
  static const int weights[6] = { 4, 1, 3, 3, 1, 4 };
  const int neighbours[6] = { A, B, C, D, E, F };
  int acc = 0;
  int tap;

  for (tap = 0; tap < 6; ++tap)
    acc += weights[tap] * clamp(neighbours[tap] - X, -b, b);

  return (8 + acc - (acc < 0)) >> 4;
}
for
(
y
=
0
;
y
<
height
;
y
++
)
{
for
(
x
=
0
;
x
<
width
;
x
++
)
{
int
X
=
src
[(
y
+
0
)
*
sstride
+
x
+
0
];
int
A
=
has_top
?
src
[(
y
-
1
)
*
sstride
+
x
+
0
]
:
X
;
int
B
=
has_left
?
src
[(
y
+
0
)
*
sstride
+
x
-
1
]
:
X
;
int
C
=
has_right
?
src
[(
y
+
0
)
*
sstride
+
x
+
1
]
:
X
;
int
D
=
has_bottom
?
src
[(
y
+
1
)
*
sstride
+
x
+
0
]
:
X
;
int
delta
=
((
A
>
X
)
+
(
B
>
X
)
+
(
C
>
X
)
+
(
D
>
X
)
>
2
)
-
((
A
<
X
)
+
(
B
<
X
)
+
(
C
<
X
)
+
(
D
<
X
)
>
2
);
dst
[
y
*
dstride
+
x
]
=
X
+
delta
;
// Filters one sizex x sizey block whose top-left sample is at (x0, y0).
//
// src and dst share the same stride and are indexed with absolute frame
// coordinates. Neighbour coordinates are clamped to [0, width - 1] and
// [0, height - 1], so samples on the frame border reuse the nearest
// available sample instead of reading outside the frame. The filtered value
// X + av1_clpf_sample(...) is stored to dst at the same position.
static void clpf_block(const uint8_t *src, uint8_t *dst, int stride, int x0,
                       int y0, int sizex, int sizey, int width, int height,
                       unsigned int strength) {
  const int row_end = y0 + sizey;
  const int col_end = x0 + sizex;
  int row, col;

  for (row = y0; row < row_end; ++row) {
    // Offsets of the row above, the current row and the row below.
    const int above = AOMMAX(0, row - 1) * stride;
    const int here = row * stride;
    const int below = AOMMIN(height - 1, row + 1) * stride;

    for (col = x0; col < col_end; ++col) {
      const int centre = src[here + col];
      const int up = src[above + col];
      const int left2 = src[here + AOMMAX(0, col - 2)];
      const int left1 = src[here + AOMMAX(0, col - 1)];
      const int right1 = src[here + AOMMIN(width - 1, col + 1)];
      const int right2 = src[here + AOMMIN(width - 1, col + 2)];
      const int down = src[below + col];

      dst[here + col] =
          centre + av1_clpf_sample(centre, up, left2, left1, right1, right2,
                                   down, strength);
    }
  }
}
#define BS (MI_SIZE * MAX_MIB_SIZE)
// Iterate over blocks within a superblock
static
void
av1_clpf_sb
(
const
YV12_BUFFER_CONFIG
*
frame_buffer
,
const
AV1_COMMON
*
cm
,
MACROBLOCKD
*
xd
,
MODE_INFO
*
const
*
mi_8x8
,
int
xpos
,
int
ypos
)
{
// Temporary buffer (to allow SIMD parallelism)
uint8_t
buf_unaligned
[
BS
*
BS
+
15
];
uint8_t
*
buf
=
(
uint8_t
*
)(((
intptr_t
)
buf_unaligned
+
15
)
&
~
15
);
int
x
,
y
,
p
;
// Return number of filtered blocks
int
av1_clpf_frame
(
const
YV12_BUFFER_CONFIG
*
dst
,
const
YV12_BUFFER_CONFIG
*
rec
,
const
YV12_BUFFER_CONFIG
*
org
,
const
AV1_COMMON
*
cm
,
int
enable_fb_flag
,
unsigned
int
strength
,
unsigned
int
fb_size_log2
,
uint8_t
*
blocks
,
int
(
*
decision
)(
int
,
int
,
const
YV12_BUFFER_CONFIG
*
,
const
YV12_BUFFER_CONFIG
*
,
const
AV1_COMMON
*
cm
,
int
,
int
,
int
,
unsigned
int
,
unsigned
int
,
uint8_t
*
))
{
/* Constrained low-pass filter (CLPF) */
int
c
,
k
,
l
,
m
,
n
;
int
width
=
rec
->
y_crop_width
;
int
height
=
rec
->
y_crop_height
;
int
xpos
,
ypos
;
int
stride_y
=
rec
->
y_stride
;
int
stride_c
=
rec
->
uv_stride
;
const
int
bs
=
MAX_MIB_SIZE
;
int
num_fb_hor
=
(
width
+
(
1
<<
fb_size_log2
)
-
bs
)
>>
fb_size_log2
;
int
num_fb_ver
=
(
height
+
(
1
<<
fb_size_log2
)
-
bs
)
>>
fb_size_log2
;
int
block_index
=
0
;
for
(
p
=
0
;
p
<
(
CLPF_FILTER_ALL_PLANES
?
MAX_MB_PLANE
:
1
);
p
++
)
{
for
(
y
=
0
;
y
<
MAX_MIB_SIZE
&&
ypos
+
y
<
cm
->
mi_rows
;
y
++
)
{
for
(
x
=
0
;
x
<
MAX_MIB_SIZE
&&
xpos
+
x
<
cm
->
mi_cols
;
x
++
)
{
const
MB_MODE_INFO
*
mbmi
=
&
mi_8x8
[(
ypos
+
y
)
*
cm
->
mi_stride
+
xpos
+
x
]
->
mbmi
;
// Do not filter if there is no residual
if
(
!
mbmi
->
skip
)
{
// Do not filter frame edges
int
has_top
=
ypos
+
y
>
0
;
int
has_left
=
xpos
+
x
>
0
;
int
has_bottom
=
ypos
+
y
<
cm
->
mi_rows
-
1
;
int
has_right
=
xpos
+
x
<
cm
->
mi_cols
-
1
;
#if CLPF_ALLOW_BLOCK_PARALLELISM
// Do not filter superblock edges
has_top
&=
!!
y
;
has_left
&=
!!
x
;
has_bottom
&=
y
!=
MAX_MIB_SIZE
-
1
;
has_right
&=
x
!=
MAX_MIB_SIZE
-
1
;
#endif
av1_setup_dst_planes
(
xd
->
plane
,
frame_buffer
,
ypos
+
y
,
xpos
+
x
);
clpf_block
(
xd
->
plane
[
p
].
dst
.
buf
,
CLPF_ALLOW_PIXEL_PARALLELISM
?
buf
+
y
*
MI_SIZE
*
BS
+
x
*
MI_SIZE
:
xd
->
plane
[
p
].
dst
.
buf
,
xd
->
plane
[
p
].
dst
.
stride
,
CLPF_ALLOW_PIXEL_PARALLELISM
?
BS
:
xd
->
plane
[
p
].
dst
.
stride
,
has_top
,
has_left
,
has_bottom
,
has_right
,
MI_SIZE
>>
xd
->
plane
[
p
].
subsampling_x
,
MI_SIZE
>>
xd
->
plane
[
p
].
subsampling_y
);
// Iterate over all filter blocks
for
(
k
=
0
;
k
<
num_fb_ver
;
k
++
)
{
for
(
l
=
0
;
l
<
num_fb_hor
;
l
++
)
{
int
h
,
w
;
int
allskip
=
1
;
for
(
m
=
0
;
allskip
&&
m
<
(
1
<<
fb_size_log2
)
/
bs
;
m
++
)
{
for
(
n
=
0
;
allskip
&&
n
<
(
1
<<
fb_size_log2
)
/
bs
;
n
++
)
{
xpos
=
(
l
<<
fb_size_log2
)
+
n
*
bs
;
ypos
=
(
k
<<
fb_size_log2
)
+
m
*
bs
;
if
(
xpos
<
width
&&
ypos
<
height
)
{
allskip
&=
cm
->
mi_grid_visible
[
ypos
/
bs
*
cm
->
mi_stride
+
xpos
/
bs
]
->
mbmi
.
skip
;
}
}
}
}
#if CLPF_ALLOW_PIXEL_PARALLELISM
for
(
y
=
0
;
y
<
MAX_MIB_SIZE
&&
ypos
+
y
<
cm
->
mi_rows
;
y
++
)
{
for
(
x
=
0
;
x
<
MAX_MIB_SIZE
&&
xpos
+
x
<
cm
->
mi_cols
;
x
++
)
{
const
MB_MODE_INFO
*
mbmi
=
&
mi_8x8
[(
ypos
+
y
)
*
cm
->
mi_stride
+
xpos
+
x
]
->
mbmi
;
av1_setup_dst_planes
(
xd
->
plane
,
frame_buffer
,
ypos
+
y
,
xpos
+
x
);
if
(
!
mbmi
->
skip
)
{
int
i
=
0
;
for
(
i
=
0
;
i
<
MI_SIZE
>>
xd
->
plane
[
p
].
subsampling_y
;
i
++
)
memcpy
(
xd
->
plane
[
p
].
dst
.
buf
+
i
*
xd
->
plane
[
p
].
dst
.
stride
,
buf
+
(
y
*
MI_SIZE
+
i
)
*
BS
+
x
*
MI_SIZE
,
MI_SIZE
>>
xd
->
plane
[
p
].
subsampling_x
);
// Calculate the actual filter block size near frame edges
h
=
AOMMIN
(
height
,
(
k
+
1
)
<<
fb_size_log2
)
&
((
1
<<
fb_size_log2
)
-
1
);
w
=
AOMMIN
(
width
,
(
l
+
1
)
<<
fb_size_log2
)
&
((
1
<<
fb_size_log2
)
-
1
);
h
+=
!
h
<<
fb_size_log2
;
w
+=
!
w
<<
fb_size_log2
;
if
(
!
allskip
&&
// Do not filter the block if all is skip encoded
(
!
enable_fb_flag
||
decision
(
k
,
l
,
rec
,
org
,
cm
,
bs
,
w
/
bs
,
h
/
bs
,
strength
,
fb_size_log2
,
blocks
+
block_index
)))
{
// Iterate over all smaller blocks inside the filter block
for
(
m
=
0
;
m
<
(
h
+
bs
-
1
)
/
bs
;
m
++
)
{
for
(
n
=
0
;
n
<
(
w
+
bs
-
1
)
/
bs
;
n
++
)
{
xpos
=
(
l
<<
fb_size_log2
)
+
n
*
bs
;
ypos
=
(
k
<<
fb_size_log2
)
+
m
*
bs
;
if
(
!
cm
->
mi_grid_visible
[
ypos
/
bs
*
cm
->
mi_stride
+
xpos
/
bs
]
->
mbmi
.
skip
)
{
// Not skip block, apply the filter
clpf_block
(
rec
->
y_buffer
,
dst
->
y_buffer
,
stride_y
,
xpos
,
ypos
,
bs
,
bs
,
width
,
height
,
strength
);
}
else
{
// Skip block, copy instead
for
(
c
=
0
;
c
<
bs
;
c
++
)
*
(
uint64_t
*
)(
dst
->
y_buffer
+
(
ypos
+
c
)
*
stride_y
+
xpos
)
=
*
(
uint64_t
*
)(
rec
->
y_buffer
+
(
ypos
+
c
)
*
stride_y
+
xpos
);
}
}
}
}
else
{
// Entire filter block is skip, copy
for
(
m
=
0
;
m
<
h
;
m
++
)
memcpy
(
dst
->
y_buffer
+
((
k
<<
fb_size_log2
)
+
m
)
*
stride_y
+
(
l
<<
fb_size_log2
),
rec
->
y_buffer
+
((
k
<<
fb_size_log2
)
+
m
)
*
stride_y
+
(
l
<<
fb_size_log2
),
w
);
}
block_index
+=
!
allskip
;
// Count number of blocks filtered
}
#endif
}
}
// Iterate over the superblocks of an entire frame
void
av1_clpf_frame
(
const
YV12_BUFFER_CONFIG
*
frame
,
const
AV1_COMMON
*
cm
,
MACROBLOCKD
*
xd
)
{
int
x
,
y
;
for
(
y
=
0
;
y
<
cm
->
mi_rows
;
y
+=
MAX_MIB_SIZE
)
for
(
x
=
0
;
x
<
cm
->
mi_cols
;
x
+=
MAX_MIB_SIZE
)
av1_clpf_sb
(
frame
,
cm
,
xd
,
cm
->
mi_grid_visible
,
x
,
y
);
return
block_index
;
}
av1/common/clpf.h
View file @
abe0484c
...
...
@@ -13,15 +13,17 @@
#include
"av1/common/reconinter.h"
// Configuration
#define CLPF_ALLOW_PIXEL_PARALLELISM \
1 // 1 = SIMD friendly (adds a buffer requirement)
#define CLPF_ALLOW_BLOCK_PARALLELISM \
0 // 1 = MT friendly (degrades quality slightly)
#define CLPF_FILTER_ALL_PLANES \
0 // 1 = filter both luma and chroma, 0 = filter only luma
#define MAX_FB_SIZE 128
void
av1_clpf_frame
(
const
YV12_BUFFER_CONFIG
*
frame
,
const
AV1_COMMON
*
cm
,
MACROBLOCKD
*
xd
);
int
av1_clpf_maxbits
(
const
AV1_COMMON
*
cm
);
int
av1_clpf_sample
(
int
X
,
int
A
,
int
B
,
int
C
,
int
D
,
int
E
,
int
F
,
int
b
);
int
av1_clpf_frame
(
const
YV12_BUFFER_CONFIG
*
dst
,
const
YV12_BUFFER_CONFIG
*
rec
,
const
YV12_BUFFER_CONFIG
*
org
,
const
AV1_COMMON
*
cm
,
int
enable_fb_flag
,
unsigned
int
strength
,
unsigned
int
fb_size_log2
,
uint8_t
*
blocks
,
int
(
*
decision
)(
int
,
int
,
const
YV12_BUFFER_CONFIG
*
,
const
YV12_BUFFER_CONFIG
*
,
const
AV1_COMMON
*
cm
,
int
,
int
,
int
,
unsigned
int
,
unsigned
int
,
uint8_t
*
));
#endif
av1/common/onyxc_int.h
View file @
abe0484c
...
...
@@ -151,7 +151,10 @@ typedef struct AV1Common {
int
use_highbitdepth
;
#endif
#if CONFIG_CLPF
int
clpf
;
int
clpf_numblocks
;
int
clpf_size
;
int
clpf_strength
;
uint8_t
*
clpf_blocks
;
#endif
YV12_BUFFER_CONFIG
*
frame_to_show
;
...
...
av1/decoder/decodeframe.c
View file @
abe0484c
...
...
@@ -2044,7 +2044,26 @@ static void setup_loopfilter(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
#if CONFIG_CLPF
// Reads the CLPF parameters from the frame header into cm.
//
// Layout as read here: 1 bit clpf, 2 bits clpf_strength; if the strength is
// non-zero, 2 bits clpf_size; if the size is non-zero, clpf_numblocks coded
// with av1_clpf_maxbits(cm) bits, followed by one bit per block. The
// per-block bits are stored in a freshly allocated cm->clpf_blocks array
// (left as 0 when no per-block bits are present).
//
// NOTE(review): this text was captured from a diff view; the read of
// cm->clpf may be a line the commit removed -- confirm against the tree.
// NOTE(review): clpf_numblocks comes straight from the bitstream and is not
// checked here against the number of filter blocks in the frame -- confirm
// that consumers cannot index past the allocation.
static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
  int idx;

  cm->clpf = aom_rb_read_literal(rb, 1);
  cm->clpf_blocks = 0;
  cm->clpf_strength = aom_rb_read_literal(rb, 2);
  if (!cm->clpf_strength) return;

  cm->clpf_size = aom_rb_read_literal(rb, 2);
  if (!cm->clpf_size) return;

  cm->clpf_numblocks = aom_rb_read_literal(rb, av1_clpf_maxbits(cm));
  CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(cm->clpf_numblocks));
  for (idx = 0; idx < cm->clpf_numblocks; ++idx) {
    cm->clpf_blocks[idx] = aom_rb_read_literal(rb, 1);
  }
}
// Decoder-side decision callback for av1_clpf_frame(): the filter on/off
// choice for a filter block has already been decoded, so simply return the
// bit that `bit` points at. All other parameters exist only to match the
// callback signature and are ignored.
static int clpf_bit(int k, int l, const YV12_BUFFER_CONFIG *rec,
                    const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                    int block_size, int w, int h, unsigned int strength,
                    unsigned int fb_size_log2, uint8_t *bit) {
  (void)k;
  (void)l;
  (void)rec;
  (void)org;
  (void)cm;
  (void)block_size;
  (void)w;
  (void)h;
  (void)strength;
  (void)fb_size_log2;
  return bit[0];
}
#endif
...
...
@@ -3906,8 +3925,22 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
#endif // CONFIG_LOOP_RESTORATION
#if CONFIG_CLPF
if
(
cm
->
clpf
&&
!
cm
->
skip_loop_filter
)
av1_clpf_frame
(
&
pbi
->
cur_buf
->
buf
,
cm
,
&
pbi
->
mb
);
if
(
cm
->
clpf_strength
&&
!
cm
->
skip_loop_filter
)
{
YV12_BUFFER_CONFIG
dst
;
// Buffer for the result
dst
=
pbi
->
cur_buf
->
buf
;
CHECK_MEM_ERROR
(
cm
,
dst
.
y_buffer
,
aom_malloc
(
dst
.
y_stride
*
dst
.
y_height
));
av1_clpf_frame
(
&
dst
,
&
pbi
->
cur_buf
->
buf
,
0
,
cm
,
!!
cm
->
clpf_size
,
cm
->
clpf_strength
+
(
cm
->
clpf_strength
==
3
),
4
+
cm
->
clpf_size
,
cm
->
clpf_blocks
,
clpf_bit
);
// Copy result
memcpy
(
pbi
->
cur_buf
->
buf
.
y_buffer
,
dst
.
y_buffer
,
dst
.
y_height
*
dst
.
y_stride
);
aom_free
(
dst
.
y_buffer
);
}
if
(
cm
->
clpf_blocks
)
aom_free
(
cm
->
clpf_blocks
);
#endif
#if CONFIG_DERING
if
(
cm
->
dering_level
&&
!
cm
->
skip_loop_filter
)
{
...
...
av1/encoder/bitstream.c
View file @
abe0484c
...
...
@@ -2590,7 +2590,22 @@ static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
#if CONFIG_CLPF
// Writes the CLPF parameters to the frame header.
//
// Layout as written here: 1 bit clpf, 2 bits clpf_strength; if the strength
// is non-zero, 2 bits clpf_size; if the size is non-zero, clpf_numblocks
// coded with av1_clpf_maxbits(cm) bits, followed by one bit per entry of
// cm->clpf_blocks.
//
// NOTE(review): this text was captured from a diff view; the write of
// cm->clpf may be a line the commit removed -- confirm against the tree.
static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
  int idx;

  aom_wb_write_literal(wb, cm->clpf, 1);
  aom_wb_write_literal(wb, cm->clpf_strength, 2);
  if (!cm->clpf_strength) return;

  aom_wb_write_literal(wb, cm->clpf_size, 2);
  if (!cm->clpf_size) return;

  // TODO(stemidts): The number of bits to transmit could be
  // implicitly deduced if transmitted after the filter block or
  // after the frame (when it's known whether the block is all
  // skip and implicitly unfiltered).  And the bits do not have
  // 50% probability, so a more efficient coding is possible.
  aom_wb_write_literal(wb, cm->clpf_numblocks, av1_clpf_maxbits(cm));
  for (idx = 0; idx < cm->clpf_numblocks; ++idx) {
    aom_wb_write_literal(wb, cm->clpf_blocks[idx], 1);
  }
}
#endif
...
...
av1/encoder/clpf_rdo.c
0 → 100644
View file @
abe0484c
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include
"av1/common/clpf.h"
#include
"aom/aom_integer.h"
#include
"av1/common/quant_common.h"
// Accumulates the squared error of an unfiltered and a filtered 8x8 block.
//
//   rec, stride    reconstructed samples and their row stride
//   org, so        original samples and their row stride
//   x0, y0         top-left sample of the 8x8 block
//   width, height  frame size used to clamp neighbour coordinates
//   sum0           += squared error of rec against org (no filtering)
//   sum1           += squared error of the filtered rec against org
//   strength       clamp value handed to av1_clpf_sample()
//
// Both sums are added to, not overwritten, so the caller can total several
// blocks.
static void detect_clpf(const uint8_t *rec, const uint8_t *org, int x0, int y0,
                        int width, int height, int so, int stride, int *sum0,
                        int *sum1, unsigned int strength) {
  int row, col;

  for (row = y0; row < y0 + 8; ++row) {
    // Offsets of the row above, the current row and the row below in rec.
    const int above = AOMMAX(0, row - 1) * stride;
    const int here = row * stride;
    const int below = AOMMIN(height - 1, row + 1) * stride;

    for (col = x0; col < x0 + 8; ++col) {
      const int source = org[row * so + col];
      const int recon = rec[here + col];
      const int up = rec[above + col];
      const int left2 = rec[here + AOMMAX(0, col - 2)];
      const int left1 = rec[here + AOMMAX(0, col - 1)];
      const int right1 = rec[here + AOMMIN(width - 1, col + 1)];
      const int right2 = rec[here + AOMMIN(width - 1, col + 2)];
      const int down = rec[below + col];
      const int filtered =
          recon + av1_clpf_sample(recon, up, left2, left1, right1, right2,
                                  down, strength);
      const int err_plain = source - recon;
      const int err_filtered = source - filtered;

      *sum0 += err_plain * err_plain;
      *sum1 += err_filtered * err_filtered;
    }
  }
}
// Accumulates, for one 8x8 block at (x0, y0), the squared error against org
// of the unfiltered reconstruction and of the reconstruction filtered at
// each of the three strengths 1, 2 and 4.
//
//   sum[0]     += unfiltered squared error
//   sum[1..3]  += squared error with strength 1, 2, 4 respectively
//
// rec/stride and org/so are the reconstructed and original samples with
// their row strides; width/height clamp the neighbour coordinates.
static void detect_multi_clpf(const uint8_t *rec, const uint8_t *org, int x0,
                              int y0, int width, int height, int so,
                              int stride, int *sum) {
  static const int strengths[3] = { 1, 2, 4 };
  int row, col, s;

  for (row = y0; row < y0 + 8; ++row) {
    // Offsets of the row above, the current row and the row below in rec.
    const int above = AOMMAX(0, row - 1) * stride;
    const int here = row * stride;
    const int below = AOMMIN(height - 1, row + 1) * stride;

    for (col = x0; col < x0 + 8; ++col) {
      const int source = org[row * so + col];
      const int recon = rec[here + col];
      const int up = rec[above + col];
      const int left2 = rec[here + AOMMAX(0, col - 2)];
      const int left1 = rec[here + AOMMAX(0, col - 1)];
      const int right1 = rec[here + AOMMIN(width - 1, col + 1)];
      const int right2 = rec[here + AOMMIN(width - 1, col + 2)];
      const int down = rec[below + col];

      sum[0] += (source - recon) * (source - recon);
      for (s = 0; s < 3; ++s) {
        const int filtered =
            recon + av1_clpf_sample(recon, up, left2, left1, right1, right2,
                                    down, strengths[s]);
        sum[s + 1] += (source - filtered) * (source - filtered);
      }
    }
  }
}
// Encoder-side decision callback: decides whether filtering filter block
// (row k, column l) lowers the squared error against the original.
//
// The filter block starts at sample (l << fb_size_log2, k << fb_size_log2)
// and spans w x h sub-blocks of block_size samples. Sub-blocks whose mode
// info is marked skip are ignored; for the others detect_clpf() totals the
// unfiltered and filtered squared error. The verdict (1 when the filtered
// error is strictly smaller, else 0) is stored in *res and also returned.
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
                      const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                      int block_size, int w, int h, unsigned int strength,
                      unsigned int fb_size_log2, uint8_t *res) {
  const int bs = MAX_MIB_SIZE;
  int err_plain = 0;
  int err_filtered = 0;
  int row, col;

  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      const int xpos = (l << fb_size_log2) + col * block_size;
      const int ypos = (k << fb_size_log2) + row * block_size;

      if (cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
              ->mbmi.skip)
        continue;

      detect_clpf(rec->y_buffer, org->y_buffer, xpos, ypos, rec->y_crop_width,
                  rec->y_crop_height, org->y_stride, rec->y_stride,
                  &err_plain, &err_filtered, strength);
    }
  }

  *res = err_filtered < err_plain;
  return *res;
}
// Calculate the square error of all filter settings.  Result:
// res[0][0]   : unfiltered
// res[0][1-3] : strength=1,2,4, no signals
// res[1][0]   : (bit count, fb size = 128)
// res[1][1-3] : strength=1,2,4, fb size = 128
// res[2][0]   : (bit count, fb size = 64)
// res[2][1-3] : strength=1,2,4, fb size = 64
// res[3][0]   : (bit count, fb size = 32)
// res[3][1-3] : strength=1,2,4, fb size = 32
//
// (y, x) is the top-left sample of the region, which spans w x h sub-blocks
// of block_size samples. While fb_size_log2 is above get_msb(MAX_FB_SIZE) - 3
// the region is split into up to four quadrants of half the size and the
// function recurses; for the level being split, each strength keeps the
// smaller of "quadrants unfiltered" and "quadrants filtered", and
// res[level][0] gains one signal bit if any quadrant held a non-skip block.
// At the smallest size the errors of all non-skip sub-blocks are measured
// with detect_multi_clpf() and added to every row of res.
//
// Returns 1 if the region contained at least one non-skip block, else 0.
static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
                    const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                    unsigned int block_size, unsigned int fb_size_log2, int w,
                    int h, int64_t res[4][4]) {
  int i, m, n, filtered = 0;
  int sum[4];
  int bslog = get_msb(block_size);

  sum[0] = sum[1] = sum[2] = sum[3] = 0;

  if (fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
    int w1, h1, w2, h2, level, half, step;
    // These hold values read back from the 64-bit res[][] accumulators, so
    // they must be 64-bit too: frame-wide squared-error totals can exceed
    // the range of int.
    int64_t sum1, sum2, sum3, oldfiltered;

    fb_size_log2--;
    half = 1 << (fb_size_log2 - bslog);  // Quadrant size in sub-blocks
    step = 1 << fb_size_log2;            // Quadrant size in samples
    w1 = AOMMIN(half, w);
    h1 = AOMMIN(half, h);
    w2 = AOMMIN(w - half, w >> 1);
    h2 = AOMMIN(h - half, h >> 1);
    level = get_msb(MAX_FB_SIZE) - fb_size_log2;

    // Remember the totals so far, then let the quadrants accumulate their
    // unfiltered error into res[level][0], starting from zero.
    sum1 = res[level][1];
    sum2 = res[level][2];
    sum3 = res[level][3];
    oldfiltered = res[level][0];
    res[level][0] = 0;

    filtered = clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1,
                        res);
    if (half < w)
      filtered |= clpf_rdo(y, x + step, rec, org, cm, block_size,
                           fb_size_log2, w2, h1, res);
    if (half < h) {
      filtered |= clpf_rdo(y + step, x, rec, org, cm, block_size,
                           fb_size_log2, w1, h2, res);
      filtered |= clpf_rdo(y + step, x + step, rec, org, cm, block_size,
                           fb_size_log2, w2, h2, res);
    }

    // For each strength keep the better of leaving this region unfiltered
    // (previous total + unfiltered error) and filtering it.
    res[level][1] = AOMMIN(sum1 + res[level][0], res[level][1]);
    res[level][2] = AOMMIN(sum2 + res[level][0], res[level][2]);
    res[level][3] = AOMMIN(sum3 + res[level][0], res[level][3]);
    res[level][0] = oldfiltered + filtered;  // Number of signal bits
    return filtered;
  }

  for (m = 0; m < h; m++) {
    for (n = 0; n < w; n++) {
      int xpos = x + n * block_size;
      int ypos = y + m * block_size;
      if (!cm->mi_grid_visible[ypos / MAX_MIB_SIZE * cm->mi_stride +
                               xpos / MAX_MIB_SIZE]
               ->mbmi.skip) {
        detect_multi_clpf(rec->y_buffer, org->y_buffer, xpos, ypos,
                          rec->y_crop_width, rec->y_crop_height, org->y_stride,
                          rec->y_stride, sum);
        filtered = 1;
      }
    }
  }

  for (i = 0; i < 4; i++) {
    res[i][0] += sum[0];
    res[i][1] += sum[1];
    res[i][2] += sum[2];
    res[i][3] += sum[3];
  }
  return filtered;
}
// Picks the CLPF strength and filter block size for a frame.
//
// clpf_rdo() is run over every MAX_FB_SIZE filter block of the luma plane to
// fill sums[][] (row = signalling mode / filter block size, column =
// unfiltered or strength 1, 2, 4). A lambda-weighted estimate of the
// signalling bits is then added to the filtered candidates, and the
// candidate with the smallest adjusted error wins.
//
//   *best_strength  0 (no filtering), 1, 2 or 4
//   *best_bs        0 when no per-block signalling is used, otherwise
//                   5, 6 or 7 (presumably log2 of the filter block size --
//                   confirm against the caller)
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
                         const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                         int *best_strength, int *best_bs) {
  int i, j, k, l;
  int64_t best, sums[4][4];
  int width = rec->y_crop_width, height = rec->y_crop_height;
  const int bs = MAX_MIB_SIZE;
  int fb_size_log2 = get_msb(MAX_FB_SIZE);
  int num_fb_ver = (height + (1 << fb_size_log2) - bs) >> fb_size_log2;
  int num_fb_hor = (width + (1 << fb_size_log2) - bs) >> fb_size_log2;

  memset(sums, 0, sizeof(sums));

  for (k = 0; k < num_fb_ver; k++) {
    for (l = 0; l < num_fb_hor; l++) {
      // Calculate the block size after frame border clipping
      int h =
          AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
      int w =
          AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
      // A remainder of zero means the block is full sized.
      h += !h << fb_size_log2;
      w += !w << fb_size_log2;
      clpf_rdo(k << fb_size_log2, l << fb_size_log2, rec, org, cm, bs,
               fb_size_log2, w / bs, h / bs, sums);
    }
  }

  for (j = 0; j < 4; j++) {
    static const double lambda_square[] = {
      // exp((i - 15.4244) / 8.4010)
      0.159451, 0.179607, 0.202310, 0.227884, 0.256690, 0.289138, 0.325687,
      0.366856, 0.413230, 0.465465, 0.524303, 0.590579, 0.665233, 0.749323,
      0.844044, 0.950737, 1.070917, 1.206289, 1.358774, 1.530533, 1.724004,
      1.941931, 2.187406, 2.463911, 2.775368, 3.126195, 3.521370, 3.966498,
      4.467893, 5.032669, 5.668837, 6.385421, 7.192586, 8.101784, 9.125911,
      10.27949, 11.57890, 13.04256, 14.69124, 16.54832, 18.64016, 20.99641,
      23.65052, 26.64013, 30.00764, 33.80084, 38.07352, 42.88630, 48.30746,
      54.41389, 61.29221, 69.04002, 77.76720, 87.59756, 98.67056, 111.1432,
      125.1926, 141.0179, 158.8436, 178.9227, 201.5399, 227.0160, 255.7126,
      288.0366
    };

    // Estimate the bit costs and adjust the square errors
    double lambda =
        lambda_square[av1_get_qindex(&cm->seg, 0, cm->base_qindex) >> 2];
    // For j == 0, sums[0][0] is the unfiltered squared error of the whole
    // frame rather than a bit count, so the scaled value may not fit in an
    // int. Converting an out-of-range double to int is undefined, even
    // though the result is multiplied by zero below; a 64-bit cost avoids
    // that.
    const int64_t cost =
        (int64_t)((1.2 * lambda * (sums[j][0] + 2 + 2 * (j > 0)) + 0.5));

    // Pack the candidate index (j * 4 + i) into the low four bits so that
    // the winning setting can be recovered from the minimum.
    for (i = 0; i < 4; i++)
      sums[j][i] = ((sums[j][i] + (i && j) * cost) << 4) + j * 4 + i;
  }

  best = (int64_t)1 << 62;
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
      // Column 0 of rows 1-3 is a bit count, not a candidate.
      if ((!i || j) && sums[i][j] < best) best = sums[i][j];

  best &= 15;
  *best_bs = (best > 3) * (5 + (best < 12) + (best < 8));
  *best_strength = best ? 1 << ((best - 1) & 3) : 0;
}
av1/encoder/clpf_rdo.h
0 → 100644
View file @
abe0484c
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_ENCODER_CLPF_H_
#define AV1_ENCODER_CLPF_H_