Xiph.Org / aom-rav1e · commit 14b8112b

Extra rounding to let hw use narrower integers.

Change-Id: I175d6ff03f31a2e0d2fe7cd1c3852210d6e0ddf5

Authored May 03, 2017 by Sean Purser-Haskell
Committed May 11, 2017 by Sean Purser-Haskell
Parent: f89056aa
Changes: 5 files
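The entire change is one operation applied in several places: round a fixed-point value to the nearest multiple of 1 << WARP_PARAM_REDUCE_BITS and scale it back up, so that its low bits are zero by construction and a hardware implementation can feed correspondingly narrower integers to its multipliers. A minimal standalone sketch of that operation; the rounding macros are reproduced in the style of libaom's helpers, and the sample value is hypothetical:

#include <stdio.h>

/* Round-to-nearest right shift, in the style of libaom's helpers. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))
#define ROUND_POWER_OF_TWO_SIGNED(value, n)           \
  (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
                 : ROUND_POWER_OF_TWO((value), (n)))

#define WARP_PARAM_REDUCE_BITS 6 /* introduced in av1/common/mv.h below */

int main(void) {
  int v = -1234; /* hypothetical shear parameter, WARPEDMODEL precision */
  /* Round to the nearest multiple of 64, then scale back up: the low
   * WARP_PARAM_REDUCE_BITS bits of the result are always zero. */
  int r = ROUND_POWER_OF_TWO_SIGNED(v, WARP_PARAM_REDUCE_BITS)
          << WARP_PARAM_REDUCE_BITS;
  printf("%d -> %d, low bits = %d\n", v, r,
         r & ((1 << WARP_PARAM_REDUCE_BITS) - 1)); /* -1234 -> -1216, 0 */
  return 0;
}

get_shear_params applies this to the four shear parameters once per warp model; each warp kernel additionally applies it to the per-block offsets sx4 and sy4.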
av1/common/mv.h

@@ -58,6 +58,8 @@ typedef struct mv32 {
 // Precision of filter taps
 #define WARPEDPIXEL_FILTER_BITS 7
+#define WARP_PARAM_REDUCE_BITS 6
+
 // Precision bits reduction after horizontal shear
 #define HORSHEAR_REDUCE_PREC_BITS 5
 #define VERSHEAR_REDUCE_PREC_BITS \
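For context on the neighbouring constants: the horizontal filter works with WARPEDPIXEL_FILTER_BITS = 7 bits of tap precision and immediately drops HORSHEAR_REDUCE_PREC_BITS = 5 of them, leaving intermediate rows scaled by 2^(7-5) = 4. That is why the boundary cases in the hunks below multiply a raw edge pixel by 1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS). A quick check of the resulting bit budget (the bit depths are illustrative):

#include <assert.h>
#include <stdio.h>

#define WARPEDPIXEL_FILTER_BITS 7
#define HORSHEAR_REDUCE_PREC_BITS 5

int main(void) {
  /* Rows leaving the horizontal filter carry 2 extra fractional bits. */
  const int scale = 1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS);
  for (int bd = 8; bd <= 12; bd += 2) {
    int max_sample = (1 << bd) - 1;
    /* A bd-bit sample scaled by 4 still fits in bd + 2 bits, i.e. at most
     * 14 bits for 12-bit input, comfortably inside a 16-bit SIMD lane. */
    assert(max_sample * scale < (1 << (bd + 2)));
    printf("bd=%2d: max intermediate = %5d\n", bd, max_sample * scale);
  }
  return 0;
}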
av1/common/warped_motion.c
@@ -779,6 +779,15 @@ int get_shear_params(WarpedMotionParams *wm) {
                       INT16_MIN, INT16_MAX);
   if (!is_affine_shear_allowed(wm->alpha, wm->beta, wm->gamma, wm->delta))
     return 0;
+
+  wm->alpha = ROUND_POWER_OF_TWO_SIGNED(wm->alpha, WARP_PARAM_REDUCE_BITS)
+              << WARP_PARAM_REDUCE_BITS;
+  wm->beta = ROUND_POWER_OF_TWO_SIGNED(wm->beta, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+  wm->gamma = ROUND_POWER_OF_TWO_SIGNED(wm->gamma, WARP_PARAM_REDUCE_BITS)
+              << WARP_PARAM_REDUCE_BITS;
+  wm->delta = ROUND_POWER_OF_TWO_SIGNED(wm->delta, WARP_PARAM_REDUCE_BITS)
+             << WARP_PARAM_REDUCE_BITS;
   return 1;
 }
@@ -1002,6 +1011,14 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
       iy4 = y4 >> WARPEDMODEL_PREC_BITS;
       sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
 
+      sx4 += alpha * (-4) + beta * (-4);
+      sy4 += gamma * (-4) + delta * (-4);
+
+      sx4 = ROUND_POWER_OF_TWO_SIGNED(sx4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+      sy4 = ROUND_POWER_OF_TWO_SIGNED(sy4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+
       // Horizontal filter
       for (k = -7; k < 8; ++k) {
         int iy = iy4 + k;
@@ -1023,7 +1040,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
               (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS));
         }
       } else {
-        int sx = sx4 + alpha * (-4) + beta * k;
+        int sx = sx4 + beta * (k + 4);
         for (l = -4; l < 4; ++l) {
           int ix = ix4 + l - 3;
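To see that the rewritten row setup is the same computation up to a single extra rounding: the old code derived each row's start from the raw sx4, while the new code folds alpha * (-4) + beta * (-4) into sx4, rounds it once per block, and then needs only beta * (k + 4) per row. A standalone sketch with hypothetical block values (alpha and beta are already multiples of 64 here, as get_shear_params now guarantees):

#include <stdio.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))
#define ROUND_POWER_OF_TWO_SIGNED(value, n)           \
  (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
                 : ROUND_POWER_OF_TWO((value), (n)))
#define WARP_PARAM_REDUCE_BITS 6

int main(void) {
  int sx4 = 81234;              /* hypothetical per-block shear offset */
  int alpha = 448, beta = -192; /* multiples of 1 << WARP_PARAM_REDUCE_BITS */

  /* New code: fold the (k, l) = (-4, -4) corner into the base, round once. */
  int base = sx4 + alpha * (-4) + beta * (-4);
  int rounded = ROUND_POWER_OF_TWO_SIGNED(base, WARP_PARAM_REDUCE_BITS)
                << WARP_PARAM_REDUCE_BITS;

  for (int k = -7; k < 8; ++k) {
    int old_sx = sx4 + alpha * (-4) + beta * k; /* before this commit */
    int new_sx = rounded + beta * (k + 4);      /* after this commit */
    /* The difference is the one-time rounding of the base: constant in k
     * and at most half of 1 << WARP_PARAM_REDUCE_BITS in magnitude. */
    printf("k=%3d old=%6d new=%6d diff=%4d\n", k, old_sx, new_sx,
           new_sx - old_sx);
  }
  return 0;
}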
@@ -1048,8 +1065,8 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
       // Vertical filter
       for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
-        int sy = sy4 + gamma * (-4) + delta * k;
-        for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
+        int sy = sy4 + delta * (k + 4);
+        for (l = -4; l < 4; ++l) {
           uint16_t *p =
               &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
           const int offs = ROUND_POWER_OF_TWO(sy, WARPEDDIFF_PREC_BITS) +
@@ -1245,6 +1262,14 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
       iy4 = y4 >> WARPEDMODEL_PREC_BITS;
       sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
 
+      sx4 += alpha * (-4) + beta * (-4);
+      sy4 += gamma * (-4) + delta * (-4);
+
+      sx4 = ROUND_POWER_OF_TWO_SIGNED(sx4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+      sy4 = ROUND_POWER_OF_TWO_SIGNED(sy4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+
       // Horizontal filter
       for (k = -7; k < 8; ++k) {
         int iy = iy4 + k;
@@ -1281,7 +1306,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
           //   ix4 + 3 + 7 - 3 = ix4 + 7 <= width + 12
           // So, assuming that border extension has been done, we
           // don't need to explicitly clamp values.
-          int sx = sx4 + alpha * (-4) + beta * k;
+          int sx = sx4 + alpha * (4 - 4) + beta * (k + 4);
           for (l = -4; l < 4; ++l) {
             int ix = ix4 + l - 3;
@@ -1303,7 +1328,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
       // Vertical filter
       for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
-        int sy = sy4 + gamma * (-4) + delta * k;
+        int sy = sy4 + gamma * (4 - 4) + delta * (k + 4);
         for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
           uint8_t *p =
               &pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
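The payoff of rounding both the shear parameters (in get_shear_params) and the per-block bases sx4/sy4 is an invariant: every shear offset computed in the filter loops is a multiple of 1 << WARP_PARAM_REDUCE_BITS, so its low 6 bits carry no information and the datapath can be narrower. A sketch that checks the invariant over one block, with the same hypothetical values and macros as above:

#include <assert.h>
#include <stdio.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))
#define ROUND_POWER_OF_TWO_SIGNED(value, n)           \
  (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
                 : ROUND_POWER_OF_TWO((value), (n)))
#define WARP_PARAM_REDUCE_BITS 6
#define LOW_MASK ((1 << WARP_PARAM_REDUCE_BITS) - 1)

int main(void) {
  int sx4 = 81234, sy4 = -20567; /* hypothetical raw per-block offsets */
  int alpha = 448, beta = -192;  /* all four parameters are multiples of */
  int gamma = 64, delta = 320;   /* 64 after get_shear_params            */

  sx4 += alpha * (-4) + beta * (-4);
  sy4 += gamma * (-4) + delta * (-4);
  sx4 = ROUND_POWER_OF_TWO_SIGNED(sx4, WARP_PARAM_REDUCE_BITS)
        << WARP_PARAM_REDUCE_BITS;
  sy4 = ROUND_POWER_OF_TWO_SIGNED(sy4, WARP_PARAM_REDUCE_BITS)
        << WARP_PARAM_REDUCE_BITS;

  for (int k = -7; k < 8; ++k) {
    for (int l = -4; l < 4; ++l) {
      int sx = sx4 + alpha * (l + 4) + beta * (k + 4);
      int sy = sy4 + gamma * (l + 4) + delta * (k + 4);
      assert((sx & LOW_MASK) == 0 && (sy & LOW_MASK) == 0);
    }
  }
  printf("all shear offsets are multiples of %d\n",
         1 << WARP_PARAM_REDUCE_BITS);
  return 0;
}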
av1/common/x86/highbd_warp_plane_ssse3.c
@@ -68,6 +68,14 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
       iy4 = y4 >> WARPEDMODEL_PREC_BITS;
       sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
 
+      sx4 += alpha * (-4) + beta * (-4);
+      sy4 += gamma * (-4) + delta * (-4);
+
+      sx4 = ROUND_POWER_OF_TWO_SIGNED(sx4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+      sy4 = ROUND_POWER_OF_TWO_SIGNED(sy4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+
       // Horizontal filter
       for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
         int iy = iy4 + k;
@@ -88,10 +96,10 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
                 ref[iy * stride + (width - 1)] *
                     (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
       } else {
-        const int sx = sx4 + alpha * (-4) + beta * k +
-                       // Include rounding and offset here
-                       (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-                       (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
+        int sx = sx4 + beta * (k + 4) +
+                 // Include rounding and offset here
+                 (1 << (WARPEDDIFF_PREC_BITS - 1)) +
+                 (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
         // Load source pixels
         const __m128i src =
@@ -195,9 +203,8 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref,
       // Vertical filter
       for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
-        const int sy = sy4 + gamma * (-4) + delta * k +
-                       (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-                       (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
+        int sy = sy4 + delta * (k + 4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
+                 (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
         // Load from tmp and rearrange pairs of consecutive rows into the
         // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
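One SIMD-specific detail in these hunks: the vector paths fold the round-to-nearest bias (1 << (WARPEDDIFF_PREC_BITS - 1)) and the filter-table offset (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS) into sx/sy up front, so a plain truncating shift later both rounds and lands on a non-negative table index. A sketch of the bias-then-shift identity; WARPEDDIFF_PREC_BITS = 10 is my assumption for illustration, not something this diff shows:

#include <assert.h>
#include <stdio.h>

#define WARPEDDIFF_PREC_BITS 10 /* assumed value, for illustration only */

int main(void) {
  for (int sx = 0; sx < (4 << WARPEDDIFF_PREC_BITS); ++sx) {
    /* Round to nearest, spelled out: quotient plus carry from remainder. */
    int q = sx >> WARPEDDIFF_PREC_BITS;
    int r = sx & ((1 << WARPEDDIFF_PREC_BITS) - 1);
    int expect = q + (r >= (1 << (WARPEDDIFF_PREC_BITS - 1)));
    /* Adding half the divisor first lets a truncating shift do the same. */
    int got = (sx + (1 << (WARPEDDIFF_PREC_BITS - 1))) >> WARPEDDIFF_PREC_BITS;
    assert(got == expect);
  }
  printf("bias-then-shift rounds to nearest for all non-negative sx\n");
  return 0;
}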
av1/common/x86/warp_plane_sse2.c
@@ -63,6 +63,14 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
       iy4 = y4 >> WARPEDMODEL_PREC_BITS;
       sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
 
+      sx4 += alpha * (-4) + beta * (-4);
+      sy4 += gamma * (-4) + delta * (-4);
+
+      sx4 = ROUND_POWER_OF_TWO_SIGNED(sx4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+      sy4 = ROUND_POWER_OF_TWO_SIGNED(sy4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+
       // Horizontal filter
       for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
         int iy = iy4 + k;
@@ -83,10 +91,10 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
                 ref[iy * stride + (width - 1)] *
                     (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
       } else {
-        const int sx = sx4 + alpha * (-4) + beta * k +
-                       // Include rounding and offset here
-                       (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-                       (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
+        int sx = sx4 + beta * (k + 4) +
+                 // Include rounding and offset here
+                 (1 << (WARPEDDIFF_PREC_BITS - 1)) +
+                 (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
         // Load source pixels
         const __m128i zero = _mm_setzero_si128();
@@ -190,9 +198,8 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width,
       // Vertical filter
       for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
-        const int sy = sy4 + gamma * (-4) + delta * k +
-                       (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-                       (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
+        int sy = sy4 + delta * (k + 4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
+                 (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
         // Load from tmp and rearrange pairs of consecutive rows into the
         // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7
av1/common/x86/warp_plane_ssse3.c
@@ -250,6 +250,14 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
       iy4 = y4 >> WARPEDMODEL_PREC_BITS;
       sy4 = y4 & ((1 << WARPEDMODEL_PREC_BITS) - 1);
 
+      sx4 += alpha * (-4) + beta * (-4);
+      sy4 += gamma * (-4) + delta * (-4);
+
+      sx4 = ROUND_POWER_OF_TWO_SIGNED(sx4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+      sy4 = ROUND_POWER_OF_TWO_SIGNED(sy4, WARP_PARAM_REDUCE_BITS)
+            << WARP_PARAM_REDUCE_BITS;
+
       // Horizontal filter
       for (k = -7; k < AOMMIN(8, p_height - i); ++k) {
         int iy = iy4 + k;
@@ -270,10 +278,10 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
                 ref[iy * stride + (width - 1)] *
                     (1 << (WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)));
       } else {
-        const int sx = sx4 + alpha * (-4) + beta * k +
-                       // Include rounding and offset here
-                       (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-                       (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
+        int sx = sx4 + beta * (k + 4) +
+                 // Include rounding and offset here
+                 (1 << (WARPEDDIFF_PREC_BITS - 1)) +
+                 (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
         // Load source pixels
         const __m128i src =
@@ -367,9 +375,8 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width,
       // Vertical filter
       for (k = -4; k < AOMMIN(4, p_height - i - 4); ++k) {
-        const int sy = sy4 + gamma * (-4) + delta * k +
-                       (1 << (WARPEDDIFF_PREC_BITS - 1)) +
-                       (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
+        int sy = sy4 + delta * (k + 4) + (1 << (WARPEDDIFF_PREC_BITS - 1)) +
+                 (WARPEDPIXEL_PREC_SHIFTS << WARPEDDIFF_PREC_BITS);
         // Load from tmp and rearrange pairs of consecutive rows into the
         // column order 0 0 2 2 4 4 6 6; 1 1 3 3 5 5 7 7