Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
1d18460f
Commit
1d18460f
authored
Apr 04, 2017
by
Debargha Mukherjee
Browse files
Reduce precision bits between shears
Change-Id: I89e981c9396c7a1ba8051d65036a16692da94d0d
parent
0d749d60
Changes
3
Hide whitespace changes
Inline
Side-by-side
av1/common/mv.h
View file @
1d18460f
...
...
@@ -53,6 +53,11 @@ typedef struct mv32 {
// Precision of filter taps
#define WARPEDPIXEL_FILTER_BITS 7
// Precision bits reduction after horizontal shear
#define HORSHEAR_REDUCE_PREC_BITS 5
// Precision bits reduction after vertical shear. Defined as the remainder of
// 2 * WARPEDPIXEL_FILTER_BITS, so the horizontal and vertical reductions
// together always total 2 * WARPEDPIXEL_FILTER_BITS.
#define VERSHEAR_REDUCE_PREC_BITS \
(2 * WARPEDPIXEL_FILTER_BITS - HORSHEAR_REDUCE_PREC_BITS)
// Difference between the warped-model and warped-pixel precisions (both
// operands are defined outside this excerpt). Used as a right-shift amount
// when indexing the filter table.
#define WARPEDDIFF_PREC_BITS (WARPEDMODEL_PREC_BITS - WARPEDPIXEL_PREC_BITS)
/* clang-format off */
...
...
av1/common/warped_motion.c
View file @
1d18460f
...
...
@@ -916,6 +916,7 @@ static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
else
sum
+=
ref
[
iy
*
stride
+
ix
+
m
-
3
]
*
coeffs
[
m
];
}
sum
=
ROUND_POWER_OF_TWO
(
sum
,
HORSHEAR_REDUCE_PREC_BITS
);
tmp
[(
k
+
7
)
*
8
+
(
l
+
4
)]
=
sum
;
}
}
...
...
@@ -933,8 +934,7 @@ static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width,
sum
+=
tmp
[(
k
+
m
+
4
)
*
8
+
(
l
+
4
)]
*
coeffs
[
m
];
}
sum
=
clip_pixel_highbd
(
ROUND_POWER_OF_TWO_SIGNED
(
sum
,
2
*
WARPEDPIXEL_FILTER_BITS
),
bd
);
ROUND_POWER_OF_TWO_SIGNED
(
sum
,
VERSHEAR_REDUCE_PREC_BITS
),
bd
);
if
(
ref_frm
)
*
p
=
ROUND_POWER_OF_TWO_SIGNED
(
*
p
+
sum
,
1
);
else
...
...
@@ -1116,7 +1116,8 @@ void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
// (once border extension is taken into account)
for
(
l
=
0
;
l
<
8
;
++
l
)
{
tmp
[(
k
+
7
)
*
8
+
l
]
=
ref
[
iy
*
stride
]
*
(
1
<<
WARPEDPIXEL_FILTER_BITS
);
ref
[
iy
*
stride
]
*
(
1
<<
(
WARPEDPIXEL_FILTER_BITS
-
HORSHEAR_REDUCE_PREC_BITS
));
}
}
else
if
(
ix4
>=
width
+
6
)
{
// In this case, the leftmost pixel sampled is in column
...
...
@@ -1125,7 +1126,8 @@ void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
// (once border extension is taken into account)
for
(
l
=
0
;
l
<
8
;
++
l
)
{
tmp
[(
k
+
7
)
*
8
+
l
]
=
ref
[
iy
*
stride
+
(
width
-
1
)]
*
(
1
<<
WARPEDPIXEL_FILTER_BITS
);
ref
[
iy
*
stride
+
(
width
-
1
)]
*
(
1
<<
(
WARPEDPIXEL_FILTER_BITS
-
HORSHEAR_REDUCE_PREC_BITS
));
}
}
else
{
// If we get here, then
...
...
@@ -1148,6 +1150,7 @@ void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
for
(
m
=
0
;
m
<
8
;
++
m
)
{
sum
+=
ref
[
iy
*
stride
+
ix
+
m
]
*
coeffs
[
m
];
}
sum
=
ROUND_POWER_OF_TWO
(
sum
,
HORSHEAR_REDUCE_PREC_BITS
);
tmp
[(
k
+
7
)
*
8
+
(
l
+
4
)]
=
saturate_int16
(
sum
);
sx
+=
alpha
;
}
...
...
@@ -1169,8 +1172,7 @@ void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
for
(
m
=
0
;
m
<
8
;
++
m
)
{
sum
+=
tmp
[(
k
+
m
+
4
)
*
8
+
(
l
+
4
)]
*
coeffs
[
m
];
}
sum
=
clip_pixel
(
ROUND_POWER_OF_TWO
(
sum
,
2
*
WARPEDPIXEL_FILTER_BITS
));
sum
=
clip_pixel
(
ROUND_POWER_OF_TWO
(
sum
,
VERSHEAR_REDUCE_PREC_BITS
));
if
(
ref_frm
)
*
p
=
ROUND_POWER_OF_TWO
(
*
p
+
sum
,
1
);
else
...
...
@@ -1199,7 +1201,6 @@ static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width,
const
int32_t
gamma
=
wm
->
gamma
;
const
int32_t
delta
=
wm
->
delta
;
// printf("%d %d %d %d\n", mat[2], mat[3], mat[4], mat[5]);
av1_warp_affine
(
mat
,
ref
,
width
,
height
,
stride
,
pred
,
p_col
,
p_row
,
p_width
,
p_height
,
p_stride
,
subsampling_x
,
subsampling_y
,
ref_frm
,
alpha
,
beta
,
gamma
,
delta
);
...
...
av1/common/x86/warp_plane_sse2.c
View file @
1d18460f
...
...
@@ -79,11 +79,13 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
// would be taken from the leftmost/rightmost column, then we can
// skip the expensive horizontal filter.
if
(
ix4
<=
-
7
)
{
tmp
[
k
+
7
]
=
_mm_set1_epi16
(
ref
[
iy
*
stride
]
*
(
1
<<
WARPEDPIXEL_FILTER_BITS
));
tmp
[
k
+
7
]
=
_mm_set1_epi16
(
ref
[
iy
*
stride
]
*
(
1
<<
(
WARPEDPIXEL_FILTER_BITS
-
HORSHEAR_REDUCE_PREC_BITS
)));
}
else
if
(
ix4
>=
width
+
6
)
{
tmp
[
k
+
7
]
=
_mm_set1_epi16
(
ref
[
iy
*
stride
+
(
width
-
1
)]
*
(
1
<<
WARPEDPIXEL_FILTER_BITS
));
tmp
[
k
+
7
]
=
_mm_set1_epi16
(
ref
[
iy
*
stride
+
(
width
-
1
)]
*
(
1
<<
(
WARPEDPIXEL_FILTER_BITS
-
HORSHEAR_REDUCE_PREC_BITS
)));
}
else
{
int
sx
=
sx4
+
alpha
*
(
-
4
)
+
beta
*
k
+
// Include rounding and offset here
...
...
@@ -119,6 +121,9 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
// coeffs 6 7 6 7 6 7 6 7 for pixels 0, 2, 4, 6
__m128i
coeff_6
=
_mm_unpackhi_epi64
(
tmp_12
,
tmp_14
);
__m128i
round_const
=
_mm_set1_epi32
((
1
<<
HORSHEAR_REDUCE_PREC_BITS
)
>>
1
);
// Calculate filtered results
__m128i
src_0
=
_mm_unpacklo_epi8
(
src
,
zero
);
__m128i
res_0
=
_mm_madd_epi16
(
src_0
,
coeff_0
);
...
...
@@ -131,6 +136,8 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
__m128i
res_even
=
_mm_add_epi32
(
_mm_add_epi32
(
res_0
,
res_4
),
_mm_add_epi32
(
res_2
,
res_6
));
res_even
=
_mm_srai_epi32
(
_mm_add_epi32
(
res_even
,
round_const
),
HORSHEAR_REDUCE_PREC_BITS
);
// Filter odd-index pixels
__m128i
tmp_1
=
filter
[(
sx
+
1
*
alpha
)
>>
WARPEDDIFF_PREC_BITS
];
...
...
@@ -159,6 +166,8 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
__m128i
res_odd
=
_mm_add_epi32
(
_mm_add_epi32
(
res_1
,
res_5
),
_mm_add_epi32
(
res_3
,
res_7
));
res_odd
=
_mm_srai_epi32
(
_mm_add_epi32
(
res_odd
,
round_const
),
HORSHEAR_REDUCE_PREC_BITS
);
// Combine results into one register.
// We store the columns in the order 0, 2, 4, 6, 1, 3, 5, 7
...
...
@@ -240,12 +249,12 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
// Round and pack into 8 bits
__m128i
round_const
=
_mm_set1_epi32
((
1
<<
(
2
*
WARPEDPIXEL_FILTER
_BITS
)
)
>>
1
);
_mm_set1_epi32
((
1
<<
VERSHEAR_REDUCE_PREC
_BITS
)
>>
1
);
__m128i
res_lo_round
=
_mm_srai_epi32
(
_mm_add_epi32
(
res_lo
,
round_const
),
2
*
WARPEDPIXEL_FILTER
_BITS
);
_mm_add_epi32
(
res_lo
,
round_const
),
VERSHEAR_REDUCE_PREC
_BITS
);
__m128i
res_hi_round
=
_mm_srai_epi32
(
_mm_add_epi32
(
res_hi
,
round_const
),
2
*
WARPEDPIXEL_FILTER
_BITS
);
_mm_add_epi32
(
res_hi
,
round_const
),
VERSHEAR_REDUCE_PREC
_BITS
);
__m128i
res_16bit
=
_mm_packs_epi32
(
res_lo_round
,
res_hi_round
);
__m128i
res_8bit
=
_mm_packus_epi16
(
res_16bit
,
res_16bit
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment