Commit d50716fa authored by Deb Mukherjee's avatar Deb Mukherjee
Browse files

Incorporate WRAPLOW macro into non-highbitdepth tx

Incorporates the WRAPLOW macro into the non-highbitdepth transforms
to aid hardware verification between a software C model and an
intended hardware implementation through the use of the configure
options: --enable-experimental --enable-emulate-hardware.
Note that to avoid further discrepancies between the sse/sse2
implementations of the transforms and the C implementation, when the
emulate hardware option is invoked, we also disable sse/sse2/etc.

Also includes some minor cleanups/renaming, etc.

Change-Id: Ib864d8493313927d429cce402982f1c8e45b3287
parent a0befb93
......@@ -281,7 +281,7 @@ EXPERIMENT_LIST="
spatial_svc
vp9_temporal_denoising
fp_mb_stats
emulate_hardware_highbitdepth
emulate_hardware
"
CONFIG_LIST="
external_build
......
......@@ -217,7 +217,7 @@ void high_filter_block2d_8_c(const uint16_t *src_ptr,
(VP9_FILTER_WEIGHT >> 1); // Rounding
// Normalize back to 0-255...
*output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
*output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
++src_ptr;
output_ptr += intermediate_height;
}
......@@ -245,7 +245,7 @@ void high_filter_block2d_8_c(const uint16_t *src_ptr,
(VP9_FILTER_WEIGHT >> 1); // Rounding
// Normalize back to 0-255...
*dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd);
*dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
src_ptr += intermediate_height;
}
src_ptr += intermediate_next_stride;
......
......@@ -745,7 +745,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
NEON, Trans16x16DCT,
::testing::Values(
......@@ -753,7 +753,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16DCT,
::testing::Values(
......@@ -772,7 +772,7 @@ INSTANTIATE_TEST_CASE_P(
VPX_BITS_8)));
#endif
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3, Trans16x16DCT,
::testing::Values(
......
......@@ -333,7 +333,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
NEON, Trans32x32Test,
::testing::Values(
......@@ -343,7 +343,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans32x32Test,
::testing::Values(
......@@ -353,7 +353,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
AVX2, Trans32x32Test,
::testing::Values(
......
......@@ -458,7 +458,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
NEON, Trans4x4DCT,
::testing::Values(
......@@ -473,14 +473,15 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
#endif
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MMX, Trans4x4WHT,
::testing::Values(
make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4DCT,
::testing::Values(
......
......@@ -568,7 +568,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#endif
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
NEON, FwdTrans8x8DCT,
::testing::Values(
......@@ -583,7 +583,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8DCT,
::testing::Values(
......@@ -598,7 +598,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3, FwdTrans8x8DCT,
::testing::Values(
......
......@@ -260,7 +260,7 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4, 1)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, PartialIDctTest,
::testing::Values(
......@@ -294,7 +294,8 @@ INSTANTIATE_TEST_CASE_P(
TX_4X4, 1)));
#endif
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3_64, PartialIDctTest,
::testing::Values(
......@@ -304,7 +305,7 @@ INSTANTIATE_TEST_CASE_P(
TX_8X8, 12)));
#endif
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSSE3, PartialIDctTest,
::testing::Values(
......
......@@ -65,7 +65,7 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
}
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE uint16_t clip_pixel_high(int val, int bd) {
static INLINE uint16_t clip_pixel_highbd(int val, int bd) {
switch (bd) {
case 8:
default:
......
......@@ -301,7 +301,7 @@ static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
int k, sum = 0;
for (k = 0; k < SUBPEL_TAPS; ++k)
sum += src_x[k] * x_filter[k];
dst[x] = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
x_q4 += x_step_q4;
}
src += src_stride;
......@@ -327,7 +327,7 @@ static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
for (k = 0; k < SUBPEL_TAPS; ++k)
sum += src_x[k] * x_filter[k];
dst[x] = ROUND_POWER_OF_TWO(dst[x] +
clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
x_q4 += x_step_q4;
}
src += src_stride;
......@@ -352,7 +352,7 @@ static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
int k, sum = 0;
for (k = 0; k < SUBPEL_TAPS; ++k)
sum += src_y[k * src_stride] * y_filter[k];
dst[y * dst_stride] = clip_pixel_high(
dst[y * dst_stride] = clip_pixel_highbd(
ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
y_q4 += y_step_q4;
}
......@@ -379,7 +379,7 @@ static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
for (k = 0; k < SUBPEL_TAPS; ++k)
sum += src_y[k * src_stride] * y_filter[k];
dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1);
y_q4 += y_step_q4;
}
++src;
......
This diff is collapsed.
......@@ -251,7 +251,7 @@ static INLINE void high_tm_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
for (r = 0; r < bs; r++) {
for (c = 0; c < bs; c++)
dst[c] = clip_pixel_high(left[r] + above[c] - ytop_left, bd);
dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd);
dst += stride;
}
}
......
......@@ -331,6 +331,8 @@ $vp9_convolve8_avg_vert_neon_asm=vp9_convolve8_avg_vert_neon;
# dct
#
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_1_add/;
......@@ -380,69 +382,123 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_16_add/;
} else {
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_1_add/;
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_16_add/;
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_1_add/;
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_64_add/;
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_12_add/;
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_1_add/;
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_256_add/;
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add/;
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1024_add/;
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_34_add/;
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1_add/;
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add/;
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht8x8_64_add/;
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp9_iht16x16_256_add/;
# dct and add
# dct and add
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_1_add/;
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_1_add/;
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_16_add/;
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_16_add/;
} else {
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_1_add sse2 neon_asm dspr2/;
$vp9_idct4x4_1_add_neon_asm=vp9_idct4x4_1_add_neon;
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct4x4_16_add sse2 neon_asm dspr2/;
$vp9_idct4x4_16_add_neon_asm=vp9_idct4x4_16_add_neon;
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_1_add sse2 neon_asm dspr2/;
$vp9_idct8x8_1_add_neon_asm=vp9_idct8x8_1_add_neon;
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_64_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
$vp9_idct8x8_64_add_neon_asm=vp9_idct8x8_64_add_neon;
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct8x8_12_add sse2 neon_asm dspr2/, "$ssse3_x86_64";
$vp9_idct8x8_12_add_neon_asm=vp9_idct8x8_12_add_neon;
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_1_add sse2 neon_asm dspr2/;
$vp9_idct16x16_1_add_neon_asm=vp9_idct16x16_1_add_neon;
add_proto qw/void vp9_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_256_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_256_add_neon_asm=vp9_idct16x16_256_add_neon;
add_proto qw/void vp9_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_10_add sse2 ssse3 neon_asm dspr2/;
$vp9_idct16x16_10_add_neon_asm=vp9_idct16x16_10_add_neon;
add_proto qw/void vp9_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1024_add sse2 neon_asm dspr2/;
$vp9_idct32x32_1024_add_neon_asm=vp9_idct32x32_1024_add_neon;
add_proto qw/void vp9_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_34_add sse2 neon_asm dspr2/;
$vp9_idct32x32_34_add_neon_asm=vp9_idct32x32_1024_add_neon;
add_proto qw/void vp9_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct32x32_1_add sse2 neon_asm dspr2/;
$vp9_idct32x32_1_add_neon_asm=vp9_idct32x32_1_add_neon;
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add sse2 neon_asm dspr2/;
$vp9_iht4x4_16_add_neon_asm=vp9_iht4x4_16_add_neon;
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht8x8_64_add sse2 neon_asm dspr2/;
$vp9_iht8x8_64_add_neon_asm=vp9_iht8x8_64_add_neon;
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp9_iht16x16_256_add sse2 dspr2/;
# dct and add
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_1_add/;
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_iwht4x4_16_add/;
}
}
# High bitdepth functions
......@@ -689,6 +745,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# dct
#
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto qw/void vp9_high_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_high_idct4x4_1_add/;
......
......@@ -571,7 +571,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum += filter[k] *
input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))];
}
*optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
} else {
// Initial part.
......@@ -585,7 +585,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum += filter[k] *
input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ?
0 : int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
// Middle part.
for (; x <= x2; ++x, y += delta) {
......@@ -596,7 +596,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum = 0;
for (k = 0; k < INTERP_TAPS; ++k)
sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k];
*optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
// End part.
for (; x < outlength; ++x, y += delta) {
......@@ -609,7 +609,7 @@ static void highbd_interpolate(const uint16_t *const input, int inlength,
sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >=
inlength ? inlength - 1 :
int_pel - INTERP_TAPS / 2 + 1 + k)];
*optr++ = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
*optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
}
}
}
......@@ -635,7 +635,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_high(sum, bd);
*optr++ = clip_pixel_highbd(sum, bd);
}
} else {
// Initial part.
......@@ -645,7 +645,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_high(sum, bd);
*optr++ = clip_pixel_highbd(sum, bd);
}
// Middle part.
for (; i < l2; i += 2) {
......@@ -654,7 +654,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
sum += (input[i - j] + input[i + 1 + j]) * filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_high(sum, bd);
*optr++ = clip_pixel_highbd(sum, bd);
}
// End part.
for (; i < length; i += 2) {
......@@ -665,7 +665,7 @@ static void highbd_down2_symeven(const uint16_t *const input, int length,
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_high(sum, bd);
*optr++ = clip_pixel_highbd(sum, bd);
}
}
}
......@@ -691,7 +691,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_high(sum, bd);
*optr++ = clip_pixel_highbd(sum, bd);
}
} else {
// Initial part.
......@@ -701,7 +701,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_high(sum, bd);
*optr++ = clip_pixel_highbd(sum, bd);
}
// Middle part.
for (; i < l2; i += 2) {
......@@ -710,7 +710,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
sum += (input[i - j] + input[i + j]) * filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_high(sum, bd);
*optr++ = clip_pixel_highbd(sum, bd);
}
// End part.
for (; i < length; i += 2) {
......@@ -720,7 +720,7 @@ static void highbd_down2_symodd(const uint16_t *const input, int length,
filter[j];
}
sum >>= FILTER_BITS;
*optr++ = clip_pixel_high(sum, bd);
*optr++ = clip_pixel_highbd(sum, bd);
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment