Commit 72678577 authored by Rupert Swarbrick, committed by Debargha Mukherjee

ext-partition-types: Add 64x16 and 16x64 bsizes

Change-Id: I0c3772110e9fa62ac687bd99e290b5006bf3bd6c
parent 2f711f68
@@ -45,6 +45,8 @@ if (aom_config("CONFIG_EXT_PARTITION_TYPES")) {
push @block_sizes, [16, 4];
push @block_sizes, [8, 32];
push @block_sizes, [32, 8];
push @block_sizes, [16, 64];
push @block_sizes, [64, 16];
}
@tx_dims = (2, 4, 8, 16, 32);
@@ -994,14 +996,20 @@ if (aom_config("CONFIG_EXT_PARTITION_TYPES")) {
specialize qw/aom_variance16x4 sse2/;
specialize qw/aom_variance8x32 sse2/;
specialize qw/aom_variance32x8 sse2/;
specialize qw/aom_variance16x64 sse2/;
specialize qw/aom_variance64x16 sse2/;
specialize qw/aom_sub_pixel_variance4x16 sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x4 sse2 ssse3/;
specialize qw/aom_sub_pixel_variance8x32 sse2 ssse3/;
specialize qw/aom_sub_pixel_variance32x8 sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x64 sse2 ssse3/;
specialize qw/aom_sub_pixel_variance64x16 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance4x16 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance16x4 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance8x32 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance32x8 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance16x64 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance64x16 sse2 ssse3/;
}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
@@ -1020,7 +1028,7 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
if ($w != 128 && $h != 128 && $w != 4 && $h != 4) {
specialize "aom_highbd_${bd}_variance${w}x${h}", "sse2";
}
# TODO(david.barker): When ext-partition-types is enabled, we currently
# don't have vectorized 4x16 highbd variance functions
if ($w == 4 && $h == 4) {
specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1";
...
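For context, each `specialize` line above feeds libaom's RTCD generator, which emits a C fallback plus a runtime-dispatched function pointer per block size. A rough sketch of what the generated dispatch looks like for one of the new sizes, assuming the generator's usual naming and the x86_simd_caps()/HAS_SSE2 helpers from aom_ports/x86.h (illustrative, not the literal generated file):

#include <stdint.h>

unsigned int aom_variance64x16_c(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 unsigned int *sse);
unsigned int aom_variance64x16_sse2(const uint8_t *src, int src_stride,
                                    const uint8_t *ref, int ref_stride,
                                    unsigned int *sse);

/* Function pointer that callers use; filled in once at init time. */
unsigned int (*aom_variance64x16)(const uint8_t *src, int src_stride,
                                  const uint8_t *ref, int ref_stride,
                                  unsigned int *sse);

static void setup_rtcd_internal(void) {
  const int flags = x86_simd_caps();         /* CPUID feature detection */
  aom_variance64x16 = aom_variance64x16_c;   /* portable C fallback */
  if (flags & HAS_SSE2) aom_variance64x16 = aom_variance64x16_sse2;
}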
@@ -163,11 +163,15 @@ sadMxN(8, 32)
sadMxNx4D(8, 32)
sadMxN(32, 8)
sadMxNx4D(32, 8)
sadMxN(16, 64)
sadMxNx4D(16, 64)
sadMxN(64, 16)
sadMxNx4D(64, 16)
#endif
/* clang-format on */
#if CONFIG_HIGHBITDEPTH
static INLINE
unsigned int highbd_sad(const uint8_t *a8, int a_stride, const uint8_t *b8,
int b_stride, int width, int height) {
int y, x;
@@ -328,12 +332,16 @@ highbd_sadMxN(8, 32)
highbd_sadMxNx4D(8, 32)
highbd_sadMxN(32, 8)
highbd_sadMxNx4D(32, 8)
highbd_sadMxN(16, 64)
highbd_sadMxNx4D(16, 64)
highbd_sadMxN(64, 16)
highbd_sadMxNx4D(64, 16)
#endif
/* clang-format on */
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_AV1 && CONFIG_EXT_INTER
static INLINE
unsigned int masked_sad(const uint8_t *src, int src_stride,
const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride, const uint8_t *m, int m_stride,
@@ -395,6 +403,8 @@ MASKSADMxN(4, 16)
MASKSADMxN(16, 4)
MASKSADMxN(8, 32)
MASKSADMxN(32, 8)
MASKSADMxN(16, 64)
MASKSADMxN(64, 16)
#endif
/* clang-format on */
@@ -464,6 +474,8 @@ HIGHBD_MASKSADMXN(4, 16)
HIGHBD_MASKSADMXN(16, 4)
HIGHBD_MASKSADMXN(8, 32)
HIGHBD_MASKSADMXN(32, 8)
HIGHBD_MASKSADMXN(16, 64)
HIGHBD_MASKSADMXN(64, 16)
#endif
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AV1 && CONFIG_EXT_INTER
@@ -522,6 +534,8 @@ OBMCSADMxN(4, 16)
OBMCSADMxN(16, 4)
OBMCSADMxN(8, 32)
OBMCSADMxN(32, 8)
OBMCSADMxN(16, 64)
OBMCSADMxN(64, 16)
#endif
/* clang-format on */
@@ -578,6 +592,8 @@ HIGHBD_OBMCSADMXN(4, 16)
HIGHBD_OBMCSADMXN(16, 4)
HIGHBD_OBMCSADMXN(8, 32)
HIGHBD_OBMCSADMXN(32, 8)
HIGHBD_OBMCSADMXN(16, 64)
HIGHBD_OBMCSADMXN(64, 16)
#endif
/* clang-format on */
#endif // CONFIG_HIGHBITDEPTH
...
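All of the sadMxN / MASKSADMxN / OBMCSADMxN instantiations above build on the same scalar kernel; the new (16, 64) and (64, 16) lines just pin its width and height. A minimal sketch of the plain-C SAD loop they wrap, simplified from the reference helper in aom_dsp/sad.c:

#include <stdint.h>
#include <stdlib.h>

/* Sum of absolute differences over a width x height block. */
static unsigned int sad(const uint8_t *a, int a_stride, const uint8_t *b,
                        int b_stride, int width, int height) {
  unsigned int s = 0;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) s += abs(a[x] - b[x]);
    a += a_stride;
    b += b_stride;
  }
  return s;
}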
@@ -256,6 +256,8 @@ VARIANCES(4, 16)
VARIANCES(16, 4)
VARIANCES(8, 32)
VARIANCES(32, 8)
VARIANCES(16, 64)
VARIANCES(64, 16)
#endif
GET_VAR(16, 16)
@@ -661,6 +663,8 @@ HIGHBD_VARIANCES(4, 16)
HIGHBD_VARIANCES(16, 4)
HIGHBD_VARIANCES(8, 32)
HIGHBD_VARIANCES(32, 8)
HIGHBD_VARIANCES(16, 64)
HIGHBD_VARIANCES(64, 16)
#endif
HIGHBD_GET_VAR(8)
@@ -848,6 +852,8 @@ MASK_SUBPIX_VAR(4, 16)
MASK_SUBPIX_VAR(16, 4)
MASK_SUBPIX_VAR(8, 32)
MASK_SUBPIX_VAR(32, 8)
MASK_SUBPIX_VAR(16, 64)
MASK_SUBPIX_VAR(64, 16)
#endif
#if CONFIG_HIGHBITDEPTH
@@ -985,6 +991,8 @@ HIGHBD_MASK_SUBPIX_VAR(4, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 8)
HIGHBD_MASK_SUBPIX_VAR(16, 64)
HIGHBD_MASK_SUBPIX_VAR(64, 16)
#endif
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AV1 && CONFIG_EXT_INTER
@@ -1094,6 +1102,10 @@ OBMC_VAR(8, 32)
OBMC_SUBPIX_VAR(8, 32)
OBMC_VAR(32, 8)
OBMC_SUBPIX_VAR(32, 8)
OBMC_VAR(16, 64)
OBMC_SUBPIX_VAR(16, 64)
OBMC_VAR(64, 16)
OBMC_SUBPIX_VAR(64, 16)
#endif
#if CONFIG_HIGHBITDEPTH
@@ -1287,6 +1299,10 @@ HIGHBD_OBMC_VAR(8, 32)
HIGHBD_OBMC_SUBPIX_VAR(8, 32)
HIGHBD_OBMC_VAR(32, 8)
HIGHBD_OBMC_SUBPIX_VAR(32, 8)
HIGHBD_OBMC_VAR(16, 64)
HIGHBD_OBMC_SUBPIX_VAR(16, 64)
HIGHBD_OBMC_VAR(64, 16)
HIGHBD_OBMC_SUBPIX_VAR(64, 16)
#endif
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AV1 && CONFIG_MOTION_VAR
@@ -293,4 +293,6 @@ HIGH_SADNXN4D 4, 16
HIGH_SADNXN4D 16, 4
HIGH_SADNXN4D 8, 32
HIGH_SADNXN4D 32, 8
HIGH_SADNXN4D 16, 64
HIGH_SADNXN4D 64, 16
%endif
@@ -158,7 +158,10 @@ HIGH_SAD64XN 64 ; highbd_sad64x64_sse2
HIGH_SAD64XN 32 ; highbd_sad64x32_sse2
HIGH_SAD64XN 64, 1 ; highbd_sad64x64_avg_sse2
HIGH_SAD64XN 32, 1 ; highbd_sad64x32_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
HIGH_SAD64XN 16 ; highbd_sad_64x16_sse2
HIGH_SAD64XN 16, 1 ; highbd_sad_64x16_avg_sse2
%endif
; unsigned int aom_highbd_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
@@ -302,6 +305,8 @@ HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
HIGH_SAD16XN 4 ; highbd_sad_16x4_sse2
HIGH_SAD16XN 4, 1 ; highbd_sad_16x4_avg_sse2
HIGH_SAD16XN 64 ; highbd_sad_16x64_sse2
HIGH_SAD16XN 64, 1 ; highbd_sad_16x64_avg_sse2
%endif
; unsigned int aom_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride,
...
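The 4D forms added above (sadMxNx4D, HIGH_SADNXN4D, SADNXN4D) score one source block against four reference candidates per call, which is the shape the motion search consumes. A hedged sketch of that contract, reusing the sad() loop sketched earlier; the real SIMD versions compute the four results in parallel rather than looping:

/* Four SADs at once: same source block, four candidate references. */
static void sad_4d(const uint8_t *src, int src_stride,
                   const uint8_t *const refs[4], int ref_stride,
                   int width, int height, unsigned int sads[4]) {
  for (int i = 0; i < 4; ++i)
    sads[i] = sad(src, src_stride, refs[i], ref_stride, width, height);
}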
@@ -189,6 +189,8 @@ VAR_FN(8, 8, 8, 6);
VAR_FN(16, 4, 16, 6);
VAR_FN(8, 32, 8, 8);
VAR_FN(32, 8, 16, 8);
VAR_FN(16, 64, 16, 10);
VAR_FN(64, 16, 16, 10);
#endif
#undef VAR_FN
@@ -411,7 +413,9 @@ DECLS(sse2);
FN(8, 4, 8, 3, 2, opt, (int64_t)); \
FN(16, 4, 16, 4, 2, opt, (int64_t)); \
FN(8, 32, 8, 3, 5, opt, (int64_t)); \
FN(32, 8, 16, 5, 3, opt, (int64_t)); \
FN(16, 64, 16, 4, 6, opt, (int64_t)); \
FN(64, 16, 16, 6, 4, opt, (int64_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t)); \
@@ -588,7 +592,9 @@ DECLS(sse2);
FN(8, 4, 8, 3, 2, opt, (int64_t)); \
FN(16, 4, 16, 4, 2, opt, (int64_t)); \
FN(8, 32, 8, 3, 5, opt, (int64_t)); \
FN(32, 8, 16, 5, 3, opt, (int64_t)); \
FN(16, 64, 16, 4, 6, opt, (int64_t)); \
FN(64, 16, 16, 6, 4, opt, (int64_t));
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t)); \
...
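In the FNS tables above, the two small integers after the width and height are log2 values: `FN(16, 64, 16, 4, 6, ...)` carries 4 = log2(16) and 6 = log2(64), and `FN(64, 16, 16, 6, 4, ...)` the reverse. The macros use their sum to replace the division in the variance identity with a shift, since for both new sizes

    w * h = 16 * 64 = 1024 = 2^(4+6),  so  sum * sum / (w * h) == (sum * sum) >> 10.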
@@ -98,6 +98,8 @@ MASKSAD4XN_SSSE3(16)
MASKSADMXN_SSSE3(16, 4)
MASKSAD8XN_SSSE3(32)
MASKSADMXN_SSSE3(32, 8)
MASKSADMXN_SSSE3(16, 64)
MASKSADMXN_SSSE3(64, 16)
#endif
static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
@@ -294,6 +296,8 @@ HIGHBD_MASKSAD4XN_SSSE3(16)
HIGHBD_MASKSADMXN_SSSE3(16, 4)
HIGHBD_MASKSADMXN_SSSE3(8, 32)
HIGHBD_MASKSADMXN_SSSE3(32, 8)
HIGHBD_MASKSADMXN_SSSE3(16, 64)
HIGHBD_MASKSADMXN_SSSE3(64, 16)
#endif
static INLINE unsigned int highbd_masked_sad_ssse3(
...
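For reference, the masked kernels score the source against an alpha blend of two predictors, with mask values in 0..64 (AOM_BLEND_A64 semantics, rounding by half before the 6-bit shift). A simplified sketch of the C definition these SSSE3 versions accelerate, following the masked_sad signature shown in the sad.c hunk earlier:

#include <stdint.h>
#include <stdlib.h>

static unsigned int masked_sad_ref(const uint8_t *src, int src_stride,
                                   const uint8_t *a, int a_stride,
                                   const uint8_t *b, int b_stride,
                                   const uint8_t *m, int m_stride,
                                   int width, int height) {
  unsigned int s = 0;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      /* 6-bit alpha blend of the two predictors, then absolute diff. */
      const int pred = (m[x] * a[x] + (64 - m[x]) * b[x] + 32) >> 6;
      s += abs(src[x] - pred);
    }
    src += src_stride; a += a_stride; b += b_stride; m += m_stride;
  }
  return s;
}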
@@ -131,6 +131,8 @@ MASK_SUBPIX_VAR4XH_SSSE3(16)
MASK_SUBPIX_VAR_SSSE3(16, 4)
MASK_SUBPIX_VAR8XH_SSSE3(32)
MASK_SUBPIX_VAR_SSSE3(32, 8)
MASK_SUBPIX_VAR_SSSE3(64, 16)
MASK_SUBPIX_VAR_SSSE3(16, 64)
#endif
static INLINE __m128i filter_block(const __m128i a, const __m128i b,
@@ -712,6 +714,8 @@ HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(16)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 4)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 32)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 8)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 64)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(64, 16)
#endif
static INLINE __m128i highbd_filter_block(const __m128i a, const __m128i b,
...
@@ -142,6 +142,8 @@ OBMCSADWXH(4, 16)
OBMCSADWXH(16, 4)
OBMCSADWXH(8, 32)
OBMCSADWXH(32, 8)
OBMCSADWXH(16, 64)
OBMCSADWXH(64, 16)
#endif
////////////////////////////////////////////////////////////////////////////////
@@ -271,5 +273,7 @@ HBD_OBMCSADWXH(4, 16)
HBD_OBMCSADWXH(16, 4)
HBD_OBMCSADWXH(8, 32)
HBD_OBMCSADWXH(32, 8)
HBD_OBMCSADWXH(16, 64)
HBD_OBMCSADWXH(64, 16)
#endif
#endif // CONFIG_HIGHBITDEPTH
@@ -151,6 +151,8 @@ OBMCVARWXH(4, 16)
OBMCVARWXH(16, 4)
OBMCVARWXH(8, 32)
OBMCVARWXH(32, 8)
OBMCVARWXH(16, 64)
OBMCVARWXH(64, 16)
#endif
////////////////////////////////////////////////////////////////////////////////
@@ -364,5 +366,7 @@ HBD_OBMCVARWXH(4, 16)
HBD_OBMCVARWXH(16, 4)
HBD_OBMCVARWXH(8, 32)
HBD_OBMCVARWXH(32, 8)
HBD_OBMCVARWXH(16, 64)
HBD_OBMCVARWXH(64, 16)
#endif
#endif // CONFIG_HIGHBITDEPTH
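The OBMC kernels above differ from plain SAD in their inputs: wsrc is a pre-weighted 32-bit source and mask a 32-bit weight array in fixed point, scaled by 1 << 12. A hedged sketch of the OBMC SAD definition, following the C reference's OBMCSADMxN pattern (treat the scaling details as an assumption):

#include <stdint.h>
#include <stdlib.h>

static unsigned int obmc_sad_ref(const uint8_t *pre, int pre_stride,
                                 const int32_t *wsrc, const int32_t *mask,
                                 int width, int height) {
  unsigned int s = 0;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x)
      /* round(|wsrc - mask * pre| / 2^12): undo the mask's fixed-point scale */
      s += (unsigned int)((abs(wsrc[x] - pre[x] * mask[x]) + (1 << 11)) >> 12);
    pre += pre_stride;
    wsrc += width;  /* wsrc and mask are dense width x height arrays */
    mask += width;
  }
  return s;
}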
@@ -256,4 +256,6 @@ SADNXN4D 4, 16
SADNXN4D 16, 4
SADNXN4D 8, 32
SADNXN4D 32, 8
SADNXN4D 16, 64
SADNXN4D 64, 16
%endif
@@ -163,6 +163,10 @@ SAD64XN 64 ; sad64x64_sse2
SAD64XN 32 ; sad64x32_sse2
SAD64XN 64, 1 ; sad64x64_avg_sse2
SAD64XN 32, 1 ; sad64x32_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
SAD64XN 16 ; sad64x16_sse2
SAD64XN 16, 1 ; sad64x16_avg_sse2
%endif
; unsigned int aom_sad32x32_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride);
@@ -261,6 +265,8 @@ SAD16XN 8, 1 ; sad16x8_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
SAD16XN 4 ; sad_16x4_sse2
SAD16XN 4, 1 ; sad_16x4_avg_sse2
SAD16XN 64 ; sad_16x64_sse2
SAD16XN 64, 1 ; sad_16x64_avg_sse2
%endif
; unsigned int aom_sad8x{8,16}_sse2(uint8_t *src, int src_stride,
...
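In the .asm tables above, a trailing `, 1` argument (e.g. `SAD64XN 16, 1`) emits the _avg variant, which scores the source against the rounded average of the reference and a second predictor, as used for compound prediction. A minimal sketch of the equivalent C, assuming the usual dense second_pred layout:

#include <stdint.h>
#include <stdlib.h>

static unsigned int sad_avg_ref(const uint8_t *src, int src_stride,
                                const uint8_t *ref, int ref_stride,
                                const uint8_t *second_pred,
                                int width, int height) {
  unsigned int s = 0;
  for (int y = 0; y < height; ++y) {
    for (int x = 0; x < width; ++x) {
      const int avg = (ref[x] + second_pred[x] + 1) >> 1;  /* rounded mean */
      s += abs(src[x] - avg);
    }
    src += src_stride;
    ref += ref_stride;
    second_pred += width;  /* second predictor is a dense block */
  }
  return s;
}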
@@ -382,6 +382,28 @@ unsigned int aom_variance32x8_sse2(const uint8_t *src, int src_stride,
assert(sum >= -255 * 32 * 8);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 8);
}
unsigned int aom_variance16x64_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 16, 64, sse, &sum,
aom_get16x16var_sse2, 16);
assert(sum <= 255 * 16 * 64);
assert(sum >= -255 * 16 * 64);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
}
unsigned int aom_variance64x16_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
int sum;
variance_sse2(src, src_stride, ref, ref_stride, 64, 16, sse, &sum,
aom_get16x16var_sse2, 16);
assert(sum <= 255 * 64 * 16);
assert(sum >= -255 * 64 * 16);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
}
#endif
// The 2 unused parameters are place holders for PIC enabled build.
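The `>> 10` in the two new wrappers above is the variance identity var = SSE - sum^2 / N with N = 16 * 64 = 64 * 16 = 1024 = 2^10. A scalar sketch of the same computation; the SSE2 wrappers instead accumulate sse and sum over aom_get16x16var_sse2 tiles:

#include <stdint.h>

static unsigned int variance_ref(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 int w, int h, unsigned int *sse) {
  int64_t sum = 0;
  uint64_t sse64 = 0;
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      const int d = src[y * src_stride + x] - ref[y * ref_stride + x];
      sum += d;
      sse64 += (uint64_t)(d * d);
    }
  }
  *sse = (unsigned int)sse64;
  /* var = SSE - sum^2 / (w*h); for 16x64 and 64x16 the divide is >> 10 */
  return *sse - (unsigned int)((sum * sum) / (w * h));
}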
@@ -451,7 +473,9 @@ DECLS(ssse3);
FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)); \
FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)); \
FN(8, 32, 8, 3, 5, opt, (int32_t), (int32_t)); \
FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t)); \
FN(16, 64, 16, 4, 6, opt, (int32_t), (int32_t)); \
FN(64, 16, 16, 6, 4, opt, (int32_t), (int32_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)); \
@@ -543,7 +567,9 @@ DECLS(ssse3);
FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t)); \
FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t)); \
FN(8, 32, 8, 3, 5, opt, (int32_t), (int32_t)); \
FN(32, 8, 16, 5, 3, opt, (int32_t), (int32_t)); \
FN(16, 64, 16, 4, 6, opt, (int32_t), (int32_t)); \
FN(64, 16, 16, 6, 4, opt, (int32_t), (int32_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t)); \
...
@@ -376,7 +376,9 @@ static const uint64_t left_prediction_mask[BLOCK_SIZES_ALL] = {
0x0000000000000101ULL, // BLOCK_4X16,
0x0000000000000001ULL, // BLOCK_16X4,
0x0000000001010101ULL, // BLOCK_8X32,
0x0000000000000001ULL, // BLOCK_32X8,
0x0101010101010101ULL, // BLOCK_16X64,
0x0000000000000101ULL, // BLOCK_64X16
};
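These tables carry one bit per 8x8 unit of a 64x64 area, eight units per row, starting at bit 0, so the new entries can be sanity-checked with a small generator. A sketch under that assumption; the helper names are hypothetical:

#include <stdint.h>

/* Leftmost 8x8 unit of every row the block covers. */
static uint64_t left_mask(int h) {
  uint64_t m = 1;  /* heights below 8 still occupy one unit */
  for (int r = 1; r < h / 8; ++r) m |= 1ULL << (r * 8);
  return m;
}

/* Topmost 8x8 unit of every column the block covers. */
static uint64_t above_mask(int w) {
  uint64_t m = 1;
  for (int c = 1; c < w / 8; ++c) m |= 1ULL << c;
  return m;
}

/* left_mask(64) == 0x0101010101010101 (BLOCK_16X64 above) and
 * left_mask(16) == 0x0101 (BLOCK_64X16 above); above_mask(16) == 0x3 and
 * above_mask(64) == 0xff match the new above_prediction_mask entries below. */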
// 64 bit mask to shift and set for each prediction size.
@@ -402,7 +404,9 @@ static const uint64_t above_prediction_mask[BLOCK_SIZES_ALL] = {
0x0000000000000001ULL, // BLOCK_4X16,
0x0000000000000003ULL, // BLOCK_16X4,
0x0000000000000001ULL, // BLOCK_8X32,
0x000000000000000fULL, // BLOCK_32X8,
0x0000000000000003ULL, // BLOCK_16X64,
0x00000000000000ffULL, // BLOCK_64X16
};
// 64 bit mask to shift and set for each prediction size. A bit is set for
// each 8x8 block that would be in the top left most block of the given block
@@ -429,7 +433,9 @@ static const uint64_t size_mask[BLOCK_SIZES_ALL] = {
0x0000000000000101ULL, // BLOCK_4X16,
0x0000000000000003ULL, // BLOCK_16X4,
0x0000000001010101ULL, // BLOCK_8X32,
0x000000000000000fULL, // BLOCK_32X8,
0x0303030303030303ULL, // BLOCK_16X64,
0x000000000000ffffULL, // BLOCK_64X16
};
// These are used for masking the left and above 32x32 borders.
@@ -486,7 +492,9 @@ static const uint16_t left_prediction_mask_uv[BLOCK_SIZES_ALL] = {
0x0001, // BLOCK_4X16,
0x0001, // BLOCK_16X4,
0x0011, // BLOCK_8X32,
0x0001, // BLOCK_32X8,
0x1111, // BLOCK_16X64,
0x0001, // BLOCK_64X16,
};
// 16 bit above mask to shift and set for uv each prediction size.
@@ -512,7 +520,9 @@ static const uint16_t above_prediction_mask_uv[BLOCK_SIZES_ALL] = {
0x0001, // BLOCK_4X16,
0x0001, // BLOCK_16X4,