Commit 93c39e91 authored by Rupert Swarbrick's avatar Rupert Swarbrick Committed by Debargha Mukherjee

ext-partition-types: Add 4:1 partitions

This patch adds support for 4:1 rectangular blocks to various common
data arrays, and adds new partition types to the EXT_PARTITION_TYPES
experiment which will use them.

This patch has the following restrictions, which can be lifted in
future patches:

  * ext-partition-types is incompatible with fp_mb_stats and supertx
    for the moment

  * Currently only 32x32 superblocks can use the new partition types

There's a slightly odd restriction about when we allow
PARTITION_HORZ_4 or PARTITION_VERT_4. Since these both live in the
EXT_PARTITION_TYPES CDF, read_partition() can only return them if both
has_rows and has_cols are true. This means that at least half of the
width and height of the block must be visible. It might be nice to
relax that restriction but that would imply a change to how we encode
partition types, which seems already to be in a state of flux, so
maybe it's better to wait until that has settled down.

Change-Id: Id7fc3fd0f762f35f63b3d3e3bf4e07c245c7b4fa
parent 328d57b8
...@@ -40,6 +40,12 @@ foreach $w (@block_widths) { ...@@ -40,6 +40,12 @@ foreach $w (@block_widths) {
push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w) ; push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w) ;
} }
} }
if (aom_config("CONFIG_EXT_PARTITION_TYPES")) {
push @block_sizes, [4, 16];
push @block_sizes, [16, 4];
push @block_sizes, [8, 32];
push @block_sizes, [32, 8];
}
@tx_dims = (2, 4, 8, 16, 32); @tx_dims = (2, 4, 8, 16, 32);
if (aom_config("CONFIG_TX64X64") eq "yes") { if (aom_config("CONFIG_TX64X64") eq "yes") {
...@@ -979,6 +985,22 @@ specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/; ...@@ -979,6 +985,22 @@ specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/; specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/; specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/; specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
if (aom_config("CONFIG_EXT_PARTITION_TYPES")) {
specialize qw/aom_variance4x16 sse2/;
specialize qw/aom_variance16x4 sse2/;
specialize qw/aom_variance8x32 sse2/;
specialize qw/aom_variance32x8 sse2/;
specialize qw/aom_sub_pixel_variance4x16 sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x4 sse2 ssse3/;
specialize qw/aom_sub_pixel_variance8x32 sse2 ssse3/;
specialize qw/aom_sub_pixel_variance32x8 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance4x16 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance16x4 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance8x32 sse2 ssse3/;
specialize qw/aom_sub_pixel_avg_variance32x8 sse2 ssse3/;
}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
foreach $bd (8, 10, 12) { foreach $bd (8, 10, 12) {
add_proto qw/unsigned int/, "aom_highbd_${bd}_variance2x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; add_proto qw/unsigned int/, "aom_highbd_${bd}_variance2x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
...@@ -995,6 +1017,8 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { ...@@ -995,6 +1017,8 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
if ($w != 128 && $h != 128 && $w != 4 && $h != 4) { if ($w != 128 && $h != 128 && $w != 4 && $h != 4) {
specialize "aom_highbd_${bd}_variance${w}x${h}", "sse2"; specialize "aom_highbd_${bd}_variance${w}x${h}", "sse2";
} }
# TODO(david.barker): When ext-partition-types is enabled, we currently
# don't have vectorized 4x16 highbd variance functions
if ($w == 4 && $h == 4) { if ($w == 4 && $h == 4) {
specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1"; specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1";
} }
......
...@@ -153,10 +153,21 @@ sadMxN(4, 4) ...@@ -153,10 +153,21 @@ sadMxN(4, 4)
sadMxNxK(4, 4, 3) sadMxNxK(4, 4, 3)
sadMxNxK(4, 4, 8) sadMxNxK(4, 4, 8)
sadMxNx4D(4, 4) sadMxNx4D(4, 4)
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
sadMxN(4, 16)
sadMxNx4D(4, 16)
sadMxN(16, 4)
sadMxNx4D(16, 4)
sadMxN(8, 32)
sadMxNx4D(8, 32)
sadMxN(32, 8)
sadMxNx4D(32, 8)
#endif
/* clang-format on */ /* clang-format on */
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
static INLINE static INLINE
unsigned int highbd_sad(const uint8_t *a8, int a_stride, const uint8_t *b8, unsigned int highbd_sad(const uint8_t *a8, int a_stride, const uint8_t *b8,
int b_stride, int width, int height) { int b_stride, int width, int height) {
int y, x; int y, x;
...@@ -307,11 +318,22 @@ highbd_sadMxN(4, 4) ...@@ -307,11 +318,22 @@ highbd_sadMxN(4, 4)
highbd_sadMxNxK(4, 4, 3) highbd_sadMxNxK(4, 4, 3)
highbd_sadMxNxK(4, 4, 8) highbd_sadMxNxK(4, 4, 8)
highbd_sadMxNx4D(4, 4) highbd_sadMxNx4D(4, 4)
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
highbd_sadMxN(4, 16)
highbd_sadMxNx4D(4, 16)
highbd_sadMxN(16, 4)
highbd_sadMxNx4D(16, 4)
highbd_sadMxN(8, 32)
highbd_sadMxNx4D(8, 32)
highbd_sadMxN(32, 8)
highbd_sadMxNx4D(32, 8)
#endif
/* clang-format on */ /* clang-format on */
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
#if CONFIG_AV1 && CONFIG_EXT_INTER #if CONFIG_AV1 && CONFIG_EXT_INTER
static INLINE static INLINE
unsigned int masked_sad(const uint8_t *src, int src_stride, unsigned int masked_sad(const uint8_t *src, int src_stride,
const uint8_t *a, int a_stride, const uint8_t *b, const uint8_t *a, int a_stride, const uint8_t *b,
int b_stride, const uint8_t *m, int m_stride, int b_stride, const uint8_t *m, int m_stride,
...@@ -367,10 +389,17 @@ MASKSADMxN(8, 8) ...@@ -367,10 +389,17 @@ MASKSADMxN(8, 8)
MASKSADMxN(8, 4) MASKSADMxN(8, 4)
MASKSADMxN(4, 8) MASKSADMxN(4, 8)
MASKSADMxN(4, 4) MASKSADMxN(4, 4)
#if CONFIG_EXT_PARTITION_TYPES
MASKSADMxN(4, 16)
MASKSADMxN(16, 4)
MASKSADMxN(8, 32)
MASKSADMxN(32, 8)
#endif
/* clang-format on */ /* clang-format on */
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
static INLINE static INLINE
unsigned int highbd_masked_sad(const uint8_t *src8, int src_stride, unsigned int highbd_masked_sad(const uint8_t *src8, int src_stride,
const uint8_t *a8, int a_stride, const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, const uint8_t *b8, int b_stride,
...@@ -429,6 +458,13 @@ HIGHBD_MASKSADMXN(8, 8) ...@@ -429,6 +458,13 @@ HIGHBD_MASKSADMXN(8, 8)
HIGHBD_MASKSADMXN(8, 4) HIGHBD_MASKSADMXN(8, 4)
HIGHBD_MASKSADMXN(4, 8) HIGHBD_MASKSADMXN(4, 8)
HIGHBD_MASKSADMXN(4, 4) HIGHBD_MASKSADMXN(4, 4)
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
HIGHBD_MASKSADMXN(4, 16)
HIGHBD_MASKSADMXN(16, 4)
HIGHBD_MASKSADMXN(8, 32)
HIGHBD_MASKSADMXN(32, 8)
#endif
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AV1 && CONFIG_EXT_INTER #endif // CONFIG_AV1 && CONFIG_EXT_INTER
...@@ -480,10 +516,17 @@ OBMCSADMxN(8, 8) ...@@ -480,10 +516,17 @@ OBMCSADMxN(8, 8)
OBMCSADMxN(8, 4) OBMCSADMxN(8, 4)
OBMCSADMxN(4, 8) OBMCSADMxN(4, 8)
OBMCSADMxN(4, 4) OBMCSADMxN(4, 4)
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
OBMCSADMxN(4, 16)
OBMCSADMxN(16, 4)
OBMCSADMxN(8, 32)
OBMCSADMxN(32, 8)
#endif
/* clang-format on */ /* clang-format on */
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
static INLINE static INLINE
unsigned int highbd_obmc_sad(const uint8_t *pre8, int pre_stride, unsigned int highbd_obmc_sad(const uint8_t *pre8, int pre_stride,
const int32_t *wsrc, const int32_t *mask, const int32_t *wsrc, const int32_t *mask,
int width, int height) { int width, int height) {
...@@ -529,6 +572,13 @@ HIGHBD_OBMCSADMXN(8, 8) ...@@ -529,6 +572,13 @@ HIGHBD_OBMCSADMXN(8, 8)
HIGHBD_OBMCSADMXN(8, 4) HIGHBD_OBMCSADMXN(8, 4)
HIGHBD_OBMCSADMXN(4, 8) HIGHBD_OBMCSADMXN(4, 8)
HIGHBD_OBMCSADMXN(4, 4) HIGHBD_OBMCSADMXN(4, 4)
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
HIGHBD_OBMCSADMXN(4, 16)
HIGHBD_OBMCSADMXN(16, 4)
HIGHBD_OBMCSADMXN(8, 32)
HIGHBD_OBMCSADMXN(32, 8)
#endif
/* clang-format on */ /* clang-format on */
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AV1 && CONFIG_MOTION_VAR #endif // CONFIG_AV1 && CONFIG_MOTION_VAR
...@@ -251,6 +251,13 @@ VARIANCES(4, 2) ...@@ -251,6 +251,13 @@ VARIANCES(4, 2)
VARIANCES(2, 4) VARIANCES(2, 4)
VARIANCES(2, 2) VARIANCES(2, 2)
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
VARIANCES(4, 16)
VARIANCES(16, 4)
VARIANCES(8, 32)
VARIANCES(32, 8)
#endif
GET_VAR(16, 16) GET_VAR(16, 16)
GET_VAR(8, 8) GET_VAR(8, 8)
...@@ -649,6 +656,13 @@ HIGHBD_VARIANCES(4, 2) ...@@ -649,6 +656,13 @@ HIGHBD_VARIANCES(4, 2)
HIGHBD_VARIANCES(2, 4) HIGHBD_VARIANCES(2, 4)
HIGHBD_VARIANCES(2, 2) HIGHBD_VARIANCES(2, 2)
#if CONFIG_AV1 && CONFIG_EXT_PARTITION_TYPES
HIGHBD_VARIANCES(4, 16)
HIGHBD_VARIANCES(16, 4)
HIGHBD_VARIANCES(8, 32)
HIGHBD_VARIANCES(32, 8)
#endif
HIGHBD_GET_VAR(8) HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16) HIGHBD_GET_VAR(16)
...@@ -831,6 +845,13 @@ MASK_SUBPIX_VAR(128, 64) ...@@ -831,6 +845,13 @@ MASK_SUBPIX_VAR(128, 64)
MASK_SUBPIX_VAR(128, 128) MASK_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION #endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION_TYPES
MASK_SUBPIX_VAR(4, 16)
MASK_SUBPIX_VAR(16, 4)
MASK_SUBPIX_VAR(8, 32)
MASK_SUBPIX_VAR(32, 8)
#endif
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8, void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
int width, int height, const uint8_t *ref8, int width, int height, const uint8_t *ref8,
...@@ -960,6 +981,13 @@ HIGHBD_MASK_SUBPIX_VAR(64, 128) ...@@ -960,6 +981,13 @@ HIGHBD_MASK_SUBPIX_VAR(64, 128)
HIGHBD_MASK_SUBPIX_VAR(128, 64) HIGHBD_MASK_SUBPIX_VAR(128, 64)
HIGHBD_MASK_SUBPIX_VAR(128, 128) HIGHBD_MASK_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION #endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION_TYPES
HIGHBD_MASK_SUBPIX_VAR(4, 16)
HIGHBD_MASK_SUBPIX_VAR(16, 4)
HIGHBD_MASK_SUBPIX_VAR(8, 32)
HIGHBD_MASK_SUBPIX_VAR(32, 8)
#endif
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AV1 && CONFIG_EXT_INTER #endif // CONFIG_AV1 && CONFIG_EXT_INTER
...@@ -1059,6 +1087,17 @@ OBMC_VAR(128, 128) ...@@ -1059,6 +1087,17 @@ OBMC_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 128) OBMC_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION #endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION_TYPES
OBMC_VAR(4, 16)
OBMC_SUBPIX_VAR(4, 16)
OBMC_VAR(16, 4)
OBMC_SUBPIX_VAR(16, 4)
OBMC_VAR(8, 32)
OBMC_SUBPIX_VAR(8, 32)
OBMC_VAR(32, 8)
OBMC_SUBPIX_VAR(32, 8)
#endif
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride, static INLINE void highbd_obmc_variance64(const uint8_t *pre8, int pre_stride,
const int32_t *wsrc, const int32_t *wsrc,
...@@ -1240,5 +1279,16 @@ HIGHBD_OBMC_SUBPIX_VAR(128, 64) ...@@ -1240,5 +1279,16 @@ HIGHBD_OBMC_SUBPIX_VAR(128, 64)
HIGHBD_OBMC_VAR(128, 128) HIGHBD_OBMC_VAR(128, 128)
HIGHBD_OBMC_SUBPIX_VAR(128, 128) HIGHBD_OBMC_SUBPIX_VAR(128, 128)
#endif // CONFIG_EXT_PARTITION #endif // CONFIG_EXT_PARTITION
#if CONFIG_EXT_PARTITION_TYPES
HIGHBD_OBMC_VAR(4, 16)
HIGHBD_OBMC_SUBPIX_VAR(4, 16)
HIGHBD_OBMC_VAR(16, 4)
HIGHBD_OBMC_SUBPIX_VAR(16, 4)
HIGHBD_OBMC_VAR(8, 32)
HIGHBD_OBMC_SUBPIX_VAR(8, 32)
HIGHBD_OBMC_VAR(32, 8)
HIGHBD_OBMC_SUBPIX_VAR(32, 8)
#endif
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AV1 && CONFIG_MOTION_VAR #endif // CONFIG_AV1 && CONFIG_MOTION_VAR
...@@ -288,3 +288,9 @@ HIGH_SADNXN4D 8, 8 ...@@ -288,3 +288,9 @@ HIGH_SADNXN4D 8, 8
HIGH_SADNXN4D 8, 4 HIGH_SADNXN4D 8, 4
HIGH_SADNXN4D 4, 8 HIGH_SADNXN4D 4, 8
HIGH_SADNXN4D 4, 4 HIGH_SADNXN4D 4, 4
%if CONFIG_EXT_PARTITION_TYPES
HIGH_SADNXN4D 4, 16
HIGH_SADNXN4D 16, 4
HIGH_SADNXN4D 8, 32
HIGH_SADNXN4D 32, 8
%endif
...@@ -227,6 +227,10 @@ HIGH_SAD32XN 16 ; highbd_sad32x16_sse2 ...@@ -227,6 +227,10 @@ HIGH_SAD32XN 16 ; highbd_sad32x16_sse2
HIGH_SAD32XN 64, 1 ; highbd_sad32x64_avg_sse2 HIGH_SAD32XN 64, 1 ; highbd_sad32x64_avg_sse2
HIGH_SAD32XN 32, 1 ; highbd_sad32x32_avg_sse2 HIGH_SAD32XN 32, 1 ; highbd_sad32x32_avg_sse2
HIGH_SAD32XN 16, 1 ; highbd_sad32x16_avg_sse2 HIGH_SAD32XN 16, 1 ; highbd_sad32x16_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
HIGH_SAD32XN 8 ; highbd_sad32x8_sse2
HIGH_SAD32XN 8, 1 ; highbd_sad32x8_avg_sse2
%endif
; unsigned int aom_highbd_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride, ; unsigned int aom_highbd_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride); ; uint8_t *ref, int ref_stride);
...@@ -295,7 +299,10 @@ HIGH_SAD16XN 8 ; highbd_sad16x8_sse2 ...@@ -295,7 +299,10 @@ HIGH_SAD16XN 8 ; highbd_sad16x8_sse2
HIGH_SAD16XN 32, 1 ; highbd_sad16x32_avg_sse2 HIGH_SAD16XN 32, 1 ; highbd_sad16x32_avg_sse2
HIGH_SAD16XN 16, 1 ; highbd_sad16x16_avg_sse2 HIGH_SAD16XN 16, 1 ; highbd_sad16x16_avg_sse2
HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2 HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
HIGH_SAD16XN 4 ; highbd_sad16x4_sse2
HIGH_SAD16XN 4, 1 ; highbd_sad16x4_avg_sse2
%endif
; unsigned int aom_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride, ; unsigned int aom_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride); ; uint8_t *ref, int ref_stride);
...@@ -364,3 +371,7 @@ HIGH_SAD8XN 4 ; highbd_sad8x4_sse2 ...@@ -364,3 +371,7 @@ HIGH_SAD8XN 4 ; highbd_sad8x4_sse2
HIGH_SAD8XN 16, 1 ; highbd_sad8x16_avg_sse2 HIGH_SAD8XN 16, 1 ; highbd_sad8x16_avg_sse2
HIGH_SAD8XN 8, 1 ; highbd_sad8x8_avg_sse2 HIGH_SAD8XN 8, 1 ; highbd_sad8x8_avg_sse2
HIGH_SAD8XN 4, 1 ; highbd_sad8x4_avg_sse2 HIGH_SAD8XN 4, 1 ; highbd_sad8x4_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
HIGH_SAD8XN 32 ; highbd_sad8x32_sse2
HIGH_SAD8XN 32, 1 ; highbd_sad8x32_avg_sse2
%endif
...@@ -185,6 +185,11 @@ VAR_FN(16, 16, 16, 8); ...@@ -185,6 +185,11 @@ VAR_FN(16, 16, 16, 8);
VAR_FN(16, 8, 8, 7); VAR_FN(16, 8, 8, 7);
VAR_FN(8, 16, 8, 7); VAR_FN(8, 16, 8, 7);
VAR_FN(8, 8, 8, 6); VAR_FN(8, 8, 8, 6);
#if CONFIG_EXT_PARTITION_TYPES
VAR_FN(16, 4, 16, 6);
VAR_FN(8, 32, 8, 8);
VAR_FN(32, 8, 16, 8);
#endif
#undef VAR_FN #undef VAR_FN
...@@ -391,6 +396,7 @@ DECLS(sse2); ...@@ -391,6 +396,7 @@ DECLS(sse2);
return (var >= 0) ? (uint32_t)var : 0; \ return (var >= 0) ? (uint32_t)var : 0; \
} }
#if CONFIG_EXT_PARTITION_TYPES
#define FNS(opt) \ #define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t)); \ FN(64, 64, 16, 6, 6, opt, (int64_t)); \
FN(64, 32, 16, 6, 5, opt, (int64_t)); \ FN(64, 32, 16, 6, 5, opt, (int64_t)); \
...@@ -402,7 +408,24 @@ DECLS(sse2); ...@@ -402,7 +408,24 @@ DECLS(sse2);
FN(16, 8, 16, 4, 3, opt, (int64_t)); \ FN(16, 8, 16, 4, 3, opt, (int64_t)); \
FN(8, 16, 8, 3, 4, opt, (int64_t)); \ FN(8, 16, 8, 3, 4, opt, (int64_t)); \
FN(8, 8, 8, 3, 3, opt, (int64_t)); \ FN(8, 8, 8, 3, 3, opt, (int64_t)); \
FN(8, 4, 8, 3, 2, opt, (int64_t)); FN(8, 4, 8, 3, 2, opt, (int64_t)); \
FN(16, 4, 16, 4, 2, opt, (int64_t)); \
FN(8, 32, 8, 3, 5, opt, (int64_t)); \
FN(32, 8, 16, 5, 3, opt, (int64_t))
#else
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t)); \
FN(64, 32, 16, 6, 5, opt, (int64_t)); \
FN(32, 64, 16, 5, 6, opt, (int64_t)); \
FN(32, 32, 16, 5, 5, opt, (int64_t)); \
FN(32, 16, 16, 5, 4, opt, (int64_t)); \
FN(16, 32, 16, 4, 5, opt, (int64_t)); \
FN(16, 16, 16, 4, 4, opt, (int64_t)); \
FN(16, 8, 16, 4, 3, opt, (int64_t)); \
FN(8, 16, 8, 3, 4, opt, (int64_t)); \
FN(8, 8, 8, 3, 3, opt, (int64_t)); \
FN(8, 4, 8, 3, 2, opt, (int64_t))
#endif
FNS(sse2); FNS(sse2);
...@@ -550,6 +573,23 @@ DECLS(sse2); ...@@ -550,6 +573,23 @@ DECLS(sse2);
return (var >= 0) ? (uint32_t)var : 0; \ return (var >= 0) ? (uint32_t)var : 0; \
} }
#if CONFIG_EXT_PARTITION_TYPES
#define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t)); \
FN(64, 32, 16, 6, 5, opt, (int64_t)); \
FN(32, 64, 16, 5, 6, opt, (int64_t)); \
FN(32, 32, 16, 5, 5, opt, (int64_t)); \
FN(32, 16, 16, 5, 4, opt, (int64_t)); \
FN(16, 32, 16, 4, 5, opt, (int64_t)); \
FN(16, 16, 16, 4, 4, opt, (int64_t)); \
FN(16, 8, 16, 4, 3, opt, (int64_t)); \
FN(8, 16, 8, 3, 4, opt, (int64_t)); \
FN(8, 8, 8, 3, 3, opt, (int64_t)); \
FN(8, 4, 8, 3, 2, opt, (int64_t)); \
FN(16, 4, 16, 4, 2, opt, (int64_t)); \
FN(8, 32, 8, 3, 5, opt, (int64_t)); \
FN(32, 8, 16, 5, 3, opt, (int64_t));
#else
#define FNS(opt) \ #define FNS(opt) \
FN(64, 64, 16, 6, 6, opt, (int64_t)); \ FN(64, 64, 16, 6, 6, opt, (int64_t)); \
FN(64, 32, 16, 6, 5, opt, (int64_t)); \ FN(64, 32, 16, 6, 5, opt, (int64_t)); \
...@@ -562,6 +602,7 @@ DECLS(sse2); ...@@ -562,6 +602,7 @@ DECLS(sse2);
FN(8, 16, 8, 3, 4, opt, (int64_t)); \ FN(8, 16, 8, 3, 4, opt, (int64_t)); \
FN(8, 8, 8, 3, 3, opt, (int64_t)); \ FN(8, 8, 8, 3, 3, opt, (int64_t)); \
FN(8, 4, 8, 3, 2, opt, (int64_t)); FN(8, 4, 8, 3, 2, opt, (int64_t));
#endif
FNS(sse2); FNS(sse2);
......
...@@ -93,6 +93,12 @@ MASKSAD8XN_SSSE3(8) ...@@ -93,6 +93,12 @@ MASKSAD8XN_SSSE3(8)
MASKSAD8XN_SSSE3(4) MASKSAD8XN_SSSE3(4)
MASKSAD4XN_SSSE3(8) MASKSAD4XN_SSSE3(8)
MASKSAD4XN_SSSE3(4) MASKSAD4XN_SSSE3(4)
#if CONFIG_EXT_PARTITION_TYPES
MASKSAD4XN_SSSE3(16)
MASKSADMXN_SSSE3(16, 4)
MASKSAD8XN_SSSE3(32)
MASKSADMXN_SSSE3(32, 8)
#endif
static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr, static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
int src_stride, int src_stride,
...@@ -283,6 +289,12 @@ HIGHBD_MASKSADMXN_SSSE3(8, 8) ...@@ -283,6 +289,12 @@ HIGHBD_MASKSADMXN_SSSE3(8, 8)
HIGHBD_MASKSADMXN_SSSE3(8, 4) HIGHBD_MASKSADMXN_SSSE3(8, 4)
HIGHBD_MASKSAD4XN_SSSE3(8) HIGHBD_MASKSAD4XN_SSSE3(8)
HIGHBD_MASKSAD4XN_SSSE3(4) HIGHBD_MASKSAD4XN_SSSE3(4)
#if CONFIG_EXT_PARTITION_TYPES
HIGHBD_MASKSAD4XN_SSSE3(16)
HIGHBD_MASKSADMXN_SSSE3(16, 4)
HIGHBD_MASKSADMXN_SSSE3(8, 32)
HIGHBD_MASKSADMXN_SSSE3(32, 8)
#endif
static INLINE unsigned int highbd_masked_sad_ssse3( static INLINE unsigned int highbd_masked_sad_ssse3(
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride, const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
......
...@@ -126,6 +126,12 @@ MASK_SUBPIX_VAR8XH_SSSE3(8) ...@@ -126,6 +126,12 @@ MASK_SUBPIX_VAR8XH_SSSE3(8)
MASK_SUBPIX_VAR8XH_SSSE3(4) MASK_SUBPIX_VAR8XH_SSSE3(4)
MASK_SUBPIX_VAR4XH_SSSE3(8) MASK_SUBPIX_VAR4XH_SSSE3(8)
MASK_SUBPIX_VAR4XH_SSSE3(4) MASK_SUBPIX_VAR4XH_SSSE3(4)
#if CONFIG_EXT_PARTITION_TYPES
MASK_SUBPIX_VAR4XH_SSSE3(16)
MASK_SUBPIX_VAR_SSSE3(16, 4)
MASK_SUBPIX_VAR8XH_SSSE3(32)
MASK_SUBPIX_VAR_SSSE3(32, 8)
#endif
static INLINE __m128i filter_block(const __m128i a, const __m128i b, static INLINE __m128i filter_block(const __m128i a, const __m128i b,
const __m128i filter) { const __m128i filter) {
...@@ -693,6 +699,12 @@ HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 8) ...@@ -693,6 +699,12 @@ HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 8)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 4) HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 4)
HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(8) HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(8)
HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(4) HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(4)
#if CONFIG_EXT_PARTITION_TYPES
HIGHBD_MASK_SUBPIX_VAR4XH_SSSE3(16)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(16, 4)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(8, 32)
HIGHBD_MASK_SUBPIX_VAR_SSSE3(32, 8)
#endif
static INLINE __m128i highbd_filter_block(const __m128i a, const __m128i b, static INLINE __m128i highbd_filter_block(const __m128i a, const __m128i b,
const __m128i filter) { const __m128i filter) {
......
...@@ -137,6 +137,12 @@ OBMCSADWXH(8, 8) ...@@ -137,6 +137,12 @@ OBMCSADWXH(8, 8)
OBMCSADWXH(8, 4) OBMCSADWXH(8, 4)
OBMCSADWXH(4, 8) OBMCSADWXH(4, 8)
OBMCSADWXH(4, 4) OBMCSADWXH(4, 4)
#if CONFIG_EXT_PARTITION_TYPES
OBMCSADWXH(4, 16)
OBMCSADWXH(16, 4)
OBMCSADWXH(8, 32)
OBMCSADWXH(32, 8)
#endif
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// High bit-depth // High bit-depth
...@@ -260,4 +266,10 @@ HBD_OBMCSADWXH(8, 8) ...@@ -260,4 +266,10 @@ HBD_OBMCSADWXH(8, 8)
HBD_OBMCSADWXH(8, 4) HBD_OBMCSADWXH(8, 4)
HBD_OBMCSADWXH(4, 8) HBD_OBMCSADWXH(4, 8)
HBD_OBMCSADWXH(4, 4) HBD_OBMCSADWXH(4, 4)
#if CONFIG_EXT_PARTITION_TYPES
HBD_OBMCSADWXH(4, 16)
HBD_OBMCSADWXH(16, 4)
HBD_OBMCSADWXH(8, 32)
HBD_OBMCSADWXH(32, 8)
#endif
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
...@@ -146,6 +146,12 @@ OBMCVARWXH(8, 8) ...@@ -146,6 +146,12 @@ OBMCVARWXH(8, 8)
OBMCVARWXH(8, 4) OBMCVARWXH(8, 4)
OBMCVARWXH(4, 8) OBMCVARWXH(4, 8)
OBMCVARWXH(4, 4) OBMCVARWXH(4, 4)
#if CONFIG_EXT_PARTITION_TYPES
OBMCVARWXH(4, 16)
OBMCVARWXH(16, 4)
OBMCVARWXH(8, 32)
OBMCVARWXH(32, 8)
#endif
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// High bit-depth // High bit-depth
...@@ -353,4 +359,10 @@ HBD_OBMCVARWXH(8, 8) ...@@ -353,4 +359,10 @@ HBD_OBMCVARWXH(8, 8)
HBD_OBMCVARWXH(8, 4) HBD_OBMCVARWXH(8, 4)
HBD_OBMCVARWXH(4, 8) HBD_OBMCVARWXH(4, 8)
HBD_OBMCVARWXH(4, 4) HBD_OBMCVARWXH(4, 4)
#if CONFIG_EXT_PARTITION_TYPES
HBD_OBMCVARWXH(4, 16)
HBD_OBMCVARWXH(16, 4)
HBD_OBMCVARWXH(8, 32)
HBD_OBMCVARWXH(32, 8)
#endif
#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_HIGHBITDEPTH
...@@ -251,3 +251,9 @@ SADNXN4D 8, 8 ...@@ -251,3 +251,9 @@ SADNXN4D 8, 8
SADNXN4D 8, 4 SADNXN4D 8, 4
SADNXN4D 4, 8 SADNXN4D 4, 8
SADNXN4D 4, 4 SADNXN4D 4, 4
%if CONFIG_EXT_PARTITION_TYPES
SADNXN4D 4, 16
SADNXN4D 16, 4
SADNXN4D 8, 32
SADNXN4D 32, 8
%endif
...@@ -208,6 +208,10 @@ SAD32XN 16 ; sad32x16_sse2 ...@@ -208,6 +208,10 @@ SAD32XN 16 ; sad32x16_sse2
SAD32XN 64, 1 ; sad32x64_avg_sse2 SAD32XN 64, 1 ; sad32x64_avg_sse2
SAD32XN 32, 1 ; sad32x32_avg_sse2 SAD32XN 32, 1 ; sad32x32_avg_sse2
SAD32XN 16, 1 ; sad32x16_avg_sse2 SAD32XN 16, 1 ; sad32x16_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
SAD32XN 8 ; sad32x8_sse2
SAD32XN 8, 1 ; sad32x8_avg_sse2
%endif
; unsigned int aom_sad16x{8,16}_sse2(uint8_t *src, int src_stride, ; unsigned int aom_sad16x{8,16}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride); ; uint8_t *ref, int ref_stride);
...@@ -254,6 +258,10 @@ SAD16XN 8 ; sad16x8_sse2 ...@@ -254,6 +258,10 @@ SAD16XN 8 ; sad16x8_sse2
SAD16XN 32, 1 ; sad16x32_avg_sse2 SAD16XN 32, 1 ; sad16x32_avg_sse2
SAD16XN 16, 1 ; sad16x16_avg_sse2 SAD16XN 16, 1 ; sad16x16_avg_sse2
SAD16XN 8, 1 ; sad16x8_avg_sse2 SAD16XN 8, 1 ; sad16x8_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
SAD16XN 4 ; sad16x4_sse2
SAD16XN 4, 1 ; sad16x4_avg_sse2
%endif
; unsigned int aom_sad8x{8,16}_sse2(uint8_t *src, int src_stride, ; unsigned int aom_sad8x{8,16}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride); ; uint8_t *ref, int ref_stride);
...@@ -298,6 +306,10 @@ SAD8XN 4 ; sad8x4_sse2 ...@@ -298,6 +306,10 @@ SAD8XN 4 ; sad8x4_sse2
SAD8XN 16, 1 ; sad8x16_avg_sse2 SAD8XN 16, 1 ; sad8x16_avg_sse2
SAD8XN 8, 1 ; sad8x8_avg_sse2 SAD8XN 8, 1 ; sad8x8_avg_sse2
SAD8XN 4, 1 ; sad8x4_avg_sse2 SAD8XN 4, 1 ; sad8x4_avg_sse2
%if CONFIG_EXT_PARTITION_TYPES
SAD8XN 32 ; sad8x32_sse2
SAD8XN 32, 1 ; sad8x32_avg_sse2
%endif
; unsigned int aom_sad4x{4, 8}_sse2(uint8_t *src, int src_stride, ; unsigned int aom_sad4x{4, 8}_sse2(uint8_t *src, int src_stride,
; uint8_t *ref, int ref_stride); ; uint8_t *ref, int ref_stride);
...@@ -343,3 +355,7 @@ SAD4XN 8 ; sad4x8_sse ...@@ -343,3 +355,7 @@ SAD4XN 8 ; sad4x8_sse
SAD4XN 4 ; sad4x4_sse SAD4XN 4 ; sad4x4_sse