Commit 46ae1ea3 authored by Yi Luo

Lowbd SMOOTH_PRED intrapred ssse3 optimization

On i7-6700:
Predictor    ssse3 v. C
4x4          ~1.3x
4x8          ~1.9x
8x4          ~2.3x
8x8          ~3.4x
8x16         ~4.1x
16x8         ~4.6x
16x16        ~5.2x
16x32        ~5.6x
32x16        ~4.2x
32x32        ~4.7x

Change-Id: Ic12383cf9d4446361d6355eb8a480a3c7602060e
parent 698af562
......@@ -23,6 +23,7 @@ set(AOM_DSP_COMMON_SOURCES
"${AOM_ROOT}/aom_dsp/blend_a64_mask.c"
"${AOM_ROOT}/aom_dsp/blend_a64_vmask.c"
"${AOM_ROOT}/aom_dsp/intrapred.c"
"${AOM_ROOT}/aom_dsp/intrapred_common.h"
"${AOM_ROOT}/aom_dsp/loopfilter.c"
"${AOM_ROOT}/aom_dsp/prob.c"
"${AOM_ROOT}/aom_dsp/prob.h"
......
......@@ -64,6 +64,7 @@ endif
# intra predictions
DSP_SRCS-yes += intrapred.c
DSP_SRCS-yes += intrapred_common.h
ifneq ($(CONFIG_ANS),yes)
DSP_SRCS-yes += entcode.c
......
......@@ -147,6 +147,21 @@ specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
specialize qw/aom_paeth_predictor_32x16 ssse3 avx2/;
specialize qw/aom_paeth_predictor_32x32 ssse3 avx2/;
specialize qw/aom_paeth_predictor_16x8 ssse3/;
specialize qw/aom_paeth_predictor_16x16 ssse3/;
specialize qw/aom_paeth_predictor_16x32 ssse3/;
specialize qw/aom_paeth_predictor_32x16 ssse3/;
specialize qw/aom_paeth_predictor_32x32 ssse3/;
specialize qw/aom_smooth_predictor_4x4 ssse3/;
specialize qw/aom_smooth_predictor_4x8 ssse3/;
specialize qw/aom_smooth_predictor_8x4 ssse3/;
specialize qw/aom_smooth_predictor_8x8 ssse3/;
specialize qw/aom_smooth_predictor_8x16 ssse3/;
specialize qw/aom_smooth_predictor_16x8 ssse3/;
specialize qw/aom_smooth_predictor_16x16 ssse3/;
specialize qw/aom_smooth_predictor_16x32 ssse3/;
specialize qw/aom_smooth_predictor_32x16 ssse3/;
specialize qw/aom_smooth_predictor_32x32 ssse3/;
specialize qw/aom_d63e_predictor_4x4 ssse3/;
specialize qw/aom_d135_predictor_4x4 neon/;
......
......@@ -16,6 +16,7 @@
#include "./aom_dsp_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/intrapred_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/bitops.h"
......@@ -207,40 +208,6 @@ static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
}
}
// Largest block edge the weight table must cover, i.e.
// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST]).
#if CONFIG_TX64X64
#define MAX_BLOCK_DIM 64
#else
#define MAX_BLOCK_DIM 32
#endif  // CONFIG_TX64X64

// log2 of the fixed-point scale applied to every weight in the table below.
static const int sm_weight_log2_scale = 8;

// Weights are quadratic from '1' to '1 / block_size', scaled by
// 2^sm_weight_log2_scale. The sub-table for block size bs begins at index bs
// and holds bs entries, so the tables are packed back to back.
static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
  // Indices 0-1: unused, because lookups are offset by bs (at least 2).
  0, 0,
  // Indices 2-3: bs = 2
  255, 128,
  // Indices 4-7: bs = 4
  255, 149, 85, 64,
  // Indices 8-15: bs = 8
  255, 197, 146, 105, 73, 50, 37, 32,
  // Indices 16-31: bs = 16
  255, 225, 196, 170, 145, 123, 102, 84,
  68, 54, 43, 33, 26, 20, 17, 16,
  // Indices 32-63: bs = 32
  255, 240, 225, 210, 196, 182, 169, 157,
  145, 133, 122, 111, 101, 92, 83, 74,
  66, 59, 52, 45, 39, 34, 29, 25,
  21, 17, 14, 12, 10, 9, 8, 8,
#if CONFIG_TX64X64
  // Indices 64-127: bs = 64
  255, 248, 240, 233, 225, 218, 210, 203,
  196, 189, 182, 176, 169, 163, 156, 150,
  144, 138, 133, 127, 121, 116, 111, 106,
  101, 96, 91, 86, 82, 77, 73, 69,
  65, 61, 57, 54, 50, 47, 44, 41,
  38, 35, 32, 29, 27, 25, 22, 20,
  18, 16, 15, 13, 12, 10, 9, 8,
  7, 6, 6, 5, 5, 4, 4, 4,
#endif  // CONFIG_TX64X64
};
// Some basic checks on weights for smooth predictor.
#define sm_weights_sanity_checks(weights_w, weights_h, weights_scale, \
pred_scale) \
......
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Include guard. The name deliberately avoids a leading underscore followed by
// an uppercase letter, since such identifiers are reserved for the
// implementation.
#ifndef AOM_DSP_INTRAPRED_COMMON_H_
#define AOM_DSP_INTRAPRED_COMMON_H_
// uint8_t is used below; include its definition here so that this header does
// not depend on what the including file happened to include first.
#include <stdint.h>
#include "./aom_config.h"
// Weights are quadratic from '1' to '1 / block_size', scaled by
// 2^sm_weight_log2_scale.
// Both objects below are 'static const', so every translation unit that
// includes this header gets its own private copy.
static const int sm_weight_log2_scale = 8;
#if CONFIG_TX64X64
// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
#define MAX_BLOCK_DIM 64
#else
#define MAX_BLOCK_DIM 32
#endif  // CONFIG_TX64X64
// Packed weight tables: the entries for block size bs start at index bs and
// there are bs of them, which is why the array has 2 * MAX_BLOCK_DIM elements.
static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
  // Unused, because we always offset by bs, which is at least 2.
  0, 0,
  // bs = 2
  255, 128,
  // bs = 4
  255, 149, 85, 64,
  // bs = 8
  255, 197, 146, 105, 73, 50, 37, 32,
  // bs = 16
  255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
  // bs = 32
  255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
  66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
#if CONFIG_TX64X64
  // bs = 64
  255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
  150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
  65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
  13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
#endif  // CONFIG_TX64X64
};
#endif  // AOM_DSP_INTRAPRED_COMMON_H_
This diff is collapsed.
......@@ -246,7 +246,7 @@ INSTANTIATE_TEST_CASE_P(AVX2, LowbdIntraPredTest,
#if HAVE_SSSE3
const IntraPredFunc<IntraPred> LowbdIntraPredTestVectorSsse3[] = {
lowbd_intrapred(paeth, ssse3),
lowbd_intrapred(paeth, ssse3), lowbd_intrapred(smooth, ssse3),
};
INSTANTIATE_TEST_CASE_P(SSSE3, LowbdIntraPredTest,
......
......@@ -440,10 +440,12 @@ INTRA_PRED_TEST(SSE2_2, TestIntraPred4, "intra4x8", aom_dc_predictor_4x8_sse2,
INTRA_PRED_TEST(SSSE3_1, TestIntraPred4, "intra4x4", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_4x4_ssse3,
NULL, aom_d63e_predictor_4x4_ssse3,
aom_paeth_predictor_4x4_ssse3, NULL, NULL, NULL)
aom_paeth_predictor_4x4_ssse3, aom_smooth_predictor_4x4_ssse3,
NULL, NULL)
INTRA_PRED_TEST(SSSE3_2, TestIntraPred4, "intra4x8", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
aom_paeth_predictor_4x8_ssse3, NULL, NULL, NULL)
aom_paeth_predictor_4x8_ssse3, aom_smooth_predictor_4x8_ssse3,
NULL, NULL)
#endif // HAVE_SSSE3
#if HAVE_DSPR2
......@@ -549,13 +551,16 @@ INTRA_PRED_TEST(SSE2_3, TestIntraPred8, "intra8x16", aom_dc_predictor_8x16_sse2,
#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3_1, TestIntraPred8, "intra8x8", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_8x8_ssse3,
NULL, NULL, aom_paeth_predictor_8x8_ssse3, NULL, NULL, NULL)
NULL, NULL, aom_paeth_predictor_8x8_ssse3,
aom_smooth_predictor_8x8_ssse3, NULL, NULL)
INTRA_PRED_TEST(SSSE3_2, TestIntraPred8, "intra8x4", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
aom_paeth_predictor_8x4_ssse3, NULL, NULL, NULL)
aom_paeth_predictor_8x4_ssse3, aom_smooth_predictor_8x4_ssse3,
NULL, NULL)
INTRA_PRED_TEST(SSSE3_3, TestIntraPred8, "intra8x16", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
aom_paeth_predictor_8x16_ssse3, NULL, NULL, NULL)
aom_paeth_predictor_8x16_ssse3, aom_smooth_predictor_8x16_ssse3,
NULL, NULL)
#endif // HAVE_SSSE3
#if HAVE_DSPR2
......@@ -663,13 +668,16 @@ INTRA_PRED_TEST(SSE2_3, TestIntraPred16, "intra16x32",
#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3_1, TestIntraPred16, "intra16x16", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_16x16_ssse3,
NULL, NULL, aom_paeth_predictor_16x16_ssse3, NULL, NULL, NULL)
NULL, NULL, aom_paeth_predictor_16x16_ssse3,
aom_smooth_predictor_16x16_ssse3, NULL, NULL)
INTRA_PRED_TEST(SSSE3_2, TestIntraPred16, "intra16x8", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
aom_paeth_predictor_16x8_ssse3, NULL, NULL, NULL)
aom_paeth_predictor_16x8_ssse3, aom_smooth_predictor_16x8_ssse3,
NULL, NULL)
INTRA_PRED_TEST(SSSE3_3, TestIntraPred16, "intra16x32", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
aom_paeth_predictor_16x32_ssse3, NULL, NULL, NULL)
aom_paeth_predictor_16x32_ssse3,
aom_smooth_predictor_16x32_ssse3, NULL, NULL)
#endif // HAVE_SSSE3
#if HAVE_AVX2
......@@ -767,10 +775,12 @@ INTRA_PRED_TEST(SSE2_2, TestIntraPred32, "intra32x16",
#if HAVE_SSSE3
INTRA_PRED_TEST(SSSE3_1, TestIntraPred32, "intra32x32", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, aom_d153_predictor_32x32_ssse3,
NULL, NULL, aom_paeth_predictor_32x32_ssse3, NULL, NULL, NULL)
NULL, NULL, aom_paeth_predictor_32x32_ssse3,
aom_smooth_predictor_32x32_ssse3, NULL, NULL)
INTRA_PRED_TEST(SSSE3_2, TestIntraPred32, "intra32x16", NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
aom_paeth_predictor_32x16_ssse3, NULL, NULL, NULL)
aom_paeth_predictor_32x16_ssse3,
aom_smooth_predictor_32x16_ssse3, NULL, NULL)
#endif // HAVE_SSSE3
#if HAVE_AVX2
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment