Commit 0a2c0cbc authored by Angie Chiang's avatar Angie Chiang

Let hbd conv func be flexible

This CL allows us to easily change the filter coefficients used by the SIMD
implementation of the high-bitdepth convolution functions.

Change-Id: I454a5c76d3ba9e4454118c6a9d87737b3aa24898
parent 2b0eeb16
...@@ -71,7 +71,6 @@ AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d_cfg.h ...@@ -71,7 +71,6 @@ AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d_cfg.h
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes) ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_filters_sse4.h
endif endif
AV1_COMMON_SRCS-yes += common/convolve.c AV1_COMMON_SRCS-yes += common/convolve.c
AV1_COMMON_SRCS-yes += common/convolve.h AV1_COMMON_SRCS-yes += common/convolve.h
......
...@@ -39,8 +39,8 @@ if ($opts{arch} eq "x86_64") { ...@@ -39,8 +39,8 @@ if ($opts{arch} eq "x86_64") {
# #
# 10/12-tap convolution filters # 10/12-tap convolution filters
# #
add_proto qw/void av1_convolve_init/, "void"; add_proto qw/void av1_lowbd_convolve_init/, "void";
specialize qw/av1_convolve_init ssse3/; specialize qw/av1_lowbd_convolve_init ssse3/;
add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params"; add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_horiz ssse3/; specialize qw/av1_convolve_horiz ssse3/;
...@@ -49,6 +49,8 @@ add_proto qw/void av1_convolve_vert/, "const uint8_t *src, int src_stride, uint8 ...@@ -49,6 +49,8 @@ add_proto qw/void av1_convolve_vert/, "const uint8_t *src, int src_stride, uint8
specialize qw/av1_convolve_vert ssse3/; specialize qw/av1_convolve_vert ssse3/;
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_convolve_init/, "void";
specialize qw/av1_highbd_convolve_init sse4_1/;
add_proto qw/void av1_highbd_convolve_horiz/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd"; add_proto qw/void av1_highbd_convolve_horiz/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
specialize qw/av1_highbd_convolve_horiz sse4_1/; specialize qw/av1_highbd_convolve_horiz sse4_1/;
add_proto qw/void av1_highbd_convolve_vert/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd"; add_proto qw/void av1_highbd_convolve_vert/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg, int bd";
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "./av1_rtcd.h" #include "./av1_rtcd.h"
#include "av1/common/convolve.h" #include "av1/common/convolve.h"
#include "av1/common/filter.h" #include "av1/common/filter.h"
#include "av1/common/onyxc_int.h"
#include "aom_dsp/aom_dsp_common.h" #include "aom_dsp/aom_dsp_common.h"
#include "aom_ports/mem.h" #include "aom_ports/mem.h"
...@@ -410,11 +411,29 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -410,11 +411,29 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
} }
} }
void av1_convolve_init_c(void) { void av1_lowbd_convolve_init_c(void) {
// A placeholder for SIMD initialization // A placeholder for SIMD initialization
return; return;
} }
void av1_highbd_convolve_init_c(void) {
// A placeholder for SIMD initialization
return;
}
// Runs the convolve init hook that matches the stream's bit depth.
//
// When high-bitdepth support is compiled in, cm->use_highbitdepth selects
// between the high-bitdepth and low-bitdepth init hooks; otherwise only the
// low-bitdepth hook exists and cm is not consulted.
void av1_convolve_init(AV1_COMMON *cm) {
#if CONFIG_AOM_HIGHBITDEPTH
  if (!cm->use_highbitdepth) {
    av1_lowbd_convolve_init();
    return;
  }
  av1_highbd_convolve_init();
#else
  (void)cm;  // Only read in high-bitdepth builds.
  av1_lowbd_convolve_init();
#endif
}
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride, void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h, uint16_t *dst, int dst_stride, int w, int h,
......
...@@ -42,7 +42,8 @@ static INLINE ConvolveParams get_conv_params(int ref, int plane) { ...@@ -42,7 +42,8 @@ static INLINE ConvolveParams get_conv_params(int ref, int plane) {
conv_params.plane = plane; conv_params.plane = plane;
return conv_params; return conv_params;
} }
struct AV1Common;
void av1_convolve_init(struct AV1Common *cm);
#if CONFIG_CONVOLVE_ROUND #if CONFIG_CONVOLVE_ROUND
void av1_convolve_2d(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, void av1_convolve_2d(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
......
...@@ -2010,7 +2010,7 @@ void av1_setup_past_independence(AV1_COMMON *cm) { ...@@ -2010,7 +2010,7 @@ void av1_setup_past_independence(AV1_COMMON *cm) {
#if CONFIG_ADAPT_SCAN #if CONFIG_ADAPT_SCAN
av1_init_scan_order(cm); av1_init_scan_order(cm);
#endif #endif
av1_convolve_init(); av1_convolve_init(cm);
cm->fc->initialized = 1; cm->fc->initialized = 1;
if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode || if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
......
...@@ -997,7 +997,7 @@ SimdFilter temporal_simd_filter = { ...@@ -997,7 +997,7 @@ SimdFilter temporal_simd_filter = {
}; };
#endif #endif
void av1_convolve_init_ssse3(void) { void av1_lowbd_convolve_init_ssse3(void) {
#if USE_TEMPORALFILTER_12TAP #if USE_TEMPORALFILTER_12TAP
{ {
InterpFilterParams filter_params = InterpFilterParams filter_params =
......
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
#define AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
#include "./aom_config.h"
#if CONFIG_AOM_HIGHBITDEPTH
#if CONFIG_DUAL_FILTER
// Static coefficient table for the 12-tap sharp filter, laid out as
// 15 entries x 6 rows x 8 int16_t lanes and declared through
// DECLARE_ALIGNED(16, ...). Every row stores a single pair of values repeated
// four times across the eight lanes.
// NOTE(review): the 15 entries presumably map to subpel positions 1..15 and
// row r to filter taps (2r, 2r + 1) -- confirm against the filter definitions
// before editing any value by hand.
DECLARE_ALIGNED(16, static const int16_t,
sub_pel_filters_12sharp_highbd_ver_signal_dir[15][6][8]) = {
{
{ 0, 1, 0, 1, 0, 1, 0, 1 },
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -7, 127, -7, 127, -7, 127, -7, 127 },
{ 8, -4, 8, -4, 8, -4, 8, -4 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
{ 0, 0, 0, 0, 0, 0, 0, 0 },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1 },
{ -3, 5, -3, 5, -3, 5, -3, 5 },
{ -12, 124, -12, 124, -12, 124, -12, 124 },
{ 18, -8, 18, -8, 18, -8, 18, -8 },
{ 4, -2, 4, -2, 4, -2, 4, -2 },
{ 1, 0, 1, 0, 1, 0, 1, 0 },
},
{
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -4, 8, -4, 8, -4, 8, -4, 8 },
{ -17, 120, -17, 120, -17, 120, -17, 120 },
{ 28, -11, 28, -11, 28, -11, 28, -11 },
{ 6, -3, 6, -3, 6, -3, 6, -3 },
{ 1, -1, 1, -1, 1, -1, 1, -1 },
},
{
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -4, 10, -4, 10, -4, 10, -4, 10 },
{ -21, 114, -21, 114, -21, 114, -21, 114 },
{ 38, -15, 38, -15, 38, -15, 38, -15 },
{ 8, -4, 8, -4, 8, -4, 8, -4 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -5, 11, -5, 11, -5, 11, -5, 11 },
{ -23, 107, -23, 107, -23, 107, -23, 107 },
{ 49, -18, 49, -18, 49, -18, 49, -18 },
{ 9, -5, 9, -5, 9, -5, 9, -5 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 12, -6, 12, -6, 12, -6, 12 },
{ -25, 99, -25, 99, -25, 99, -25, 99 },
{ 60, -21, 60, -21, 60, -21, 60, -21 },
{ 11, -6, 11, -6, 11, -6, 11, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 12, -6, 12, -6, 12, -6, 12 },
{ -25, 90, -25, 90, -25, 90, -25, 90 },
{ 70, -23, 70, -23, 70, -23, 70, -23 },
{ 12, -6, 12, -6, 12, -6, 12, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 12, -6, 12, -6, 12, -6, 12 },
{ -24, 80, -24, 80, -24, 80, -24, 80 },
{ 80, -24, 80, -24, 80, -24, 80, -24 },
{ 12, -6, 12, -6, 12, -6, 12, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 12, -6, 12, -6, 12, -6, 12 },
{ -23, 70, -23, 70, -23, 70, -23, 70 },
{ 90, -25, 90, -25, 90, -25, 90, -25 },
{ 12, -6, 12, -6, 12, -6, 12, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 11, -6, 11, -6, 11, -6, 11 },
{ -21, 60, -21, 60, -21, 60, -21, 60 },
{ 99, -25, 99, -25, 99, -25, 99, -25 },
{ 12, -6, 12, -6, 12, -6, 12, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -5, 9, -5, 9, -5, 9, -5, 9 },
{ -18, 49, -18, 49, -18, 49, -18, 49 },
{ 107, -23, 107, -23, 107, -23, 107, -23 },
{ 11, -5, 11, -5, 11, -5, 11, -5 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -4, 8, -4, 8, -4, 8, -4, 8 },
{ -15, 38, -15, 38, -15, 38, -15, 38 },
{ 114, -21, 114, -21, 114, -21, 114, -21 },
{ 10, -4, 10, -4, 10, -4, 10, -4 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
},
{
{ -1, 1, -1, 1, -1, 1, -1, 1 },
{ -3, 6, -3, 6, -3, 6, -3, 6 },
{ -11, 28, -11, 28, -11, 28, -11, 28 },
{ 120, -17, 120, -17, 120, -17, 120, -17 },
{ 8, -4, 8, -4, 8, -4, 8, -4 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1 },
{ -2, 4, -2, 4, -2, 4, -2, 4 },
{ -8, 18, -8, 18, -8, 18, -8, 18 },
{ 124, -12, 124, -12, 124, -12, 124, -12 },
{ 5, -3, 5, -3, 5, -3, 5, -3 },
{ 1, 0, 1, 0, 1, 0, 1, 0 },
},
{
{ 0, 0, 0, 0, 0, 0, 0, 0 },
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -4, 8, -4, 8, -4, 8, -4, 8 },
{ 127, -7, 127, -7, 127, -7, 127, -7 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
{ 1, 0, 1, 0, 1, 0, 1, 0 },
},
};
#endif
#endif
#if CONFIG_AOM_HIGHBITDEPTH
#if USE_TEMPORALFILTER_12TAP
// Static coefficient table for the 12-tap temporal filter, laid out as
// 15 entries x 6 rows x 8 int16_t lanes and declared through
// DECLARE_ALIGNED(16, ...). Every row stores a single pair of values repeated
// four times across the eight lanes.
// NOTE(review): the 15 entries presumably map to subpel positions 1..15 and
// row r to filter taps (2r, 2r + 1) -- confirm against the filter definitions
// before editing any value by hand.
DECLARE_ALIGNED(
16, static const int16_t,
sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[15][6][8]) = {
{
{ 0, 1, 0, 1, 0, 1, 0, 1 },
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -7, 127, -7, 127, -7, 127, -7, 127 },
{ 8, -4, 8, -4, 8, -4, 8, -4 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
{ 0, 0, 0, 0, 0, 0, 0, 0 },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1 },
{ -3, 5, -3, 5, -3, 5, -3, 5 },
{ -12, 124, -12, 124, -12, 124, -12, 124 },
{ 18, -8, 18, -8, 18, -8, 18, -8 },
{ 4, -2, 4, -2, 4, -2, 4, -2 },
{ 1, 0, 1, 0, 1, 0, 1, 0 },
},
{
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -4, 8, -4, 8, -4, 8, -4, 8 },
{ -17, 120, -17, 120, -17, 120, -17, 120 },
{ 28, -11, 28, -11, 28, -11, 28, -11 },
{ 6, -3, 6, -3, 6, -3, 6, -3 },
{ 1, -1, 1, -1, 1, -1, 1, -1 },
},
{
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -4, 10, -4, 10, -4, 10, -4, 10 },
{ -21, 114, -21, 114, -21, 114, -21, 114 },
{ 38, -15, 38, -15, 38, -15, 38, -15 },
{ 8, -4, 8, -4, 8, -4, 8, -4 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -5, 11, -5, 11, -5, 11, -5, 11 },
{ -23, 107, -23, 107, -23, 107, -23, 107 },
{ 49, -18, 49, -18, 49, -18, 49, -18 },
{ 9, -5, 9, -5, 9, -5, 9, -5 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 12, -6, 12, -6, 12, -6, 12 },
{ -25, 99, -25, 99, -25, 99, -25, 99 },
{ 60, -21, 60, -21, 60, -21, 60, -21 },
{ 11, -6, 11, -6, 11, -6, 11, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 12, -6, 12, -6, 12, -6, 12 },
{ -25, 90, -25, 90, -25, 90, -25, 90 },
{ 70, -23, 70, -23, 70, -23, 70, -23 },
{ 12, -6, 12, -6, 12, -6, 12, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 12, -6, 12, -6, 12, -6, 12 },
{ -24, 80, -24, 80, -24, 80, -24, 80 },
{ 80, -24, 80, -24, 80, -24, 80, -24 },
{ 12, -6, 12, -6, 12, -6, 12, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 12, -6, 12, -6, 12, -6, 12 },
{ -23, 70, -23, 70, -23, 70, -23, 70 },
{ 90, -25, 90, -25, 90, -25, 90, -25 },
{ 12, -6, 12, -6, 12, -6, 12, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 3, -1, 3, -1, 3, -1, 3 },
{ -6, 11, -6, 11, -6, 11, -6, 11 },
{ -21, 60, -21, 60, -21, 60, -21, 60 },
{ 99, -25, 99, -25, 99, -25, 99, -25 },
{ 12, -6, 12, -6, 12, -6, 12, -6 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -5, 9, -5, 9, -5, 9, -5, 9 },
{ -18, 49, -18, 49, -18, 49, -18, 49 },
{ 107, -23, 107, -23, 107, -23, 107, -23 },
{ 11, -5, 11, -5, 11, -5, 11, -5 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
},
{
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -4, 8, -4, 8, -4, 8, -4, 8 },
{ -15, 38, -15, 38, -15, 38, -15, 38 },
{ 114, -21, 114, -21, 114, -21, 114, -21 },
{ 10, -4, 10, -4, 10, -4, 10, -4 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
},
{
{ -1, 1, -1, 1, -1, 1, -1, 1 },
{ -3, 6, -3, 6, -3, 6, -3, 6 },
{ -11, 28, -11, 28, -11, 28, -11, 28 },
{ 120, -17, 120, -17, 120, -17, 120, -17 },
{ 8, -4, 8, -4, 8, -4, 8, -4 },
{ 2, -1, 2, -1, 2, -1, 2, -1 },
},
{
{ 0, 1, 0, 1, 0, 1, 0, 1 },
{ -2, 4, -2, 4, -2, 4, -2, 4 },
{ -8, 18, -8, 18, -8, 18, -8, 18 },
{ 124, -12, 124, -12, 124, -12, 124, -12 },
{ 5, -3, 5, -3, 5, -3, 5, -3 },
{ 1, 0, 1, 0, 1, 0, 1, 0 },
},
{
{ 0, 0, 0, 0, 0, 0, 0, 0 },
{ -1, 2, -1, 2, -1, 2, -1, 2 },
{ -4, 8, -4, 8, -4, 8, -4, 8 },
{ 127, -7, 127, -7, 127, -7, 127, -7 },
{ 3, -1, 3, -1, 3, -1, 3, -1 },
{ 1, 0, 1, 0, 1, 0, 1, 0 },
},
};
#endif
#endif
#endif // AV1_COMMON_X86_AV1_HIGHBD_CONVOLVE_FILTERS_SSE4_H_
...@@ -14,9 +14,16 @@ ...@@ -14,9 +14,16 @@
#include "./av1_rtcd.h" #include "./av1_rtcd.h"
#include "av1/common/filter.h" #include "av1/common/filter.h"
#include "av1/common/x86/av1_highbd_convolve_filters_sse4.h"
typedef const int16_t (*HbdSubpelFilterCoeffs)[8]; #if CONFIG_DUAL_FILTER
DECLARE_ALIGNED(16, static int16_t, subpel_filters_sharp[15][6][8]);
#endif
#if USE_TEMPORALFILTER_12TAP
DECLARE_ALIGNED(16, static int16_t, subpel_temporalfilter[15][6][8]);
#endif
typedef int16_t (*HbdSubpelFilterCoeffs)[8];
typedef void (*TransposeSave)(const int width, int pixelsNum, uint32_t *src, typedef void (*TransposeSave)(const int width, int pixelsNum, uint32_t *src,
int src_stride, uint16_t *dst, int dst_stride, int src_stride, uint16_t *dst, int dst_stride,
...@@ -26,12 +33,12 @@ static INLINE HbdSubpelFilterCoeffs ...@@ -26,12 +33,12 @@ static INLINE HbdSubpelFilterCoeffs
hbd_get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) { hbd_get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) {
#if CONFIG_DUAL_FILTER #if CONFIG_DUAL_FILTER
if (p.interp_filter == MULTITAP_SHARP) { if (p.interp_filter == MULTITAP_SHARP) {
return &sub_pel_filters_12sharp_highbd_ver_signal_dir[index][0]; return &subpel_filters_sharp[index][0];
} }
#endif #endif
#if USE_TEMPORALFILTER_12TAP #if USE_TEMPORALFILTER_12TAP
if (p.interp_filter == TEMPORALFILTER_12TAP) { if (p.interp_filter == TEMPORALFILTER_12TAP) {
return &sub_pel_filters_temporalfilter_12_highbd_ver_signal_dir[index][0]; return &subpel_temporalfilter[index][0];
} }
#endif #endif
(void)p; (void)p;
...@@ -39,6 +46,47 @@ hbd_get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) { ...@@ -39,6 +46,47 @@ hbd_get_subpel_filter_ver_signal_dir(const InterpFilterParams p, int index) {
return NULL; return NULL;
} }
// Expands a table of 1-D subpel kernels into the [6][8] per-entry layout used
// by this file's SIMD tables.
//
// filter_ptr  : kernel table; the kernel for subpel position s starts at
//               filter_ptr + s * taps.
// taps        : number of coefficients per kernel.
// simd_filter : output; entry (s - 1) receives the kernel for position s,
//               for s = 1 .. SUBPEL_SHIFTS - 1 (position 0 is not stored).
//
// Each kernel is centred inside a 12-slot window: slots that fall outside the
// kernel are written as 0. Slot p lands in row p / 2, in the even lanes when
// p is even and in the odd lanes when p is odd, so every row ends up holding
// one coefficient pair repeated four times.
static void init_simd_filter(const int16_t *filter_ptr, int taps,
                             int16_t (*simd_filter)[6][8]) {
  const int pad = (12 - taps) / 2;
  int subpel;
  for (subpel = 1; subpel < SUBPEL_SHIFTS; ++subpel) {
    const int16_t *kernel = filter_ptr + subpel * taps;
    int16_t(*entry)[8] = simd_filter[subpel - 1];
    int slot;
    for (slot = 0; slot < 12; ++slot) {
      const int tap = slot - pad;
      // The coefficient does not depend on the lane, so fetch it once.
      const int16_t coeff = (tap >= 0 && tap < taps) ? kernel[tap] : 0;
      int16_t *row = entry[slot / 2];
      int lane;
      // Even slots fill lanes 0,2,4,6; odd slots fill lanes 1,3,5,7.
      for (lane = slot % 2; lane < 8; lane += 2) row[lane] = coeff;
    }
  }
}
// SSE4.1 high-bitdepth convolve init hook: fills this file's static SIMD
// coefficient tables from the interpolation filter parameters returned by
// av1_get_interp_filter_params(). Each table is only built when the
// corresponding feature macro is enabled.
void av1_highbd_convolve_init_sse4_1(void) {
#if USE_TEMPORALFILTER_12TAP
  {
    const InterpFilterParams temporal =
        av1_get_interp_filter_params(TEMPORALFILTER_12TAP);
    init_simd_filter(temporal.filter_ptr, temporal.taps,
                     subpel_temporalfilter);
  }
#endif
#if CONFIG_DUAL_FILTER
  {
    const InterpFilterParams sharp =
        av1_get_interp_filter_params(MULTITAP_SHARP);
    init_simd_filter(sharp.filter_ptr, sharp.taps, subpel_filters_sharp);
  }
#endif
}
// pixelsNum 0: write all 4 pixels // pixelsNum 0: write all 4 pixels
// 1/2/3: residual pixels 1/2/3 // 1/2/3: residual pixels 1/2/3
static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst, static void writePixel(__m128i *u, int width, int pixelsNum, uint16_t *dst,
......
...@@ -591,7 +591,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { ...@@ -591,7 +591,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
#if CONFIG_ADAPT_SCAN #if CONFIG_ADAPT_SCAN
av1_init_scan_order(cm); av1_init_scan_order(cm);
#endif #endif
av1_convolve_init(); av1_convolve_init(cm);
av1_initialize_rd_consts(cpi); av1_initialize_rd_consts(cpi);
// Tiling is ignored in the first pass. // Tiling is ignored in the first pass.
......
...@@ -43,7 +43,7 @@ typedef tuple<ConvInit, conv_filter_t, conv_filter_t, BlockDimension, ...@@ -43,7 +43,7 @@ typedef tuple<ConvInit, conv_filter_t, conv_filter_t, BlockDimension,
// Test parameter list: // Test parameter list:
// <convolve_horiz_func, convolve_vert_func, // <convolve_horiz_func, convolve_vert_func,
// <width, height>, filter_params, subpel_x_q4, avg, bit_depth> // <width, height>, filter_params, subpel_x_q4, avg, bit_depth>
typedef tuple<hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension, typedef tuple<ConvInit, hbd_conv_filter_t, hbd_conv_filter_t, BlockDimension,
InterpFilter, int, int, int> InterpFilter, int, int, int>
HbdConvParams; HbdConvParams;
#endif #endif
...@@ -228,7 +228,7 @@ const int kAvg[] = { 0, 1 }; ...@@ -228,7 +228,7 @@ const int kAvg[] = { 0, 1 };
#if HAVE_SSSE3 && CONFIG_DUAL_FILTER #if HAVE_SSSE3 && CONFIG_DUAL_FILTER
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSSE3, AV1ConvolveOptimzTest, SSSE3, AV1ConvolveOptimzTest,
::testing::Combine(::testing::Values(av1_convolve_init_ssse3), ::testing::Combine(::testing::Values(av1_lowbd_convolve_init_ssse3),
::testing::Values(av1_convolve_horiz_ssse3), ::testing::Values(av1_convolve_horiz_ssse3),
::testing::Values(av1_convolve_vert_ssse3), ::testing::Values(av1_convolve_vert_ssse3),
::testing::ValuesIn(kBlockDim), ::testing::ValuesIn(kBlockDim),
...@@ -243,15 +243,17 @@ class AV1HbdConvolveOptimzTest : public TestWithHbdConvParams { ...@@ -243,15 +243,17 @@ class AV1HbdConvolveOptimzTest : public TestWithHbdConvParams {
public: public:
virtual ~AV1HbdConvolveOptimzTest() {} virtual ~AV1HbdConvolveOptimzTest() {}
virtual void SetUp() { virtual void SetUp() {
conv_horiz_ = GET_PARAM(0); ConvInit conv_init = GET_PARAM(0);
conv_vert_ = GET_PARAM(1); conv_init();
BlockDimension block = GET_PARAM(2); conv_horiz_ = GET_PARAM(1);
conv_vert_ = GET_PARAM(2);
BlockDimension block = GET_PARAM(3);
width_ = std::tr1::get<0>(block); width_ = std::tr1::get<0>(block);
height_ = std::tr1::get<1>(block); height_ = std::tr1::get<1>(block);
filter_ = GET_PARAM(3); filter_ = GET_PARAM(4);
subpel_ = GET_PARAM(4); subpel_ = GET_PARAM(5);
avg_ = GET_PARAM(5); avg_ = GET_PARAM(6);
bit_depth_ = GET_PARAM(6); bit_depth_ = GET_PARAM(7);
alloc_ = new uint16_t[maxBlockSize * 4]; alloc_ = new uint16_t[maxBlockSize * 4];
src_ = alloc_ + (vertiOffset * maxWidth); src_ = alloc_ + (vertiOffset * maxWidth);
...@@ -390,7 +392,8 @@ const int kBitdepth[] = { 10, 12 }; ...@@ -390,7 +392,8 @@ const int kBitdepth[] = { 10, 12 };
INSTANTIATE_TEST_CASE_P( INSTANTIATE_TEST_CASE_P(
SSE4_1, AV1HbdConvolveOptimzTest, SSE4_1, AV1HbdConvolveOptimzTest,
::testing::Combine(::testing::Values(av1_highbd_convolve_horiz_sse4_1), ::testing::Combine(::testing::Values(av1_highbd_convolve_init_sse4_1),
::testing::Values(av1_highbd_convolve_horiz_sse4_1),
::testing::Values(av1_highbd_convolve_vert_sse4_1), ::testing::Values(av1_highbd_convolve_vert_sse4_1),
::testing::ValuesIn(kBlockDim), ::testing::ValuesIn(kBlockDim),
::testing::ValuesIn(kFilter), ::testing::ValuesIn(kFilter),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment