Commit e067de00 authored by Angie Chiang

Add av1_convolve_init()

Generate the SIMD filter structures in av1_convolve_init().
This provides the flexibility to change the filter coefficients.

Change-Id: If79f84c56483aa08c894d6b12e2b6ce10147f0ce
parent 42c58342
......@@ -69,7 +69,6 @@ AV1_COMMON_SRCS-yes += common/av1_fwd_txfm2d_cfg.h
AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d.c
AV1_COMMON_SRCS-yes += common/av1_inv_txfm2d_cfg.h
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_ssse3.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/av1_convolve_filters_ssse3.h
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_sse4.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_highbd_convolve_filters_sse4.h
......
......@@ -38,6 +38,9 @@ if ($opts{arch} eq "x86_64") {
#
# 10/12-tap convolution filters
#
add_proto qw/void av1_convolve_init/, "void";
specialize qw/av1_convolve_init ssse3/;
add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg";
specialize qw/av1_convolve_horiz ssse3/;
......
......@@ -191,6 +191,11 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
// Plain-C variant of av1_convolve_init(). It intentionally does nothing:
// it only exists so that the SIMD variants have a hook in which to build
// their filter tables.
void av1_convolve_init_c(void) {}
#if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve_horiz_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
......
......@@ -2084,6 +2084,7 @@ void av1_setup_past_independence(AV1_COMMON *cm) {
#if CONFIG_ADAPT_SCAN
av1_init_scan_order(cm);
#endif
av1_convolve_init();
cm->fc->initialized = 1;
if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode ||
......
This diff is collapsed.
......@@ -15,12 +15,33 @@
#include "./aom_config.h"
#include "./av1_rtcd.h"
#include "av1/common/filter.h"
#include "av1/common/x86/av1_convolve_filters_ssse3.h"
#define WIDTH_BOUND (16)
#define HEIGHT_BOUND (16)
typedef const int8_t (*SubpelFilterCoeffs)[16];
#if CONFIG_EXT_INTERP
// 16-byte aligned, file-local coefficient tables for the 10-tap and 12-tap
// sharp filters. They are not initialized here: av1_convolve_init_ssse3()
// fills them through the pointers stored in simd_filters[].
// The outer dimension has 15 entries because the init helpers write index
// (shift - 1) for shift = 1 .. SUBPEL_SHIFTS - 1.
// "[2][16]" tables are written by init_simd_horiz_filter(), "[6][16]"
// tables by init_simd_vert_filter().
DECLARE_ALIGNED(16, static int8_t,
sub_pel_filters_10sharp_signal_dir[15][2][16]);
DECLARE_ALIGNED(16, static int8_t,
sub_pel_filters_10sharp_ver_signal_dir[15][6][16]);
DECLARE_ALIGNED(16, static int8_t,
sub_pel_filters_12sharp_signal_dir[15][2][16]);
DECLARE_ALIGNED(16, static int8_t,
sub_pel_filters_12sharp_ver_signal_dir[15][6][16]);
#endif // CONFIG_EXT_INTERP
#if USE_TEMPORALFILTER_12TAP
// 16-byte aligned, file-local coefficient tables for the 12-tap temporal
// filter. They are not initialized here: av1_convolve_init_ssse3() fills
// them through the pointers stored in temporal_simd_filter.
// The "[2][16]" table is written by init_simd_horiz_filter(), the
// "[6][16]" table by init_simd_vert_filter().
DECLARE_ALIGNED(16, static int8_t,
sub_pel_filters_temporalfilter_12_signal_dir[15][2][16]);
DECLARE_ALIGNED(16, static int8_t,
sub_pel_filters_temporalfilter_12_ver_signal_dir[15][6][16]);
#endif
typedef int8_t (*SubpelFilterCoeffs)[16];
static INLINE SubpelFilterCoeffs
get_subpel_filter_signal_dir(const InterpFilterParams p, int index) {
......@@ -919,3 +940,102 @@ void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
assert(0);
}
}
// Builds the horizontal coefficient table for one interpolation filter.
//
//   filter_ptr        - kernels stored back to back, `taps` int16_t
//                       coefficients per subpel shift.
//   taps              - number of coefficients per kernel; the layout below
//                       is centred in a 12-entry window, so taps is expected
//                       to be at most 12.
//   simd_horiz_filter - output, indexed by [shift - 1][row][byte].
//
// For every shift from 1 to SUBPEL_SHIFTS - 1 two 16-byte rows are written:
// row 0 holds the kernel starting at byte (12 - taps) / 2, row 1 holds the
// same kernel two bytes further along. All remaining bytes are zero.
// Coefficients are narrowed from int16_t to int8_t on store.
static void init_simd_horiz_filter(const int16_t *filter_ptr, int taps,
                                   int8_t (*simd_horiz_filter)[2][16]) {
  const int pad = (12 - taps) / 2;
  int phase;

  for (phase = 1; phase < SUBPEL_SHIFTS; ++phase) {
    const int16_t *const kernel = filter_ptr + phase * taps;
    int8_t *const row0 = simd_horiz_filter[phase - 1][0];
    int8_t *const row1 = simd_horiz_filter[phase - 1][1];
    int k;

    // Clear both rows first so that only the taps need to be placed.
    for (k = 0; k < 16; ++k) {
      row0[k] = 0;
      row1[k] = 0;
    }

    for (k = 0; k < taps; ++k) {
      row0[pad + k] = kernel[k];
      row1[pad + 2 + k] = kernel[k];
    }
  }
}
// Builds the vertical coefficient table for one interpolation filter.
//
//   filter_ptr       - kernels stored back to back, `taps` int16_t
//                      coefficients per subpel shift.
//   taps             - number of coefficients per kernel.
//   simd_vert_filter - output, indexed by [shift - 1][row][byte].
//
// For every shift from 1 to SUBPEL_SHIFTS - 1 six 16-byte rows are written.
// Row r repeats one coefficient pair across its 16 bytes: even bytes take
// window position 2 * r, odd bytes take 2 * r + 1, where the kernel sits
// (12 - taps) / 2 positions into the 12-entry window. Window positions that
// fall outside the kernel are stored as zero.
static void init_simd_vert_filter(const int16_t *filter_ptr, int taps,
                                  int8_t (*simd_vert_filter)[6][16]) {
  const int pad = (12 - taps) / 2;
  int phase;

  for (phase = 1; phase < SUBPEL_SHIFTS; ++phase) {
    const int16_t *const kernel = filter_ptr + phase * taps;
    int8_t(*const rows)[16] = simd_vert_filter[phase - 1];
    int row;

    for (row = 0; row < 6; ++row) {
      int lane;
      for (lane = 0; lane < 16; ++lane) {
        // Index into the kernel for this byte; may land in the zero padding.
        const int tap = 2 * row + (lane & 1) - pad;
        rows[row][lane] = (tap >= 0 && tap < taps) ? kernel[tap] : 0;
      }
    }
  }
}
// Associates one interpolation filter with the two coefficient tables that
// av1_convolve_init_ssse3() generates for it.
// NOTE: instances in this file use positional initializers, so the member
// order must not change.
typedef struct SimdFilter {
// Filter id; passed to av1_get_interp_filter_params() to fetch the kernels.
InterpFilter interp_filter;
// Destination table for init_simd_horiz_filter(): [shift - 1][2][16].
int8_t (*simd_horiz_filter)[2][16];
// Destination table for init_simd_vert_filter(): [shift - 1][6][16].
int8_t (*simd_vert_filter)[6][16];
} SimdFilter;
#if CONFIG_EXT_INTERP
#define MULTITAP_FILTER_NUM 2
// One entry per multi-tap filter: the filter id followed by the horizontal
// and vertical tables that av1_convolve_init_ssse3() fills for it.
// Each table name decays to a pointer to its first [..][16] element.
SimdFilter simd_filters[MULTITAP_FILTER_NUM] = {
  { MULTITAP_SHARP, sub_pel_filters_10sharp_signal_dir,
    sub_pel_filters_10sharp_ver_signal_dir },
  { MULTITAP_SHARP2, sub_pel_filters_12sharp_signal_dir,
    sub_pel_filters_12sharp_ver_signal_dir },
};
#endif  // CONFIG_EXT_INTERP
#if USE_TEMPORALFILTER_12TAP
// Filter id and destination tables for the 12-tap temporal filter; the
// tables are filled by av1_convolve_init_ssse3().
// Each table name decays to a pointer to its first [..][16] element.
SimdFilter temporal_simd_filter = {
  TEMPORALFILTER_12TAP,
  sub_pel_filters_temporalfilter_12_signal_dir,
  sub_pel_filters_temporalfilter_12_ver_signal_dir
};
#endif  // USE_TEMPORALFILTER_12TAP
// SSSE3 variant of av1_convolve_init(): regenerates the horizontal and
// vertical coefficient tables from the kernels returned by
// av1_get_interp_filter_params(). The temporal filter is processed first
// (when USE_TEMPORALFILTER_12TAP is set), followed by every entry of
// simd_filters[] (when CONFIG_EXT_INTERP is set). With neither option
// enabled the function does nothing.
void av1_convolve_init_ssse3(void) {
#if USE_TEMPORALFILTER_12TAP
  {
    const InterpFilterParams params =
        av1_get_interp_filter_params(temporal_simd_filter.interp_filter);
    init_simd_horiz_filter(params.filter_ptr, params.taps,
                           temporal_simd_filter.simd_horiz_filter);
    init_simd_vert_filter(params.filter_ptr, params.taps,
                          temporal_simd_filter.simd_vert_filter);
  }
#endif
#if CONFIG_EXT_INTERP
  {
    const SimdFilter *entry = simd_filters;
    const SimdFilter *const last = simd_filters + MULTITAP_FILTER_NUM;
    for (; entry != last; ++entry) {
      const InterpFilterParams params =
          av1_get_interp_filter_params(entry->interp_filter);
      init_simd_horiz_filter(params.filter_ptr, params.taps,
                             entry->simd_horiz_filter);
      init_simd_vert_filter(params.filter_ptr, params.taps,
                            entry->simd_vert_filter);
    }
  }
#endif
}
......@@ -575,6 +575,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
#if CONFIG_ADAPT_SCAN
av1_init_scan_order(cm);
#endif
av1_convolve_init();
av1_initialize_rd_consts(cpi);
// Tiling is ignored in the first pass.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment