Commit 30466f26 authored by James Zern

Revert "Add AVX vectorized vp9_diamond_search_sad"

This reverts commit f1342a7b.

This breaks 32-bit builds:
 runtime error: load of misaligned address 0xf72fdd48 for type 'const
__m128i' (vector of 2 'long long' values), which requires 16 byte
alignment

Additionally, _mm_set1_epi64x is incompatible with some versions of Visual Studio.

Change-Id: I6f6fc3c11403344cef78d1c432cdc9147e5c1673
parent 892130f7
......@@ -312,7 +312,7 @@ $vp9_full_search_sad_sse3=vp9_full_search_sadx3;
$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad avx/;
specialize qw/vp9_diamond_search_sad/;
add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_full_range_search/;
......
......@@ -1570,30 +1570,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
#endif
#define log2f(x) (log (x) / (float) M_LOG2_E)
/***********************************************************************
* Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts' *
***********************************************************************
* The following 2 functions ('cal_nmvjointsadcost' and *
* 'cal_nmvsadcosts') are used to calculate cost lookup tables *
* used by 'vp9_diamond_search_sad'. The C implementation of the *
* function is generic, but the AVX intrinsics optimised version *
* relies on the following properties of the computed tables: *
* For cal_nmvjointsadcost: *
* - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] *
* For cal_nmvsadcosts: *
* - For all i: mvsadcost[0][i] == mvsadcost[1][i] *
* (Equal costs for both components) *
* - For all i: mvsadcost[0][i] == mvsadcost[0][-i] *
* (Cost function is even) *
* If these do not hold, then the AVX optimised version of the *
* 'vp9_diamond_search_sad' function cannot be used as it is, in which *
* case you can revert to using the C function instead. *
***********************************************************************/
static void cal_nmvjointsadcost(int *mvjointsadcost) {
/*********************************************************************
* Warning: Read the comments above before modifying this function *
*********************************************************************/
mvjointsadcost[0] = 600;
mvjointsadcost[1] = 300;
mvjointsadcost[2] = 300;
......@@ -1601,9 +1578,6 @@ static void cal_nmvjointsadcost(int *mvjointsadcost) {
}
static void cal_nmvsadcosts(int *mvsadcost[2]) {
/*********************************************************************
* Warning: Read the comments above before modifying this function *
*********************************************************************/
int i = 1;
mvsadcost[0][0] = 0;
......@@ -1765,10 +1739,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->first_time_stamp_ever = INT64_MAX;
/*********************************************************************
* Warning: Read the comments around 'cal_nmvjointsadcost' and *
* 'cal_nmvsadcosts' before modifying how these tables are computed. *
*********************************************************************/
cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost);
cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX];
cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX];
......
......@@ -101,8 +101,11 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
}
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
int len;
int ss_count = 0;
int len, ss_count = 1;
cfg->ss_mv[0].col = 0;
cfg->ss_mv[0].row = 0;
cfg->ss_os[0] = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 4 search sites per step.
......@@ -114,13 +117,16 @@ void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
}
}
cfg->ss_count = ss_count;
cfg->searches_per_step = 4;
cfg->total_steps = ss_count / cfg->searches_per_step;
}
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
int len;
int ss_count = 0;
int len, ss_count = 1;
cfg->ss_mv[0].col = 0;
cfg->ss_mv[0].row = 0;
cfg->ss_os[0] = 0;
for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
// Generate offsets for 8 search sites per step.
......@@ -135,8 +141,8 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
}
}
cfg->ss_count = ss_count;
cfg->searches_per_step = 8;
cfg->total_steps = ss_count / cfg->searches_per_step;
}
/*
......@@ -1606,8 +1612,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
const uint8_t *best_address;
unsigned int bestsad = INT_MAX;
int best_site = -1;
int last_site = -1;
int best_site = 0;
int last_site = 0;
int ref_row;
int ref_col;
......@@ -1620,7 +1626,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
// const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
const int tot_steps = (cfg->total_steps) - search_param;
const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
......@@ -1638,7 +1644,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
+ mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
i = 0;
i = 1;
for (step = 0; step < tot_steps; step++) {
int all_in = 1, t;
......
......@@ -33,10 +33,10 @@ extern "C" {
typedef struct search_site_config {
// motion search sites
MV ss_mv[8 * MAX_MVSEARCH_STEPS]; // Motion vector
intptr_t ss_os[8 * MAX_MVSEARCH_STEPS]; // Offset
MV ss_mv[8 * MAX_MVSEARCH_STEPS + 1]; // Motion vector
intptr_t ss_os[8 * MAX_MVSEARCH_STEPS + 1]; // Offset
int ss_count;
int searches_per_step;
int total_steps;
} search_site_config;
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
......
This diff is collapsed.
......@@ -96,7 +96,6 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment