Commit 7227b65c authored by Michael Bebenita, committed by Yaowu Xu

Add SSE4.1 code for deringing functions.

Change-Id: I363f7fb610a5c86ea9f417e34b57c6373af877e5
parent 4713d8d0
......@@ -98,6 +98,8 @@ endif
ifeq ($(CONFIG_DERING),yes)
AV1_COMMON_SRCS-yes += common/od_dering.c
AV1_COMMON_SRCS-yes += common/od_dering.h
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/od_dering_sse4.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/od_dering_sse4.h
AV1_COMMON_SRCS-yes += common/dering.c
AV1_COMMON_SRCS-yes += common/dering.h
endif
......
......@@ -20,6 +20,7 @@ struct search_site_config;
struct mv;
union int_mv;
struct yv12_buffer_config;
typedef int16_t od_dering_in;
EOF
}
forward_decls qw/av1_common_forward_decls/;
......@@ -840,4 +841,24 @@ if (aom_config("CONFIG_EXT_INTER") eq "yes") {
}
# end encoder functions
# Deringing Functions
if (aom_config("CONFIG_DERING") eq "yes") {
add_proto qw/int od_dir_find8/, "const od_dering_in *img, int stride, int32_t *var, int coeff_shift";
specialize qw/od_dir_find8 sse4_1/;
add_proto qw/int od_filter_dering_direction_4x4/, "int16_t *y, int ystride, const int16_t *in, int threshold, int dir";
specialize qw/od_filter_dering_direction_4x4 sse4_1/;
add_proto qw/int od_filter_dering_direction_8x8/, "int16_t *y, int ystride, const int16_t *in, int threshold, int dir";
specialize qw/od_filter_dering_direction_8x8 sse4_1/;
add_proto qw/void od_filter_dering_orthogonal_4x4/, "int16_t *y, int ystride, const int16_t *in, int threshold, int dir";
specialize qw/od_filter_dering_orthogonal_4x4 sse4_1/;
add_proto qw/void od_filter_dering_orthogonal_8x8/, "int16_t *y, int ystride, const int16_t *in, int threshold, int dir";
specialize qw/od_filter_dering_orthogonal_8x8 sse4_1/;
}
1;
......@@ -111,7 +111,7 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
if (pli) level = (level * 5 + 4) >> 3;
if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
threshold = level << coeff_shift;
od_dering(&OD_DERING_VTBL_C, dst, MAX_MIB_SIZE * bsize[pli],
od_dering(dst, MAX_MIB_SIZE * bsize[pli],
&src[pli][sbr * stride * bsize[pli] * MAX_MIB_SIZE +
sbc * bsize[pli] * MAX_MIB_SIZE],
stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
......
......@@ -15,11 +15,7 @@
#include <stdlib.h>
#include <math.h>
#include "dering.h"
/* Filter table populated with the `_c` implementations: one directional and
   one orthogonal filter for each of the 4x4 and 8x8 block sizes. */
const od_dering_opt_vtbl OD_DERING_VTBL_C = {
{ od_filter_dering_direction_4x4_c, od_filter_dering_direction_8x8_c },
{ od_filter_dering_orthogonal_4x4_c, od_filter_dering_orthogonal_8x8_c }
};
#include "./av1_rtcd.h"
/* Generated from gen_filter_tables.c. */
const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
......@@ -42,8 +38,8 @@ const int OD_DIRECTION_OFFSETS_TABLE[8][3] = {
in a particular direction. Since each direction has the same sum(x^2) term,
that term is never computed. See Section 2, step 2, of:
http://jmvalin.ca/notes/intra_paint.pdf */
static int od_dir_find8(const od_dering_in *img, int stride, int32_t *var,
int coeff_shift) {
int od_dir_find8_c(const od_dering_in *img, int stride, int32_t *var,
int coeff_shift) {
int i;
int32_t cost[8] = { 0 };
int partial[8][15] = { { 0 } };
......@@ -273,9 +269,8 @@ static void od_compute_thresh(int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS],
}
}
void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
const od_dering_in *x, int xstride, int nhb, int nvb, int sbx,
int sby, int nhsb, int nvsb, int xdec,
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
int nhb, int nvb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char *bskip, int skip_stride, int threshold,
int coeff_shift) {
......@@ -289,6 +284,12 @@ void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
int32_t var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
int thresh[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
int thresh2[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS];
od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES] = {
od_filter_dering_direction_4x4, od_filter_dering_direction_8x8
};
od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES] = {
od_filter_dering_orthogonal_4x4, od_filter_dering_orthogonal_8x8
};
bsize = 3 - xdec;
in = inbuf + OD_FILT_BORDER * OD_FILT_BSTRIDE + OD_FILT_BORDER;
/* We avoid filtering the pixels for which some of the pixels to average
......@@ -340,7 +341,7 @@ void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
to be a little bit more aggressive on pure horizontal/vertical
since the ringing there tends to be directional, so it doesn't
get removed by the directional filtering. */
thresh2[by][bx] = (vtbl->filter_dering_direction[bsize - OD_LOG_BSIZE0])(
thresh2[by][bx] = (filter_dering_direction[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], thresh[by][bx],
dir[by][bx]);
......@@ -354,7 +355,7 @@ void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
for (by = 0; by < nvb; by++) {
for (bx = 0; bx < nhb; bx++) {
if (thresh[by][bx] == 0) continue;
(vtbl->filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
(filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
&y[(by * ystride << bsize) + (bx << bsize)], ystride,
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], thresh2[by][bx],
dir[by][bx]);
......
......@@ -34,27 +34,11 @@ typedef int (*od_filter_dering_direction_func)(int16_t *y, int ystride,
typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
const int16_t *in,
int threshold, int dir);
/* Dispatch table of deringing filter implementations.
   Each array holds OD_DERINGSIZES function pointers; od_dering() selects an
   entry with the index bsize - OD_LOG_BSIZE0. */
struct od_dering_opt_vtbl {
/* Directional filters; each returns an int. */
od_filter_dering_direction_func filter_dering_direction[OD_DERINGSIZES];
/* Orthogonal filters; these return nothing. */
od_filter_dering_orthogonal_func filter_dering_orthogonal[OD_DERINGSIZES];
};
/* Shorthand so the table can be named without the struct keyword. */
typedef struct od_dering_opt_vtbl od_dering_opt_vtbl;
void od_dering(const od_dering_opt_vtbl *vtbl, int16_t *y, int ystride,
const od_dering_in *x, int xstride, int nvb, int nhb, int sbx,
int sby, int nhsb, int nvsb, int xdec,
void od_dering(int16_t *y, int ystride, const od_dering_in *x, int xstride,
int nvb, int nhb, int sbx, int sby, int nhsb, int nvsb, int xdec,
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
unsigned char *bskip, int skip_stride, int threshold,
int coeff_shift);
void od_filter_dering_direction_c(int16_t *y, int ystride, const int16_t *in,
int ln, int threshold, int dir);
void od_filter_dering_orthogonal_c(int16_t *y, int ystride, const int16_t *in,
const od_dering_in *x, int xstride, int ln,
int threshold, int dir);
extern const od_dering_opt_vtbl OD_DERING_VTBL_C;
int od_filter_dering_direction_4x4_c(int16_t *y, int ystride, const int16_t *in,
int threshold, int dir);
int od_filter_dering_direction_8x8_c(int16_t *y, int ystride, const int16_t *in,
......@@ -65,5 +49,4 @@ void od_filter_dering_orthogonal_4x4_c(int16_t *y, int ystride,
void od_filter_dering_orthogonal_8x8_c(int16_t *y, int ystride,
const int16_t *in, int threshold,
int dir);
#endif
This diff is collapsed.
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/od_dering.h"
#ifndef AOM_COMMON_OD_DERING_X86_SSE4_H_
#define AOM_COMMON_OD_DERING_X86_SSE4_H_
#endif // AOM_COMMON_OD_DERING_X86_SSE4_H_
......@@ -108,7 +108,7 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int threshold;
level = compute_level_from_index(best_level, gi);
threshold = level << coeff_shift;
od_dering(&OD_DERING_VTBL_C, dst, MAX_MIB_SIZE * bsize[0],
od_dering(dst, MAX_MIB_SIZE * bsize[0],
&src[sbr * stride * bsize[0] * MAX_MIB_SIZE +
sbc * bsize[0] * MAX_MIB_SIZE],
cm->mi_cols * bsize[0], nhb, nvb, sbc, sbr, nhsb, nvsb, 0,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment