Commit 98e132bd authored by James Zern's avatar James Zern Committed by Gerrit Code Review
Browse files

Merge changes I40454d26,I892e76d5,I865ab3f9,I4a4bec17,I61c4351e,I37eb3559,I1031c556,I8c8f1f42

* changes:
  delete vp9_loopfilter_sse2.asm
  vp9_loopfilter_intrin_sse2: cosmetics: fix indent
  delete x86/vp9_loopfilter_x86.h
  vp9_loopfilter_intrin_sse2: make some funcs static
  vp9_loopfilter_intrin_sse2: remove unused uv funcs
  vp9_loopfilter: remove uv function typedef
  filter_block_plane: reuse some constants
  vp9_loopfilter.c: make some functions static
parents 39ce4b13 50015f6e
......@@ -33,8 +33,7 @@ static void lf_init_lut(loop_filter_info_n *lfi) {
lfi->mode_lf_lut[NEWMV] = 1;
}
void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl) {
static void update_sharpness(loop_filter_info_n *const lfi, int sharpness_lvl) {
int lvl;
// For each possible value for the loop filter fill out limits
......@@ -62,7 +61,7 @@ void vp9_loop_filter_init(VP9_COMMON *cm) {
int i;
// init limits for given sharpness
vp9_loop_filter_update_sharpness(lfi, cm->sharpness_level);
update_sharpness(lfi, cm->sharpness_level);
cm->last_sharpness_level = cm->sharpness_level;
// init LUT for lvl and hev thr picking
......@@ -73,8 +72,8 @@ void vp9_loop_filter_init(VP9_COMMON *cm) {
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
}
void vp9_loop_filter_frame_init(VP9_COMMON *cm, MACROBLOCKD *xd,
int default_filt_lvl) {
static void loop_filter_frame_init(VP9_COMMON *const cm, MACROBLOCKD *const xd,
int default_filt_lvl) {
int seg;
// n_shift is the multiplier for lf_deltas
// the multiplier is 1 for when filter_lvl is between 0 and 31;
......@@ -84,7 +83,7 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, MACROBLOCKD *xd,
// update limits if sharpness has changed
if (cm->last_sharpness_level != cm->sharpness_level) {
vp9_loop_filter_update_sharpness(lfi, cm->sharpness_level);
update_sharpness(lfi, cm->sharpness_level);
cm->last_sharpness_level = cm->sharpness_level;
}
......@@ -118,9 +117,9 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, MACROBLOCKD *xd,
}
}
static int build_lfi(const VP9_COMMON *cm, const MB_MODE_INFO *mbmi,
struct loop_filter_info *lfi) {
const loop_filter_info_n *const lfi_n = &cm->lf_info;
static int build_lfi(const loop_filter_info_n *const lfi_n,
const MB_MODE_INFO *const mbmi,
struct loop_filter_info *const lfi) {
const int seg = mbmi->segment_id;
const int ref = mbmi->ref_frame[0];
const int mode = lfi_n->mode_lf_lut[mbmi->mode];
......@@ -231,13 +230,13 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
}
}
static void filter_block_plane(VP9_COMMON *cm, MACROBLOCKD *xd,
static void filter_block_plane(VP9_COMMON *const cm, MACROBLOCKD *const xd,
int plane, int mi_row, int mi_col) {
const int ss_x = xd->plane[plane].subsampling_x;
const int ss_y = xd->plane[plane].subsampling_y;
const int row_step = 1 << xd->plane[plane].subsampling_y;
const int col_step = 1 << xd->plane[plane].subsampling_x;
struct buf_2d * const dst = &xd->plane[plane].dst;
// Rows advance by the vertical subsampling (ss_y) and columns by the
// horizontal subsampling (ss_x), exactly as the expressions these cached
// constants replace did.
const int row_step = 1 << ss_y;
const int col_step = 1 << ss_x;
struct buf_2d *const dst = &xd->plane[plane].dst;
uint8_t* const dst0 = dst->buf;
unsigned int mask_16x16[MI_BLOCK_SIZE] = {0};
unsigned int mask_8x8[MI_BLOCK_SIZE] = {0};
......@@ -245,8 +244,8 @@ static void filter_block_plane(VP9_COMMON *cm, MACROBLOCKD *xd,
unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0};
struct loop_filter_info lfi[MI_BLOCK_SIZE][MI_BLOCK_SIZE];
int r, c;
MODE_INFO *mi = xd->mode_info_context;
int row_step_stride = cm->mode_info_stride * row_step;
const MODE_INFO *mi = xd->mode_info_context;
const int row_step_stride = cm->mode_info_stride * row_step;
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
unsigned int mask_16x16_c = 0;
......@@ -272,8 +271,7 @@ static void filter_block_plane(VP9_COMMON *cm, MACROBLOCKD *xd,
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
// Filter level can vary per MI
if (!build_lfi(cm, &mi[c].mbmi,
lfi[r] + (c >> xd->plane[plane].subsampling_x)))
if (!build_lfi(&cm->lf_info, &mi[c].mbmi, lfi[r] + (c >> ss_x)))
continue;
// Build masks based on the transform size of each block
......@@ -355,7 +353,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd,
int mi_row, mi_col;
// Initialize the loop filter for this frame.
vp9_loop_filter_frame_init(cm, xd, frame_filter_level);
loop_filter_frame_init(cm, xd, frame_filter_level);
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
MODE_INFO* const mi = cm->mi + mi_row * cm->mode_info_stride;
......
......@@ -44,44 +44,14 @@ struct loop_filter_info {
const uint8_t *hev_thr;
};
#define prototype_loopfilter(sym) \
void sym(uint8_t *src, int pitch, const uint8_t *blimit, \
const uint8_t *limit, const uint8_t *thresh, int count)
#define prototype_loopfilter_block(sym) \
void sym(uint8_t *y, uint8_t *u, uint8_t *v, \
int ystride, int uv_stride, struct loop_filter_info *lfi)
#if ARCH_X86 || ARCH_X86_64
#include "x86/vp9_loopfilter_x86.h"
#endif
typedef void loop_filter_uvfunction(uint8_t *src, int pitch,
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
uint8_t *v);
/* assorted loopfilter functions which get used elsewhere */
struct VP9Common;
struct macroblockd;
void vp9_loop_filter_init(struct VP9Common *cm);
void vp9_loop_filter_frame_init(struct VP9Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
void vp9_loop_filter_frame(struct VP9Common *cm,
struct macroblockd *mbd,
int filter_level,
int y_only);
void vp9_loop_filter_partial_frame(struct VP9Common *cm,
struct macroblockd *mbd,
int default_filt_lvl);
void vp9_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl);
#endif // VP9_COMMON_VP9_LOOPFILTER_H_
......@@ -12,17 +12,11 @@
#include "vp9/common/vp9_loopfilter.h"
#include "vpx_ports/emmintrin_compat.h"
prototype_loopfilter(vp9_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp9_loop_filter_horizontal_edge_sse2);
extern loop_filter_uvfunction vp9_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp9_loop_filter_vertical_edge_uv_sse2;
void vp9_mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh) {
static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh) {
DECLARE_ALIGNED(16, unsigned char, flat2_op[7][8]);
DECLARE_ALIGNED(16, unsigned char, flat2_oq[7][8]);
......@@ -483,11 +477,11 @@ void vp9_mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
}
}
void vp9_mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh) {
static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
int p,
const unsigned char *_blimit,
const unsigned char *_limit,
const unsigned char *_thresh) {
DECLARE_ALIGNED(16, unsigned char, flat2_op[7][16]);
DECLARE_ALIGNED(16, unsigned char, flat2_oq[7][16]);
......@@ -962,9 +956,9 @@ void vp9_mb_lpf_horizontal_edge_w_sse2(unsigned char *s,
const unsigned char *_thresh,
int count) {
if (count == 1)
vp9_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
else
vp9_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
}
void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s,
......@@ -1206,79 +1200,6 @@ void vp9_mbloop_filter_horizontal_edge_sse2(unsigned char *s,
}
}
/* Horizontal-edge macroblock loop filter applied to the u and v planes in a
 * single pass.
 *
 * Ten 8-byte rows (p4..q4, i.e. rows -5..+4 relative to u/v) are read from
 * each plane and packed side by side into a 16-byte-wide scratch buffer, with
 * u in the low half and v in the high half of every scratch row.
 * vp9_mbloop_filter_horizontal_edge_sse2() is then run once on that buffer
 * and rows p2..q2 (-3..+2) are written back to each plane.
 *
 * u, v                      row q0 of each plane
 * p                         stride, shared by both planes
 * _blimit, _limit, _thresh  forwarded unchanged to the filter
 */
void vp9_mbloop_filter_horizontal_edge_uv_sse2(unsigned char *u,
                                               int p,
                                               const unsigned char *_blimit,
                                               const unsigned char *_limit,
                                               const unsigned char *_thresh,
                                               unsigned char *v) {
  DECLARE_ALIGNED_ARRAY(16, unsigned char, src, 160);
  int row;

  /* Read source: scratch row i holds plane row (i - 5). */
  for (row = 0; row < 10; ++row) {
    const int offset = (row - 5) * p;
    const __m128i u_half = _mm_loadl_epi64((__m128i *)(u + offset));
    const __m128i v_half = _mm_loadl_epi64((__m128i *)(v + offset));

    _mm_store_si128((__m128i *)(src + 16 * row),
                    _mm_unpacklo_epi64(u_half, v_half));
  }

  /* Loop filtering: src + 80 is scratch row 5, the packed q0 row. */
  vp9_mbloop_filter_horizontal_edge_sse2(src + 80, 16, _blimit, _limit,
                                         _thresh, 1);

  /* Store result: all u rows first, then all v rows. */
  for (row = 2; row < 8; ++row) {
    _mm_storel_epi64((__m128i *)(u + (row - 5) * p),
                     _mm_loadl_epi64((__m128i *)(src + 16 * row)));
  }
  for (row = 2; row < 8; ++row) {
    _mm_storel_epi64((__m128i *)(v + (row - 5) * p),
                     _mm_loadl_epi64((__m128i *)(src + 16 * row + 8)));
  }
}
static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1,
int in_p, unsigned char *out, int out_p) {
__m128i x0, x1, x2, x3, x4, x5, x6, x7;
......@@ -1425,7 +1346,7 @@ void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s,
/* Loop filtering */
vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 8 * 16, 16, blimit, limit,
thresh, 1);
thresh, 1);
src[0] = t_dst + 3 * 16;
src[1] = t_dst + 3 * 16 + 8;
......@@ -1437,10 +1358,10 @@ void vp9_mbloop_filter_vertical_edge_sse2(unsigned char *s,
}
void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s,
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh) {
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256);
unsigned char *src[4];
unsigned char *dst[4];
......@@ -1466,32 +1387,3 @@ void vp9_mb_lpf_vertical_edge_w_sse2(unsigned char *s,
transpose(src, 16, dst, p, 2);
}
void vp9_mbloop_filter_vertical_edge_uv_sse2(unsigned char *u,
int p,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
unsigned char *v) {
DECLARE_ALIGNED_ARRAY(16, unsigned char, t_dst, 256);
unsigned char *src[2];
unsigned char *dst[2];
/* Transpose 16x16 */
transpose8x16(u - 8, v - 8, p, t_dst, 16);
transpose8x16(u, v, p, t_dst + 16 * 8, 16);
/* Loop filtering */
vp9_mbloop_filter_horizontal_edge_sse2(t_dst + 8 * 16, 16, blimit, limit,
thresh, 1);
src[0] = t_dst + 3 * 16;
src[1] = t_dst + 3 * 16 + 8;
dst[0] = u - 5;
dst[1] = v - 5;
/* Transpose 16x8 */
transpose(src, 16, dst, p, 2);
}
This diff is collapsed.
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP9_COMMON_X86_VP9_LOOPFILTER_X86_H_
#define VP9_COMMON_X86_VP9_LOOPFILTER_X86_H_
/* Note:
 *
 * This platform is commonly built for runtime CPU detection. If you modify
 * any of the function mappings present in this file, be sure to also update
 * them in the function pointer initialization code.
 *
 * This header does not define prototype_loopfilter_block() itself; the macro
 * must already be defined at the point where this file is included.
 */
/* Block loop-filter entry points declared only when HAVE_MMX is non-zero. */
#if HAVE_MMX
extern prototype_loopfilter_block(vp9_loop_filter_mbv_mmx);
extern prototype_loopfilter_block(vp9_loop_filter_bv_mmx);
extern prototype_loopfilter_block(vp9_loop_filter_mbh_mmx);
extern prototype_loopfilter_block(vp9_loop_filter_bh_mmx);
#endif
/* The same four entry points with the _sse2 suffix, declared only when
 * HAVE_SSE2 is non-zero. */
#if HAVE_SSE2
extern prototype_loopfilter_block(vp9_loop_filter_mbv_sse2);
extern prototype_loopfilter_block(vp9_loop_filter_bv_sse2);
extern prototype_loopfilter_block(vp9_loop_filter_mbh_sse2);
extern prototype_loopfilter_block(vp9_loop_filter_bh_sse2);
#endif
#endif // VP9_COMMON_X86_VP9_LOOPFILTER_X86_H_
......@@ -68,7 +68,6 @@ VP9_COMMON_SRCS-yes += common/vp9_treecoder.c
VP9_COMMON_SRCS-yes += common/vp9_common_data.c
VP9_COMMON_SRCS-yes += common/vp9_common_data.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_x86.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_postproc_x86.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
......@@ -76,7 +75,6 @@ VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.h
VP9_COMMON_SRCS-$(CONFIG_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_loopfilter_mmx.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_loopfilter_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_subpixel_8t_ssse3.asm
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment