Commit f2cd4ded authored by Johann's avatar Johann

Move shared data to shared location

Storing vp8_bilinear_filters_mmx in an MMX file and using it in an SSE2
file is bad: the SSE2 code then depends on the MMX file being built.

Moving towards allowing --disable-mmx

Change-Id: I20493b35bdedcdcfc0915e6f05fdbe6c81a4a742
parent 3c755577
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/mem.h"
/* Bilinear filter taps laid out for the x86 SIMD code, one row per sub-pixel
 * offset (0..7).  Each row is the first tap repeated four times followed by
 * the second tap repeated four times; the two taps of a row always sum to
 * 128. */
#define BILINEAR_ROW_X4(t0, t1) { t0, t0, t0, t0, t1, t1, t1, t1 }
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
{
    BILINEAR_ROW_X4(128,   0),
    BILINEAR_ROW_X4(112,  16),
    BILINEAR_ROW_X4( 96,  32),
    BILINEAR_ROW_X4( 80,  48),
    BILINEAR_ROW_X4( 64,  64),
    BILINEAR_ROW_X4( 48,  80),
    BILINEAR_ROW_X4( 32,  96),
    BILINEAR_ROW_X4( 16, 112)
};
#undef BILINEAR_ROW_X4
/* Same taps as the 4x table, but with each tap repeated eight times, so a row
 * is 16 shorts (32 bytes): eight copies of the first tap followed by eight
 * copies of the second.  Rows are indexed by sub-pixel offset (0..7). */
#define BILINEAR_ROW_X8(t0, t1) \
    { t0, t0, t0, t0, t0, t0, t0, t0, t1, t1, t1, t1, t1, t1, t1, t1 }
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) =
{
    BILINEAR_ROW_X8(128,   0),
    BILINEAR_ROW_X8(112,  16),
    BILINEAR_ROW_X8( 96,  32),
    BILINEAR_ROW_X8( 80,  48),
    BILINEAR_ROW_X8( 64,  64),
    BILINEAR_ROW_X8( 48,  80),
    BILINEAR_ROW_X8( 32,  96),
    BILINEAR_ROW_X8( 16, 112)
};
#undef BILINEAR_ROW_X8
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef FILTER_X86_H
#define FILTER_X86_H
/* x86 assembly specific copies of vp8/common/filter.c:vp8_bilinear_filters
 * with duplicated values.
 *
 * Both tables have one row per sub-pixel offset (8 rows).  A row holds the
 * first filter tap repeated N times followed by the second tap repeated N
 * times, where N is 4 for the _4 table and 8 for the _8 table.  The
 * definitions in filter_x86.c are declared with DECLARE_ALIGNED(16, ...).
 */
extern const short vp8_bilinear_filters_x86_4[8][8]; /* duplicated 4x */
extern const short vp8_bilinear_filters_x86_8[8][16]; /* duplicated 8x */
#endif /* FILTER_X86_H */
......@@ -10,6 +10,7 @@
%include "vpx_ports/x86_abi_support.asm"
extern sym(vp8_bilinear_filters_x86_8)
%define BLOCK_HEIGHT_WIDTH 4
......@@ -222,14 +223,14 @@ sym(vp8_bilinear_predict8x8_mmx):
push rdi
; end prolog
;const short *HFilter = bilinear_filters_mmx[xoffset];
;const short *VFilter = bilinear_filters_mmx[yoffset];
;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
shl rax, 5 ; offset * 32
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
add rax, rcx ; HFilter
mov rsi, arg(0) ;src_ptr ;
......@@ -379,13 +380,13 @@ sym(vp8_bilinear_predict8x4_mmx):
push rdi
; end prolog
;const short *HFilter = bilinear_filters_mmx[xoffset];
;const short *VFilter = bilinear_filters_mmx[yoffset];
;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
shl rax, 5
mov rsi, arg(0) ;src_ptr ;
......@@ -534,13 +535,13 @@ sym(vp8_bilinear_predict4x4_mmx):
push rdi
; end prolog
;const short *HFilter = bilinear_filters_mmx[xoffset];
;const short *VFilter = bilinear_filters_mmx[yoffset];
;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
movsxd rax, dword ptr arg(2) ;xoffset
mov rdi, arg(4) ;dst_ptr ;
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
shl rax, 5
add rax, rcx ; HFilter
......@@ -699,29 +700,3 @@ sym(vp8_six_tap_mmx):
times 8 dw 0
align 16
global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
sym(vp8_bilinear_filters_mmx):
times 8 dw 128
times 8 dw 0
times 8 dw 112
times 8 dw 16
times 8 dw 96
times 8 dw 32
times 8 dw 80
times 8 dw 48
times 8 dw 64
times 8 dw 64
times 8 dw 48
times 8 dw 80
times 8 dw 32
times 8 dw 96
times 8 dw 16
times 8 dw 112
......@@ -10,6 +10,7 @@
%include "vpx_ports/x86_abi_support.asm"
extern sym(vp8_bilinear_filters_x86_8)
%define BLOCK_HEIGHT_WIDTH 4
%define VP8_FILTER_WEIGHT 128
......@@ -961,7 +962,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
; unsigned char *dst_ptr,
; int dst_pitch
;)
extern sym(vp8_bilinear_filters_mmx)
extern sym(vp8_bilinear_filters_x86_8)
global sym(vp8_bilinear_predict16x16_sse2)
sym(vp8_bilinear_predict16x16_sse2):
push rbp
......@@ -973,10 +974,10 @@ sym(vp8_bilinear_predict16x16_sse2):
push rdi
; end prolog
;const short *HFilter = bilinear_filters_mmx[xoffset]
;const short *VFilter = bilinear_filters_mmx[yoffset]
;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]
;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
movsxd rax, dword ptr arg(2) ;xoffset
cmp rax, 0 ;skip first_pass filter if xoffset=0
......@@ -1230,7 +1231,6 @@ sym(vp8_bilinear_predict16x16_sse2):
; unsigned char *dst_ptr,
; int dst_pitch
;)
extern sym(vp8_bilinear_filters_mmx)
global sym(vp8_bilinear_predict8x8_sse2)
sym(vp8_bilinear_predict8x8_sse2):
push rbp
......@@ -1245,9 +1245,9 @@ sym(vp8_bilinear_predict8x8_sse2):
ALIGN_STACK 16, rax
sub rsp, 144 ; reserve 144 bytes
;const short *HFilter = bilinear_filters_mmx[xoffset]
;const short *VFilter = bilinear_filters_mmx[yoffset]
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))]
;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset]
;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset]
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
mov rsi, arg(0) ;src_ptr
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
......
......@@ -12,6 +12,8 @@
#ifndef SUBPIXEL_X86_H
#define SUBPIXEL_X86_H
#include "filter_x86.h"
/* Note:
*
* This platform is commonly built for runtime CPU detection. If you modify
......
......@@ -12,9 +12,9 @@
#include "vpx_config.h"
#include "vpx_ports/mem.h"
#include "vp8/common/subpixel.h"
#include "filter_x86.h"
extern const short vp8_six_tap_mmx[8][6*8];
extern const short vp8_bilinear_filters_mmx[8][2*8];
extern void vp8_filter_block1d_h6_mmx
(
......
......@@ -12,6 +12,7 @@
#include "vp8/encoder/variance.h"
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
#include "vp8/common/x86/filter_x86.h"
extern void filter_block1d_h6_mmx
(
......@@ -21,7 +22,7 @@ extern void filter_block1d_h6_mmx
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
short *vp7_filter
short *filter
);
extern void filter_block1d_v6_mmx
(
......@@ -31,7 +32,7 @@ extern void filter_block1d_v6_mmx
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
short *vp7_filter
short *filter
);
extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
......@@ -198,24 +199,6 @@ unsigned int vp8_variance8x16_mmx(
}
///////////////////////////////////////////////////////////////////////////
// the mmx function that does the bilinear filtering and var calculation //
// in one pass //
///////////////////////////////////////////////////////////////////////////
/* Bilinear filter taps indexed by sub-pixel offset: each row is the first tap
 * repeated four times followed by the second tap repeated four times.  The
 * values are identical to those of vp8_bilinear_filters_x86_4. */
DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
{
{ 128, 128, 128, 128, 0, 0, 0, 0 },
{ 112, 112, 112, 112, 16, 16, 16, 16 },
{ 96, 96, 96, 96, 32, 32, 32, 32 },
{ 80, 80, 80, 80, 48, 48, 48, 48 },
{ 64, 64, 64, 64, 64, 64, 64, 64 },
{ 48, 48, 48, 48, 80, 80, 80, 80 },
{ 32, 32, 32, 32, 96, 96, 96, 96 },
{ 16, 16, 16, 16, 112, 112, 112, 112 }
};
unsigned int vp8_sub_pixel_variance4x4_mmx
(
const unsigned char *src_ptr,
......@@ -232,7 +215,7 @@ unsigned int vp8_sub_pixel_variance4x4_mmx
vp8_filter_block2d_bil4x4_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line,
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
......@@ -257,7 +240,7 @@ unsigned int vp8_sub_pixel_variance8x8_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
......@@ -283,7 +266,7 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum0, &xxsum0
);
......@@ -291,7 +274,7 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 16,
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum1, &xxsum1
);
......@@ -336,7 +319,7 @@ unsigned int vp8_sub_pixel_variance16x8_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 8,
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum0, &xxsum0
);
......@@ -344,7 +327,7 @@ unsigned int vp8_sub_pixel_variance16x8_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr + 8, src_pixels_per_line,
dst_ptr + 8, dst_pixels_per_line, 8,
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum1, &xxsum1
);
......@@ -371,7 +354,7 @@ unsigned int vp8_sub_pixel_variance8x16_mmx
vp8_filter_block2d_bil_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line, 16,
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
......
......@@ -12,11 +12,12 @@
#include "vp8/encoder/variance.h"
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
#include "vp8/common/x86/filter_x86.h"
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *filter);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
......@@ -135,8 +136,6 @@ void vp8_half_vert_variance16x_h_sse2
unsigned int *sumsquared
);
DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
unsigned int vp8_variance4x4_wmt(
const unsigned char *src_ptr,
int source_stride,
......@@ -262,7 +261,7 @@ unsigned int vp8_sub_pixel_variance4x4_wmt
vp8_filter_block2d_bil4x4_var_mmx(
src_ptr, src_pixels_per_line,
dst_ptr, dst_pixels_per_line,
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
&xsum, &xxsum
);
*sse = xxsum;
......
......@@ -72,6 +72,8 @@ VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
VP8_COMMON_SRCS-$(CONFIG_POSTPROC_VISUALIZER) += common/textblit.c
VP8_COMMON_SRCS-yes += common/treecoder.c
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.c
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/filter_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/idct_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/subpixel_x86.h
VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/recon_x86.h
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment