Commit 510e0ab4 authored by John Koleszar's avatar John Koleszar

RTCD: add FDCT functions

This commit continues the process of converting to the new RTCD
system.

Change-Id: I3f9c07db65eb206f6363d21bdb80e871570da767
parent 83a91e78
......@@ -392,5 +392,20 @@ if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then
specialize vp8_ssim_parms_16x16 $sse2_on_x86_64
fi
#
# Forward DCT
#
prototype void vp8_short_fdct4x4 "short *input, short *output, int pitch"
specialize vp8_short_fdct4x4 mmx sse2 media neon
vp8_short_fdct4x4_media=vp8_short_fdct4x4_armv6
prototype void vp8_short_fdct8x4 "short *input, short *output, int pitch"
specialize vp8_short_fdct8x4 mmx sse2 media neon
vp8_short_fdct8x4_media=vp8_short_fdct8x4_armv6
prototype void vp8_short_walsh4x4 "short *input, short *output, int pitch"
specialize vp8_short_walsh4x4 mmx sse2 media neon
vp8_short_walsh4x4_media=vp8_short_walsh4x4_armv6
# End of encoder only functions
fi
......@@ -32,12 +32,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
#if HAVE_MEDIA
if (flags & HAS_MEDIA)
{
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_armv6;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_armv6;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_armv6;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_armv6;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6;
/*cpi->rtcd.encodemb.berr = vp8_block_error_c;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;*/
......@@ -53,12 +47,6 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
#if HAVE_NEON
if (flags & HAS_NEON)
{
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_neon;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_neon;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon;
/*cpi->rtcd.encodemb.berr = vp8_block_error_c;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;*/
......
......@@ -9,7 +9,7 @@
*/
#include "vpx_config.h"
#include "vp8/encoder/dct.h"
#include "vpx_rtcd.h"
#if HAVE_MEDIA
......
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef DCT_ARM_H
#define DCT_ARM_H
#if HAVE_MEDIA
extern prototype_fdct(vp8_short_walsh4x4_armv6);
extern prototype_fdct(vp8_short_fdct4x4_armv6);
extern prototype_fdct(vp8_short_fdct8x4_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_armv6
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_armv6
#undef vp8_fdct_fast4x4
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_armv6
#undef vp8_fdct_fast8x4
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_armv6
#endif
#endif /* HAVE_MEDIA */
#if HAVE_NEON
extern prototype_fdct(vp8_short_fdct4x4_neon);
extern prototype_fdct(vp8_short_fdct8x4_neon);
extern prototype_fdct(vp8_fast_fdct4x4_neon);
extern prototype_fdct(vp8_fast_fdct8x4_neon);
extern prototype_fdct(vp8_short_walsh4x4_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_neon
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_neon
#undef vp8_fdct_fast4x4
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_neon
#undef vp8_fdct_fast8x4
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_neon
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon
#endif
#endif /* HAVE_NEON */
#endif
......@@ -117,8 +117,8 @@ typedef struct
int optimize;
int q_index;
void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
void (*short_fdct4x4)(short *input, short *output, int pitch);
void (*short_fdct8x4)(short *input, short *output, int pitch);
void (*short_walsh4x4)(short *input, short *output, int pitch);
void (*quantize_b)(BLOCK *b, BLOCKD *d);
void (*quantize_b_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
......
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef __INC_DCT_H
#define __INC_DCT_H
#define prototype_fdct(sym) void (sym)(short *input, short *output, int pitch)
#if ARCH_X86 || ARCH_X86_64
#include "x86/dct_x86.h"
#endif
#if ARCH_ARM
#include "arm/dct_arm.h"
#endif
#ifndef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_c
#endif
extern prototype_fdct(vp8_fdct_short4x4);
#ifndef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_c
#endif
extern prototype_fdct(vp8_fdct_short8x4);
// There is no fast4x4 (for now)
#ifndef vp8_fdct_fast4x4
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_c
#endif
#ifndef vp8_fdct_fast8x4
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_c
#endif
#ifndef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_c
#endif
extern prototype_fdct(vp8_fdct_walsh_short4x4);
typedef prototype_fdct(*vp8_fdct_fn_t);
typedef struct
{
vp8_fdct_fn_t short4x4;
vp8_fdct_fn_t short8x4;
vp8_fdct_fn_t fast4x4;
vp8_fdct_fn_t fast8x4;
vp8_fdct_fn_t walsh_short4x4;
} vp8_fdct_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
#define FDCT_INVOKE(ctx,fn) (ctx)->fn
#else
#define FDCT_INVOKE(ctx,fn) vp8_fdct_##fn
#endif
#endif
......@@ -15,7 +15,6 @@
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
#include "vp8/common/invtrans.h"
#include "dct.h"
#include "encodeintra.h"
......@@ -70,7 +69,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd,
ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
x->short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b(be, b);
......
......@@ -16,7 +16,6 @@
#include "quantize.h"
#include "tokenize.h"
#include "vp8/common/invtrans.h"
#include "dct.h"
#include "vpx_mem/vpx_mem.h"
#include "rdopt.h"
......@@ -127,7 +126,7 @@ void vp8_transform_mbuv(MACROBLOCK *x)
for (i = 16; i < 24; i += 2)
{
x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
x->short_fdct8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 16);
}
}
......@@ -139,7 +138,7 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
for (i = 0; i < 16; i += 2)
{
x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
x->short_fdct8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
}
......@@ -159,7 +158,7 @@ static void transform_mb(MACROBLOCK *x)
for (i = 0; i < 16; i += 2)
{
x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
x->short_fdct8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
}
......@@ -169,7 +168,7 @@ static void transform_mb(MACROBLOCK *x)
for (i = 16; i < 24; i += 2)
{
x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
x->short_fdct8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 16);
}
......@@ -187,7 +186,7 @@ static void transform_mby(MACROBLOCK *x)
for (i = 0; i < 16; i += 2)
{
x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
x->short_fdct8x4(&x->block[i].src_diff[0],
&x->block[i].coeff[0], 32);
}
......
......@@ -304,8 +304,8 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
z->mv_row_max = x->mv_row_max;
*/
z->vp8_short_fdct4x4 = x->vp8_short_fdct4x4;
z->vp8_short_fdct8x4 = x->vp8_short_fdct8x4;
z->short_fdct4x4 = x->short_fdct4x4;
z->short_fdct8x4 = x->short_fdct8x4;
z->short_walsh4x4 = x->short_walsh4x4;
z->quantize_b = x->quantize_b;
z->quantize_b_pair = x->quantize_b_pair;
......
......@@ -25,12 +25,6 @@ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
void vp8_cmachine_specific_config(VP8_COMP *cpi)
{
#if CONFIG_RUNTIME_CPU_DETECT
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
cpi->rtcd.encodemb.berr = vp8_block_error_c;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;
......
......@@ -141,9 +141,6 @@ extern int inter_uv_modes[4] ;
extern unsigned int inter_b_modes[15];
#endif
extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
extern void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
extern const int qrounding_factors[129];
......@@ -979,16 +976,17 @@ void vp8_set_speed_features(VP8_COMP *cpi)
if (cpi->sf.improved_dct)
{
cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
cpi->mb.short_fdct8x4 = vp8_short_fdct8x4;
cpi->mb.short_fdct4x4 = vp8_short_fdct4x4;
}
else
{
cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
/* No fast FDCT defined for any platform at this time. */
cpi->mb.short_fdct8x4 = vp8_short_fdct8x4;
cpi->mb.short_fdct4x4 = vp8_short_fdct4x4;
}
cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4);
cpi->mb.short_walsh4x4 = vp8_short_walsh4x4;
if (cpi->sf.improved_quant)
{
......
......@@ -19,7 +19,6 @@
#include "tokenize.h"
#include "vp8/common/onyxc_int.h"
#include "variance.h"
#include "dct.h"
#include "encodemb.h"
#include "quantize.h"
#include "vp8/common/entropy.h"
......@@ -224,7 +223,6 @@ typedef struct
typedef struct VP8_ENCODER_RTCD
{
vp8_fdct_rtcd_vtable_t fdct;
vp8_encodemb_rtcd_vtable_t encodemb;
vp8_quantize_rtcd_vtable_t quantize;
vp8_search_rtcd_vtable_t search;
......
......@@ -32,7 +32,6 @@
#include "mcomp.h"
#include "rdopt.h"
#include "vpx_mem/vpx_mem.h"
#include "dct.h"
#include "vp8/common/systemdependent.h"
#if CONFIG_RUNTIME_CPU_DETECT
......@@ -580,7 +579,7 @@ static void macro_block_yrd( MACROBLOCK *mb,
// Fdct and building the 2nd order block
for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
{
mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
*Y2DCPtr++ = beptr->coeff[0];
*Y2DCPtr++ = beptr->coeff[16];
}
......@@ -656,7 +655,7 @@ static int rd_pick_intra4x4block(
(*(b->base_dst) + b->dst, b->dst_stride,
mode, b->predictor, 16);
ENCODEMB_INVOKE(IF_RTCD(&cpi->rtcd.encodemb), subb)(be, b, 16);
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
x->short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b(be, b);
tempa = ta;
......@@ -1028,7 +1027,7 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels
vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
x->short_fdct4x4(be->src_diff, be->coeff, 32);
// set to 0 no way to account for 2nd order DC so discount
//be->coeff[0] = 0;
......
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef DCT_X86_H
#define DCT_X86_H
/* Note:
*
* This platform is commonly built for runtime CPU detection. If you modify
* any of the function mappings present in this file, be sure to also update
* them in the function pointer initialization code
*/
#if HAVE_MMX
extern prototype_fdct(vp8_short_fdct4x4_mmx);
extern prototype_fdct(vp8_short_fdct8x4_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
#undef vp8_fdct_fast4x4
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_mmx
#undef vp8_fdct_fast8x4
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_mmx
#endif
#endif
#if HAVE_SSE2
extern prototype_fdct(vp8_short_fdct8x4_sse2);
extern prototype_fdct(vp8_short_walsh4x4_sse2);
extern prototype_fdct(vp8_short_fdct4x4_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_fdct_short4x4
#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
#undef vp8_fdct_short8x4
#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
#undef vp8_fdct_fast4x4
#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
#undef vp8_fdct_fast8x4
#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
#undef vp8_fdct_walsh_short4x4
#define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_sse2
#endif
#endif
#endif
......@@ -127,13 +127,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
#if HAVE_MMX
if (flags & HAS_MMX)
{
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c;
cpi->rtcd.encodemb.berr = vp8_block_error_mmx;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx;
......@@ -148,13 +141,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
#if HAVE_SSE2
if (flags & HAS_SSE2)
{
cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2;
cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_sse2;
cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_sse2;
cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_sse2;
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ;
cpi->rtcd.encodemb.berr = vp8_block_error_xmm;
cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm;
cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm;
......
......@@ -48,7 +48,6 @@ VP8_CX_SRCS-yes += encoder/generic/csystemdependent.c
VP8_CX_SRCS-yes += encoder/block.h
VP8_CX_SRCS-yes += encoder/boolhuff.h
VP8_CX_SRCS-yes += encoder/bitstream.h
VP8_CX_SRCS-yes += encoder/dct.h
VP8_CX_SRCS-yes += encoder/encodeintra.h
VP8_CX_SRCS-yes += encoder/encodemb.h
VP8_CX_SRCS-yes += encoder/encodemv.h
......@@ -95,7 +94,6 @@ VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c
endif
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h
......
......@@ -16,7 +16,6 @@
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.h
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/encodemb_arm.h
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/quantize_arm.c
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/quantize_arm.h
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment