Commit 378ba611, authored by Daniel Kang, committed by On2 (Google) Code Review

Merge "Adds x86inc.asm and update idct/dequant mmx" into experimental

parents 074a38b1 7a000715
......@@ -33,6 +33,7 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
}
#endif
// The commented-out functions need to be rewritten for vpx.
#if HAVE_ARMV6
if (flags & HAS_MEDIA)
{
......@@ -46,10 +47,10 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6;
rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6;
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6;
rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_v6;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_v6;
//rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6;
//rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual;
//rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_v6;
//rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_v6;
rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
......@@ -84,10 +85,10 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon;
rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon;
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon;
rtcd->idct.idct16 = vp8_short_idct4x4llm_neon;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon;
//rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon;
//rtcd->idct.idct16 = vp8_short_idct4x4llm_neon;
//rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon;
//rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon;
rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon;
......
......@@ -139,10 +139,4 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
#if ARCH_ARM
vp8_arch_arm_common_init(ctx);
#endif
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c;
rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
}
......@@ -20,22 +20,22 @@
*/
#if HAVE_MMX
extern prototype_idct(vp8_short_idct4x4llm_1_mmx);
extern prototype_idct(vp8_short_idct4x4llm_mmx);
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx);
extern prototype_idct(vpx_short_idct4x4llm_1_mmx);
extern prototype_idct(vpx_short_idct4x4llm_mmx);
extern prototype_idct_scalar_add(vpx_dc_only_idct_add_mmx);
extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_idct_idct1
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_mmx
#define vp8_idct_idct1 vpx_short_idct4x4llm_1_mmx
#undef vp8_idct_idct16
#define vp8_idct_idct16 vp8_short_idct4x4llm_mmx
#define vp8_idct_idct16 vpx_short_idct4x4llm_mmx
#undef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_mmx
#define vp8_idct_idct1_scalar_add vpx_dc_only_idct_add_mmx
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
......
This diff is collapsed.
......@@ -34,14 +34,14 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
// The commented-out functions need to be rewritten for vpx.
if (flags & HAS_MMX)
{
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx;
rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx;
rtcd->idct.idct1 = vpx_short_idct4x4llm_1_mmx;
rtcd->idct.idct16 = vpx_short_idct4x4llm_mmx;
rtcd->idct.idct1_scalar_add = vpx_dc_only_idct_add_mmx;
//rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
//rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx;
rtcd->recon.recon = vp8_recon_b_mmx;
rtcd->recon.copy8x8 = vp8_copy_mem8x8_mmx;
......@@ -91,7 +91,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
vp8_build_intra_predictors_mbuv_s_sse2;
#endif
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2;
//rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2;
#if CONFIG_ENHANCED_INTERP == 0 && CONFIG_HIGH_PRECISION_MV == 0 && CONFIG_SIXTEENTH_SUBPEL_UV == 0
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_sse2;
......
......@@ -27,15 +27,16 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
}
#endif
// The commented-out functions need to be rewritten for vpx.
#if HAVE_ARMV6
if (flags & HAS_MEDIA)
{
pbi->dequant.block = vp8_dequantize_b_v6;
pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
/*pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_v6;
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;*/
}
#endif
......@@ -43,12 +44,12 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
if (flags & HAS_NEON)
{
pbi->dequant.block = vp8_dequantize_b_neon;
pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
//pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
/*This is not used: NEON always dequants two blocks at once.
pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_neon;*/
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
/*pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;*/
}
#endif
#endif
......
......@@ -43,10 +43,4 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
#if ARCH_ARM
vp8_arch_arm_decode_init(pbi);
#endif
pbi->dequant.idct_add = vp8_dequant_idct_add_c;
pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_c;
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
}
This diff is collapsed.
......@@ -21,8 +21,8 @@
*/
#if HAVE_MMX
extern prototype_dequant_block(vp8_dequantize_b_mmx);
extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
extern prototype_dequant_idct_add(vpx_dequant_idct_add_mmx);
extern prototype_dequant_dc_idct_add(vpx_dequant_dc_idct_add_mmx);
extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_mmx);
extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx);
extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx);
......@@ -32,7 +32,7 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx);
#define vp8_dequant_block vp8_dequantize_b_mmx
#undef vp8_dequant_idct_add
#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
#define vp8_dequant_idct_add vpx_dequant_idct_add_mmx
#undef vp8_dequant_dc_idct_add
#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_mmx
......
......@@ -21,24 +21,24 @@ void vp8_dequant_dc_idct_add_y_block_mmx
for (i = 0; i < 4; i++)
{
if (eobs[0] > 1)
vp8_dequant_dc_idct_add_mmx (q, dq, pre, dst, 16, stride, dc[0]);
vpx_dequant_dc_idct_add_mmx (q, dq, pre, dst, 16, stride, dc[0]);
else
vp8_dc_only_idct_add_mmx (dc[0], pre, dst, 16, stride);
vpx_dc_only_idct_add_mmx (dc[0], pre, dst, 16, stride);
if (eobs[1] > 1)
vp8_dequant_dc_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
vpx_dequant_dc_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
else
vp8_dc_only_idct_add_mmx (dc[1], pre+4, dst+4, 16, stride);
vpx_dc_only_idct_add_mmx (dc[1], pre+4, dst+4, 16, stride);
if (eobs[2] > 1)
vp8_dequant_dc_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
vpx_dequant_dc_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
else
vp8_dc_only_idct_add_mmx (dc[2], pre+8, dst+8, 16, stride);
vpx_dc_only_idct_add_mmx (dc[2], pre+8, dst+8, 16, stride);
if (eobs[3] > 1)
vp8_dequant_dc_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
vpx_dequant_dc_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
else
vp8_dc_only_idct_add_mmx (dc[3], pre+12, dst+12, 16, stride);
vpx_dc_only_idct_add_mmx (dc[3], pre+12, dst+12, 16, stride);
q += 64;
dc += 4;
......@@ -57,34 +57,34 @@ void vp8_dequant_idct_add_y_block_mmx
for (i = 0; i < 4; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx (q, dq, pre, dst, 16, stride);
vpx_dequant_idct_add_mmx (q, dq, pre, dst, 16, stride);
else
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dst, 16, stride);
vpx_dc_only_idct_add_mmx (q[0]*dq[0], pre, dst, 16, stride);
((int *)q)[0] = 0;
}
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride);
vpx_dequant_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride);
else
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dst+4, 16, stride);
vpx_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dst+4, 16, stride);
((int *)(q+16))[0] = 0;
}
if (eobs[2] > 1)
vp8_dequant_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride);
vpx_dequant_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride);
else
{
vp8_dc_only_idct_add_mmx (q[32]*dq[0], pre+8, dst+8, 16, stride);
vpx_dc_only_idct_add_mmx (q[32]*dq[0], pre+8, dst+8, 16, stride);
((int *)(q+32))[0] = 0;
}
if (eobs[3] > 1)
vp8_dequant_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride);
vpx_dequant_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride);
else
{
vp8_dc_only_idct_add_mmx (q[48]*dq[0], pre+12, dst+12, 16, stride);
vpx_dc_only_idct_add_mmx (q[48]*dq[0], pre+12, dst+12, 16, stride);
((int *)(q+48))[0] = 0;
}
......@@ -104,18 +104,18 @@ void vp8_dequant_idct_add_uv_block_mmx
for (i = 0; i < 2; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx (q, dq, pre, dstu, 8, stride);
vpx_dequant_idct_add_mmx (q, dq, pre, dstu, 8, stride);
else
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstu, 8, stride);
vpx_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstu, 8, stride);
((int *)q)[0] = 0;
}
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstu+4, 8, stride);
vpx_dequant_idct_add_mmx (q+16, dq, pre+4, dstu+4, 8, stride);
else
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstu+4, 8, stride);
vpx_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstu+4, 8, stride);
((int *)(q+16))[0] = 0;
}
......@@ -128,18 +128,18 @@ void vp8_dequant_idct_add_uv_block_mmx
for (i = 0; i < 2; i++)
{
if (eobs[0] > 1)
vp8_dequant_idct_add_mmx (q, dq, pre, dstv, 8, stride);
vpx_dequant_idct_add_mmx (q, dq, pre, dstv, 8, stride);
else
{
vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstv, 8, stride);
vpx_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstv, 8, stride);
((int *)q)[0] = 0;
}
if (eobs[1] > 1)
vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstv+4, 8, stride);
vpx_dequant_idct_add_mmx (q+16, dq, pre+4, dstv+4, 8, stride);
else
{
vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstv+4, 8, stride);
vpx_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstv+4, 8, stride);
((int *)(q+16))[0] = 0;
}
......
......@@ -15,14 +15,14 @@
#if HAVE_MMX
/* Low-level dequantize routines; declared here, defined outside this view
 * (presumably in assembly -- confirm). Both take the same three pointers in
 * the order the wrapper below passes them: sq, dq, q. */
void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
void vpx_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
/* MMX entry point for dequantizing one block.
 *
 * Extracts the qcoeff, dqcoeff and dequant pointers from the block
 * descriptor `d`, casts each to `short *`, and forwards them to the
 * implementation routine(s).
 *
 * NOTE(review): both the vp8_ and vpx_ implementation calls appear below.
 * This looks like the before/after lines of a diff rendered without +/-
 * markers rather than an intentional double call -- confirm which single
 * call is intended.
 */
void vp8_dequantize_b_mmx(BLOCKD *d)
{
/* sq <- d->qcoeff, dq <- d->dqcoeff, q <- d->dequant */
short *sq = (short *) d->qcoeff;
short *dq = (short *) d->dqcoeff;
short *q = (short *) d->dequant;
vp8_dequantize_b_impl_mmx(sq, dq, q);
vpx_dequantize_b_impl_mmx(sq, dq, q);
}
#endif
......@@ -42,8 +42,8 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
if (flags & HAS_MMX)
{
pbi->dequant.block = vp8_dequantize_b_mmx;
pbi->dequant.idct_add = vp8_dequant_idct_add_mmx;
pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx;
pbi->dequant.idct_add = vpx_dequant_idct_add_mmx;
pbi->dequant.dc_idct_add = vpx_dequant_dc_idct_add_mmx;
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
......@@ -52,9 +52,9 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
#if HAVE_SSE2
if (flags & HAS_SSE2)
{
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_sse2;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
//pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_sse2;
//pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
//pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
}
#endif
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment