Commit dda671b8 authored by Monty's avatar Monty
Browse files

Commit includes:

Major speed improvement through lpc->spectrum optimizations (roughly
6x faster decode).

Short blocks are now being used.

Fixed artifact due to overlap/add bug (was using the wrong window to
overlap long blocks )

Monty

svn path=/trunk/vorbis/; revision=144
parent 0024760a
......@@ -931,8 +931,8 @@ else
case $host in
i?86-*-linux*)
DEBUG="-g -Wall -fsigned-char"
OPT="-O20 -ffast-math -fsigned-char"
PROFILE="-pg -g -O20 -fsigned-char";;
OPT="-O20 -ffast-math -malign-double -fsigned-char"
PROFILE="-pg -g -O20 -malign-double -fsigned-char";;
sparc-sun-*)
DEBUG="-g -Wall -fsigned-char -mv8"
OPT="-O20 -ffast-math -fsigned-char -mv8"
......
# $Id: configure.in,v 1.2 1999/07/13 07:48:13 mwhitson Exp $
# $Id: configure.in,v 1.3 1999/10/12 08:37:54 xiphmont Exp $
AC_INIT(lib/mdct.c)
#AC_CONFIG_HEADER(config.h)
......@@ -45,8 +45,8 @@ else
case $host in
i?86-*-linux*)
DEBUG="-g -Wall -fsigned-char"
OPT="-O20 -ffast-math -fsigned-char"
PROFILE="-pg -g -O20 -fsigned-char";;
OPT="-O20 -ffast-math -malign-double -fsigned-char"
PROFILE="-pg -g -O20 -malign-double -D__NO_MATH_INLINES -fsigned-char";;
sparc-sun-*)
DEBUG="-g -Wall -fsigned-char -mv8"
OPT="-O20 -ffast-math -fsigned-char -mv8"
......
......@@ -202,9 +202,9 @@ int vorbis_analysis_init(vorbis_dsp_state *v,vorbis_info *vi){
/* Yes, wasteful to have four lookups. This will get collapsed once
things crystallize */
lpc_init(&v->vl[0],vi->blocksize[0]/2,vi->blocksize[0]/2,
lpc_init(&v->vl[0],vi->blocksize[0]/2,vi->blocksize[0]/8,
vi->floororder[0],vi->flooroctaves[0],1);
lpc_init(&v->vl[1],vi->blocksize[1]/2,vi->blocksize[1]/2,
lpc_init(&v->vl[1],vi->blocksize[1]/2,vi->blocksize[1]/8,
vi->floororder[0],vi->flooroctaves[0],1);
/*lpc_init(&v->vbal[0],vi->blocksize[0]/2,256,
......@@ -364,7 +364,6 @@ int vorbis_analysis_blockout(vorbis_dsp_state *v,vorbis_block *vb){
}
}else
v->nW=1;
v->nW=1;
/* Do we actually have enough data *now* for the next block? The
reason to check is that if we had no multipliers, that could
......@@ -453,9 +452,9 @@ int vorbis_synthesis_init(vorbis_dsp_state *v,vorbis_info *vi){
/* Yes, wasteful to have four lookups. This will get collapsed once
things crystallize */
lpc_init(&v->vl[0],vi->blocksize[0]/2,vi->blocksize[0]/2,
lpc_init(&v->vl[0],vi->blocksize[0]/2,vi->blocksize[0]/8,
vi->floororder[0],vi->flooroctaves[0],0);
lpc_init(&v->vl[1],vi->blocksize[1]/2,vi->blocksize[1]/2,
lpc_init(&v->vl[1],vi->blocksize[1]/2,vi->blocksize[1]/8,
vi->floororder[1],vi->flooroctaves[1],0);
/*lpc_init(&v->vbal[0],vi->blocksize[0]/2,256,
vi->balanceorder,vi->balanceoctaves,0);
......@@ -509,7 +508,8 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
int beginSl;
int endSl;
int i,j;
double *window;
double *windowL;
double *windowN;
/* Do we have enough PCM storage for the block? */
if(endW>v->pcm_storage){
......@@ -533,14 +533,15 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
break;
}
window=v->window[v->W][0][v->lW]+vi->blocksize[v->W]/2;
windowN=v->window[v->W][v->lW][v->lW];
windowL=windowN+vi->blocksize[v->W]/2;
for(j=0;j<vi->channels;j++){
double *pcm=v->pcm[j]+beginW;
/* the add section */
/* the overlap/add section */
for(i=beginSl;i<endSl;i++)
pcm[i]=pcm[i]*window[i]+vb->pcm[j][i];
pcm[i]=pcm[i]*windowL[i]+vb->pcm[j][i]*windowN[i];
/* the remaining section */
for(;i<sizeW;i++)
pcm[i]=vb->pcm[j][i];
......
......@@ -63,7 +63,7 @@ typedef struct lpclook{
drft_lookup fft;
/* en/decode lookups */
double *dscale;
int *iscale;
double *norm;
int n;
int ln;
......
......@@ -38,7 +38,7 @@ int vorbis_info_modeset(vorbis_info *vi, int mode){
/* handle the flat settings first */
memcpy(vi,&(predef_modes[mode]),sizeof(vorbis_info));
vi->user_comments=calloc(1,sizeof(char *));
vi->vendor=strdup("Xiphophorus libVorbis I 19991003");
vi->vendor=strdup("Xiphophorus libVorbis I 19991012");
return(0);
}
......
......@@ -14,7 +14,7 @@
function: LPC low level routines
author: Monty <monty@xiph.org>
modifications by: Monty
last modification date: Aug 22 1999
last modification date: Oct 11 1999
********************************************************************/
......@@ -67,11 +67,13 @@ double vorbis_gen_lpc(double *curve,double *lpc,lpc_lookup *l){
int n=l->ln;
int m=l->m;
double aut[m+1],work[n+n],error;
double fscale=1./n;
int i,j;
/* input is a real curve. make it complex-real */
/* This mixes phase, but the LPC generation doesn't care. */
for(i=0;i<n;i++){
work[i*2]=curve[i];
work[i*2]=curve[i]*fscale;
work[i*2+1]=0;
}
......@@ -134,24 +136,6 @@ double vorbis_gen_lpc(double *curve,double *lpc,lpc_lookup *l){
return error;
}
/* One can do this the long way by generating the transfer function in
the time domain and taking the forward FFT of the result. The
results from direct calculation are cleaner and faster. If one
looks at the below in the context of the calling function, there's
lots of redundant trig, but at least it's clear */
double vorbis_lpc_magnitude(double w,double *lpc, int m){
int k;
double real=1,imag=0;
double wn=w;
for(k=0;k<m;k++){
real+=lpc[k]*cos(wn);
imag+=lpc[k]*sin(wn);
wn+=w;
}
return(1./sqrt(real*real+imag*imag));
}
/* On top of this basic LPC infrastructure we introduce two modifications:
1) Filter generation is limited in the resolution of features it
......@@ -191,7 +175,7 @@ void lpc_init(lpc_lookup *l,int n, int mapped, int m, int oct, int encode_p){
l->n=n;
l->ln=mapped;
l->m=m;
l->dscale=malloc(n*sizeof(double));
l->iscale=malloc(n*sizeof(int));
l->norm=malloc(n*sizeof(double));
for(i=0;i<n;i++){
......@@ -214,14 +198,16 @@ void lpc_init(lpc_lookup *l,int n, int mapped, int m, int oct, int encode_p){
l->bscale[i]=rint(LOG_X(i,bias)*scale);
}
drft_init(&l->fft,mapped*2);
}
/* decode; encode may use this too */
drft_init(&l->fft,mapped*2);
{
double w=1./oct*M_PI;
for(i=0;i<n;i++)
l->dscale[i]=LOG_X(i,bias)*w;
for(i=0;i<n;i++){
l->iscale[i]=rint(LOG_X(i,bias)/oct*mapped);
if(l->iscale[i]>=l->ln)l->iscale[i]=l->ln-1;
}
}
}
......@@ -230,7 +216,7 @@ void lpc_clear(lpc_lookup *l){
if(l->bscale)free(l->bscale);
if(l->escale)free(l->escale);
drft_clear(&l->fft);
free(l->dscale);
free(l->iscale);
free(l->norm);
}
}
......@@ -246,14 +232,12 @@ double vorbis_curve_to_lpc(double *curve,double *lpc,lpc_lookup *l){
'em equal is a decent rule of thumb. The below must be reworked
slightly if mapped != n */
int n=l->n;
int mapped=n;
int mapped=l->ln;
double work[mapped];
int i;
/* fairly correct for low frequencies, naieve for high frequencies
(suffers from undersampling) */
for(i=0;i<mapped;i++){
double lin=l->escale[i];
int a=floor(lin);
......@@ -268,22 +252,62 @@ double vorbis_curve_to_lpc(double *curve,double *lpc,lpc_lookup *l){
return vorbis_gen_lpc(work,lpc,l);
}
/* One can do this the long way by generating the transfer function in
the time domain and taking the forward FFT of the result. The
results from direct calculation are cleaner and faster. If one
looks at the below in the context of the calling function, there's
lots of redundant trig, but at least it's clear */
/* This version does a linear curve generation and then later
interpolates the log curve from the linear curve. This could stand
optimization; it could both be more precise as well as not compute
quite a few unused values */
static void _vlpc_de_helper(double *curve,double *lpc,double amp,
lpc_lookup *l){
int i;
memset(curve,0,sizeof(double)*l->ln*2);
for(i=0;i<l->m;i++){
curve[i*2+1]=lpc[i]/4/amp;
curve[i*2+2]=-lpc[i]/4/amp;
}
drft_backward(&l->fft,curve); /* reappropriated ;-) */
{
int l2=l->ln*2;
double unit=1./amp;
curve[0]=(1./(curve[0]+unit));
for(i=1;i<l->ln;i++){
double real=(curve[i]+curve[l2-i]);
double imag=(curve[i]-curve[l2-i]);
curve[i]=(1./hypot(real+unit,imag));
}
}
}
/* generate the whole freq response curve on an LPC IIR filter */
void vorbis_lpc_to_curve(double *curve,double *lpc,double amp,lpc_lookup *l){
double lcurve[l->ln*2];
int i;
_vlpc_de_helper(lcurve,lpc,amp,l);
for(i=0;i<l->n;i++)
curve[i]=vorbis_lpc_magnitude(l->dscale[i],lpc,l->m)*amp*l->norm[i];
curve[i]=lcurve[l->iscale[i]]*l->norm[i];
}
/* find frequency response of LPC filter only at nonsero residue
points and apply the envelope to the residue */
void vorbis_lpc_apply(double *residue,double *lpc,double amp,lpc_lookup *l){
double lcurve[l->ln*2];
int i;
_vlpc_de_helper(lcurve,lpc,amp,l);
for(i=0;i<l->n;i++)
if(residue[i])
residue[i]*=vorbis_lpc_magnitude(l->dscale[i],lpc,l->m)*amp*l->norm[i];
residue[i]*=lcurve[l->iscale[i]]*l->norm[i];
}
......@@ -270,7 +270,7 @@ void mdct_forward(mdct_lookup *init, double *in, double *out, double *window){
}
}
void mdct_backward(mdct_lookup *init, double *in, double *out, double *window){
void mdct_backward(mdct_lookup *init, double *in, double *out){
int n=init->n;
double *x=alloca(n*sizeof(double));
int n2=n>>1;
......@@ -318,10 +318,9 @@ void mdct_backward(mdct_lookup *init, double *in, double *out, double *window){
int o3=n4+n2,o4=o3-1;
for(i=0;i<n4;i++){
double temp= (*x * *BO - *(x+2) * *BE);
out[o1]=-(out[o2]=(*x * *BO - *(x+2) * *BE));
out[o3]=out[o4]= -(*x * *BE + *(x+2) * *BO);
out[o1]=-temp*window[o1];
out[o2]=temp*window[o2];
o1++;
o2--;
......
......@@ -24,8 +24,7 @@ extern void mdct_init(mdct_lookup *lookup,int n);
extern void mdct_clear(mdct_lookup *l);
extern void mdct_forward(mdct_lookup *init, double *in,
double *out, double *window);
extern void mdct_backward(mdct_lookup *init, double *in,
double *out, double *window);
extern void mdct_backward(mdct_lookup *init, double *in, double *out);
#endif
......
......@@ -27,9 +27,9 @@
#include "smallft.h"
#include "xlogmap.h"
#define NOISEdB 0
#define NOISEdB -6
#define MASKdB 18
#define MASKdB 20
#define HROLL 60
#define LROLL 90
#define MASKBIAS 10
......@@ -204,7 +204,7 @@ double _vp_balance_compute(double *A, double *B, double *lpc,lpc_lookup *vb){
}
void _vp_balance_apply(double *A, double *B, double *lpc, double amp,
/*void _vp_balance_apply(double *A, double *B, double *lpc, double amp,
lpc_lookup *vb,int divp){
int i;
for(i=0;i<vb->n;i++){
......@@ -220,4 +220,4 @@ void _vp_balance_apply(double *A, double *B, double *lpc, double amp,
A[i]=mag*sin(phi);
B[i]=mag*cos(phi);
}
}
}*/
......@@ -4,11 +4,16 @@
*
* FFT implementation from OggSquish, minus cosine transforms,
* minus all but radix 2/4 case. In Vorbis we only need this
* cut-down version (used by the encoder, not decoder).
* cut-down version.
*
* To do more than just power-of-two sized vectors, see the full
* version I wrote for NetLib.
*
* Note that the packing is a little strange; rather than the FFT r/i
* packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
* it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
* FORTRAN version
*
******************************************************************/
#include <stdlib.h>
......@@ -1216,11 +1221,8 @@ void drft_forward(drft_lookup *l,double *data){
}
void drft_backward(drft_lookup *l,double *data){
int i;
double n1=1./l->n;
if (l->n==1)return;
drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
for(i=0;i<l->n;i++)data[i]*=n1;
}
void drft_init(drft_lookup *l,int n){
......
......@@ -37,7 +37,8 @@ int _vs_spectrum_encode(vorbis_block *vb,double amp,double *lsp){
int bits=rint(log(n)/log(2));
int i;
_oggpack_write(&vb->opb,amp*32768,15);
_oggpack_write(&vb->opb,amp*327680,18);
for(i=0;i<vb->vd->vi->floororder[scale];i++){
int val=rint(lsp[i]/M_PI*n-last);
......@@ -54,7 +55,7 @@ int _vs_spectrum_decode(vorbis_block *vb,double *amp,double *lsp){
int bits=rint(log(n)/log(2));
int i;
*amp=_oggpack_read(&vb->opb,15)/32768.;
*amp=_oggpack_read(&vb->opb,18)/327680.;
for(i=0;i<vb->vd->vi->floororder[scale];i++){
int val=_oggpack_read(&vb->opb,bits);
......
......@@ -33,7 +33,6 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op){
vorbis_info *vi=vd->vi;
oggpack_buffer *opb=&vb->opb;
lpc_lookup *vl;
double *window;
int spectral_order;
int n,i;
......@@ -59,10 +58,6 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op){
n=vb->pcmend=vi->blocksize[vb->W];
vb->multend=vb->pcmend/vi->envelopesa;
/* we don't know the size of the following window, but we don't need
it yet. We only use the first half of the window */
window=vb->vd->window[vb->W][vb->lW][0];
/* recover the time envelope */
/*if(_ve_envelope_decode(vb)<0)return(-1);*/
......@@ -84,7 +79,7 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op){
vorbis_lpc_apply(vb->pcm[i],lpc,vb->amp[i],vl);
/* MDCT->time */
mdct_backward(&vb->vd->vm[vb->W],vb->pcm[i],vb->pcm[i],window);
mdct_backward(&vb->vd->vm[vb->W],vb->pcm[i],vb->pcm[i]);
/* apply time domain envelope */
/*_ve_envelope_apply(vb,1);*/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment