Commit 80ccca87 authored by Monty's avatar Monty
Browse files

incremental update.  Go to pure tone masking (no noise), prepare for training

svn path=/trunk/vorbis/; revision=222
parent c460c05f
......@@ -12,7 +12,7 @@
********************************************************************
function: libvorbis codec headers
last mod: $Id: codec.h,v 1.3 1999/12/31 12:35:10 xiphmont Exp $
last mod: $Id: codec.h,v 1.4 2000/01/04 09:04:54 xiphmont Exp $
********************************************************************/
......@@ -38,8 +38,8 @@ typedef struct {
int n;
struct vorbis_info *vi;
double *noisethresh;
double *maskthresh;
double *barknum;
} psy_lookup;
......@@ -92,7 +92,7 @@ typedef struct {
compression/decompression mode in progress (eg, psychoacoustic settings,
channel setup, options, codebook etc) *********************************/
#define THRESH_POINTS 20
#define MAX_BARK 27
typedef struct vorbis_info{
int channels;
......@@ -146,15 +146,9 @@ typedef struct vorbis_info{
double preecho_thresh;
double preecho_clamp;
double *threshhold_points;
double noisethresh[THRESH_POINTS];
double lnoise;
double hnoise;
double noisebias;
double maskthresh[THRESH_POINTS];
double lroll;
double hroll;
double maskbias;
double maskthresh[MAX_BARK];
double lrolldB;
double hrolldB;
/* local storage, only used on the encoding size. This way the
application does not need to worry about freeing some packets'
......@@ -396,12 +390,5 @@ extern int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb);
extern int vorbis_synthesis_pcmout(vorbis_dsp_state *v,double ***pcm);
extern int vorbis_synthesis_read(vorbis_dsp_state *v,int samples);
#define min(x,y) ((x)>(y)?(y):(x))
#define max(x,y) ((x)<(y)?(y):(x))
/* 20log10(x) */
#define todB(x) ((x)==0?-9.e40:log(fabs(x))*8.6858896)
#define fromdB(x) (exp((x)*.11512925))
#endif
......@@ -12,7 +12,7 @@
********************************************************************
function: predefined encoding modes
last mod: $Id: modes.h,v 1.2 2000/01/01 02:52:57 xiphmont Exp $
last mod: $Id: modes.h,v 1.3 2000/01/04 09:04:55 xiphmont Exp $
********************************************************************/
......@@ -22,10 +22,15 @@
#include <stdio.h>
#include "codec.h"
double threshhold_points[THRESH_POINTS]=
/* 0Hz 24kHz
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 */
{0.,.01,.02,.03,.04,.06,.08,.1,.15,.2,.25,.3,.34,.4,.45,.5,.6,.7,.8,1.};
/*
0 1 2 3 4 5 6 7 8 9
0, 100, 200, 300, 400, 510, 630, 770, 920, 1080,
10 11 12 13 14 15 16 17 18 19
1270, 1480, 1720, 2000, 2320, 2700, 3150, 3700, 4400, 5300,
20 21 22 23 24 25 26 Bark
6400, 7700, 9500, 12000, 15500, 20500, 27000 Hz */
vorbis_info predef_modes[]={
/* CD quality stereo, no channel coupling */
......@@ -35,23 +40,17 @@ vorbis_info predef_modes[]={
/* dummy, dummy, dummy, dummy */
0, NULL, 0, NULL,
/* smallblock, largeblock, LPC order (small, large) */
{256, 2048}, {12,22},
{256, 2048}, {20,32},
/* {bark mapping size}, spectral channels */
{64,256}, 2,
/* thresh sample period, preecho clamp trigger threshhold, range, dummy */
64, 4, 2, NULL,
/* noise masking curve dB attenuation levels [20] */
/*{-12,-12,-18,-18,-18,-18,-18,-18,-18,-12,
-8,-4,0,0,0,0,0,0,0,0},*/
{-100,-100,-100,-100,-100,-100,-100,-24,-24,-24,
-24,-24,-24,-24,-24,-24,-24,-24,-24,-24},
/* noise masking scale biases */
.95,1.01,.01,
/* tone masking curve dB attenuation levels [20] */
{-20,-20,-20,-20,-20,-20,-20,-20,-20,-20,
-20,-20,-20,-20,-20,-20,-20,-20,-20,-20},
64, 10, 2,
/* tone masking curve dB attenuation levels [27] */
{ -10, -10, -10, -10, -10, -10, -10, -10, -10, -10,
-12, -14, -16, -16, -16, -16, -18, -18, -16, -16,
-12, -10, -8, -6, -6, -6, -4},
/* tone masking rolloff settings (dB per octave), octave bias */
90,60,.001,
24,10,
NULL,NULL,NULL},
};
......
# vorbis makefile configured for use with gcc on any platform
# $Id: Makefile.in,v 1.18 1999/12/31 12:35:11 xiphmont Exp $
# $Id: Makefile.in,v 1.19 2000/01/04 09:04:56 xiphmont Exp $
###############################################################################
# #
......@@ -29,7 +29,7 @@ LIBS=@LIBS@ -lm
HFILES = ../include/codec.h ../include/vorbisfile.h \
bitwise.h envelope.h lpc.h lsp.h \
psy.h smallft.h window.h barkmel.h os.h mdct.h
psy.h smallft.h window.h scales.h os.h mdct.h
LFILES = framing.o mdct.o smallft.o block.o envelope.o window.o\
lsp.o lpc.o analysis.o synthesis.o psy.o info.o bitwise.o\
spectrum.o vorbisfile.o
......
......@@ -12,7 +12,7 @@
********************************************************************
function: single-block PCM analysis
last mod: $Id: analysis.c,v 1.18 1999/12/30 07:26:32 xiphmont Exp $
last mod: $Id: analysis.c,v 1.19 2000/01/04 09:04:57 xiphmont Exp $
********************************************************************/
......@@ -87,8 +87,6 @@ int vorbis_analysis(vorbis_block *vb,ogg_packet *op){
memset(floor,0,sizeof(double)*n/2);
_vp_noise_floor(vp,vb->pcm[i],floor);
#ifdef ANALYSIS
{
FILE *out;
......@@ -100,11 +98,6 @@ int vorbis_analysis(vorbis_block *vb,ogg_packet *op){
fprintf(out,"%g\n",vb->pcm[i][j]);
fclose(out);
sprintf(buffer,"Anoise%d.m",vb->sequence);
out=fopen(buffer,"w+");
for(j=0;j<n/2;j++)
fprintf(out,"%g\n",floor[j]);
fclose(out);
}
#endif
......@@ -139,8 +132,9 @@ int vorbis_analysis(vorbis_block *vb,ogg_packet *op){
vorbis_lsp_to_lpc(lsp,lpc,vl->m);
vorbis_lpc_to_curve(curve,lpc,vb->amp[i],vl);
/* this may do various interesting massaging too...*/
if(vb->amp[i])_vs_residue_train(vb,vb->pcm[i],curve,n/2);
_vs_residue_quantize(vb->pcm[i],curve,vi,n/2);
#ifdef ANALYSIS
......
/********************************************************************
* *
* THIS FILE IS PART OF THE Ogg Vorbis SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS SOURCE IS GOVERNED BY *
* THE GNU PUBLIC LICENSE 2, WHICH IS INCLUDED WITH THIS SOURCE. *
* PLEASE READ THESE TERMS DISTRIBUTING. *
* *
* THE OggSQUISH SOURCE CODE IS (C) COPYRIGHT 1994-2000 *
* by Monty <monty@xiph.org> and The XIPHOPHORUS Company *
* http://www.xiph.org/ *
* *
********************************************************************
function: bark scale utility
last mod: $Id: barkmel.c,v 1.1 2000/01/04 09:04:58 xiphmont Exp $
********************************************************************/
#include <stdio.h>
#include "scales.h"
int main(){
int i;
double rate;
for(i=64;i<32000;i*=2){
rate=48000.;
fprintf(stderr,"rate=%gHz, block=%d, f(1)=%.2gHz bark(1)=%.2g (of %.2g)\n",
rate,i,rate/2 / (i/2),toBARK(rate/2 /(i/2)),toBARK(rate/2));
rate=44100.;
fprintf(stderr,"rate=%gHz, block=%d, f(1)=%.2gHz bark(1)=%.2g (of %.2g)\n",
rate,i,rate/2 / (i/2),toBARK(rate/2 /(i/2)),toBARK(rate/2));
rate=32000.;
fprintf(stderr,"rate=%gHz, block=%d, f(1)=%.2gHz bark(1)=%.2g (of %.2g)\n",
rate,i,rate/2 / (i/2),toBARK(rate/2 /(i/2)),toBARK(rate/2));
rate=22050.;
fprintf(stderr,"rate=%gHz, block=%d, f(1)=%.2gHz bark(1)=%.2g (of %.2g)\n",
rate,i,rate/2 / (i/2),toBARK(rate/2 /(i/2)),toBARK(rate/2));
rate=16000.;
fprintf(stderr,"rate=%gHz, block=%d, f(1)=%.2gHz bark(1)=%.2g (of %.2g)\n",
rate,i,rate/2 / (i/2),toBARK(rate/2 /(i/2)),toBARK(rate/2));
rate=11025.;
fprintf(stderr,"rate=%gHz, block=%d, f(1)=%.2gHz bark(1)=%.2g (of %.2g)\n",
rate,i,rate/2 / (i/2),toBARK(rate/2 /(i/2)),toBARK(rate/2));
rate=8000.;
fprintf(stderr,"rate=%gHz, block=%d, f(1)=%.2gHz bark(1)=%.2g (of %.2g)\n\n",
rate,i,rate/2 / (i/2),toBARK(rate/2 /(i/2)),toBARK(rate/2));
}
for(i=0;i<28;i++){
fprintf(stderr,"bark=%d %gHz\n",
i,fromBARK(i));
}
return(0);
}
......@@ -12,7 +12,7 @@
********************************************************************
function: PCM data vector blocking, windowing and dis/reassembly
last mod: $Id: block.c,v 1.19 1999/12/31 12:35:12 xiphmont Exp $
last mod: $Id: block.c,v 1.20 2000/01/04 09:04:59 xiphmont Exp $
Handle windowing, overlap-add, etc of the PCM vectors. This is made
more amusing by Vorbis' current two allowed block sizes.
......@@ -33,6 +33,7 @@
#include "lpc.h"
#include "bitwise.h"
#include "psy.h"
#include "scales.h"
/* pcm accumulator examples (not exhaustive):
......
......@@ -12,7 +12,7 @@
********************************************************************
function: maintain the info structure, info <-> header packets
last mod: $Id: info.c,v 1.12 1999/12/31 12:35:13 xiphmont Exp $
last mod: $Id: info.c,v 1.13 2000/01/04 09:05:00 xiphmont Exp $
********************************************************************/
......@@ -44,7 +44,6 @@ int vorbis_info_modeset(vorbis_info *vi, int mode){
/* handle the flat settings first */
memcpy(vi,&(predef_modes[mode]),sizeof(vorbis_info));
vi->threshhold_points=threshhold_points;
vi->user_comments=calloc(1,sizeof(char *));
vi->vendor=strdup("Xiphophorus libVorbis I 19991230");
......
......@@ -12,7 +12,7 @@
********************************************************************
function: LPC low level routines
last mod: $Id: lpc.c,v 1.12 2000/01/01 02:52:59 xiphmont Exp $
last mod: $Id: lpc.c,v 1.13 2000/01/04 09:05:01 xiphmont Exp $
********************************************************************/
......@@ -51,7 +51,7 @@ Carsten Bormann
#include "os.h"
#include "smallft.h"
#include "lpc.h"
#include "barkmel.h"
#include "scales.h"
/* Autocorrelation LPC coeff generation algorithm invented by
N. Levinson in 1947, modified by J. Durbin in 1959. */
......@@ -167,7 +167,7 @@ void lpc_init(lpc_lookup *l,int n, long mapped, long rate, int m){
floor(bark(rate-1)*C)=mapped-1
floor(bark(rate)*C)=mapped */
scale=mapped/fBARK(rate);
scale=mapped/toBARK(rate);
/* the mapping from a linear scale to a smaller bark scale is
straightforward. We do *not* make sure that the linear mapping
......@@ -178,9 +178,9 @@ void lpc_init(lpc_lookup *l,int n, long mapped, long rate, int m){
{
int last=-1;
for(i=0;i<n;i++){
int val=floor( fBARK(((double)rate)/n*i) *scale); /* bark numbers
represent
band edges */
int val=floor( toBARK(((double)rate)/n*i) *scale); /* bark numbers
represent
band edges */
if(val>=mapped)val=mapped; /* guard against the approximation */
l->linearmap[i]=val;
last=val;
......@@ -195,7 +195,7 @@ void lpc_init(lpc_lookup *l,int n, long mapped, long rate, int m){
smoothness in the scale; they should agree closely */
for(i=0;i<mapped;i++)
l->barknorm[i]=iBARK((i+1)/scale)-iBARK(i/scale);
l->barknorm[i]=fromBARK((i+1)/scale)-fromBARK(i/scale);
/* we cheat decoding the LPC spectrum via FFTs */
......
......@@ -12,7 +12,7 @@
********************************************************************
function: random psychoacoustics (not including preecho)
last mod: $Id: psy.c,v 1.8 1999/12/31 12:35:16 xiphmont Exp $
last mod: $Id: psy.c,v 1.9 2000/01/04 09:05:02 xiphmont Exp $
********************************************************************/
......@@ -24,17 +24,15 @@
#include "psy.h"
#include "lpc.h"
#include "smallft.h"
#include "scales.h"
/* Set up decibel threshhold 'curves'. Actually, just set a level at
log frequency intervals, interpolate, and call it a curve. */
/* Set up decibel threshhold slopes on a Bark frequency scale */
static void set_curve(double *points,
double *ref,int rn,double rrate,
double *c,int n, double crate){
static void set_curve(double *ref,double *c,int n, double crate){
int i,j=0;
for(i=0;i<rn-1;i++){
int endpos=points[i+1]*n*rrate/crate;
for(i=0;i<MAX_BARK-1;i++){
int endpos=rint(fromBARK(i+1)*2*n/crate);
double base=ref[i];
double delta=(ref[i+1]-base)/(endpos-j);
for(;j<endpos && j<n;j++){
......@@ -45,18 +43,19 @@ static void set_curve(double *points,
}
void _vp_psy_init(psy_lookup *p,vorbis_info *vi,int n){
long i;
memset(p,0,sizeof(psy_lookup));
p->noisethresh=malloc(n*sizeof(double));
p->maskthresh=malloc(n*sizeof(double));
p->barknum=malloc(n*sizeof(double));
p->vi=vi;
p->n=n;
/* set up the curves for a given blocksize and sample rate */
set_curve(vi->threshhold_points,
vi->noisethresh,THRESH_POINTS,48000,p->noisethresh,n,vi->rate);
set_curve(vi->threshhold_points,
vi->maskthresh, THRESH_POINTS,48000,p->maskthresh, n,vi->rate);
/* set up the lookups for a given blocksize and sample rate */
/* Vorbis max sample rate is limited by 26 Bark (54kHz) */
set_curve(vi->maskthresh, p->maskthresh, n,vi->rate);
for(i=0;i<n;i++)
p->barknum[i]=toBARK(vi->rate/2.*i/n);
#ifdef ANALYSIS
{
......@@ -64,11 +63,6 @@ void _vp_psy_init(psy_lookup *p,vorbis_info *vi,int n){
FILE *out;
char buffer[80];
sprintf(buffer,"noise_threshhold_%d.m",n);
out=fopen(buffer,"w+");
for(j=0;j<n;j++)
fprintf(out,"%g\n",p->noisethresh[j]);
fclose(out);
sprintf(buffer,"mask_threshhold_%d.m",n);
out=fopen(buffer,"w+");
for(j=0;j<n;j++)
......@@ -81,73 +75,27 @@ void _vp_psy_init(psy_lookup *p,vorbis_info *vi,int n){
void _vp_psy_clear(psy_lookup *p){
if(p){
if(p->noisethresh)free(p->noisethresh);
if(p->maskthresh)free(p->maskthresh);
if(p->barknum)free(p->barknum);
memset(p,0,sizeof(psy_lookup));
}
}
/* Find the mean log energy of a given 'band'; used to evaluate tones
against background noise */
/* This is faster than a real convolution, gives us roughly the log f
scale we seek, and gives OK results. So, that means it's a good
hack */
void _vp_noise_floor(psy_lookup *p, double *f, double *m){
int n=p->n;
vorbis_info *vi=p->vi;
long lo=0,hi=0;
double acc=0.;
double div=0.;
int i,j;
double bias=n*vi->noisebias;
for(i=0;i<n;i++){
long newlo=i*vi->lnoise-bias;
long newhi=i*vi->hnoise+bias;
double temp;
if(newhi>n)newhi=n;
if(newlo<0)newlo=0;
for(j=hi;j<newhi;j++){
acc+=todB(f[j]);
div+=1.;
}
for(j=lo;j<newlo;j++){
acc-=todB(f[j]);
div-=1.;
}
hi=newhi;
lo=newlo;
/* attenuate by the noise threshhold curve */
temp=fromdB(acc/div+p->noisethresh[i]);
if(m[i]<temp)m[i]=temp;
}
}
/* Masking curve: linear rolloff on a dB scale, adjusted by octave,
attenuated by maskthresh */
/* Masking curve: linear rolloff on a Bark/dB scale, attenuated by
maskthresh */
void _vp_mask_floor(psy_lookup *p,double *f, double *m){
int n=p->n;
double hroll=p->vi->hroll;
double lroll=p->vi->lroll;
double ocSCALE=1./log(2);
double curmask=-9.e40;
double maskbias=n*p->vi->maskbias;
double curoc=log(maskbias)*ocSCALE;
double hroll=p->vi->hrolldB;
double lroll=p->vi->lrolldB;
double curmask=todB(f[0])+p->maskthresh[0];
double curoc=0.;
long i;
/* run mask forward then backward */
for(i=0;i<n;i++){
double newmask=todB(f[i])+p->maskthresh[i];
double newoc=log(i+maskbias)*ocSCALE;
double newoc=p->barknum[i];
double roll=curmask-(newoc-curoc)*hroll;
double troll;
if(newmask>roll){
......@@ -158,11 +106,11 @@ void _vp_mask_floor(psy_lookup *p,double *f, double *m){
if(m[i]<troll)m[i]=troll;
}
curmask=-9.e40;
curoc=log(n+maskbias)*ocSCALE;
curmask=todB(f[n-1])+p->maskthresh[n-1];
curoc=p->barknum[n-1];
for(i=n-1;i>=0;i--){
double newmask=todB(f[i])+p->maskthresh[i];
double newoc=log(i+maskbias)*ocSCALE;
double newoc=p->barknum[i];
double roll=curmask-(curoc-newoc)*lroll;
double troll;
if(newmask>roll){
......
......@@ -11,28 +11,36 @@
* *
********************************************************************
function: linear scale -> bark and mel scales
last mod: $Id: barkmel.h,v 1.2 2000/01/01 02:52:58 xiphmont Exp $
function: linear scale -> dB, Bark and Mel scales
last mod: $Id: scales.h,v 1.1 2000/01/04 09:05:03 xiphmont Exp $
********************************************************************/
#ifndef _V_BARKMEL_H_
#define _V_BARKMEL_H_
#ifndef _V_SCALE_H_
#define _V_SCALES_H_
#include <math.h>
#define min(x,y) ((x)>(y)?(y):(x))
#define max(x,y) ((x)<(y)?(y):(x))
/* 20log10(x) */
#define todB(x) ((x)==0?-9.e40:log(fabs(x))*8.6858896)
#define fromdB(x) (exp((x)*.11512925))
/* The bark scale equations are approximations, since the original
table was somewhat hand rolled. They're chosen to have the best
possible fit to the rolled tables, thus their somewhat odd
appearence (these are more accurate and over a longer range than
table was somewhat hand rolled. The below are chosen to have the
best possible fit to the rolled tables, thus their somewhat odd
appearance (these are more accurate and over a longer range than
the oft-quoted bark equations found in the texts I have). The
approximations are valid from 0 - 30kHz (nyquist) or so.
all f in Hz, z in Bark */
#define fBARK(f) (13.1*atan(.00074*(f))+2.24*atan((f)*(f)*1.85e-8)+1e-4*(f))
#define iBARK(z) (102.*(z)-2.*pow(z,2.)+.4*pow(z,3)+pow(1.46,z)-1.)
#define fMEL(f) (log(1.+(f)*.001)*1442.695)
#define iMEL(m) (1000.*exp((m)/1442.695)-1000.)
#define toBARK(f) (13.1*atan(.00074*(f))+2.24*atan((f)*(f)*1.85e-8)+1e-4*(f))
#define fromBARK(z) (102.*(z)-2.*pow(z,2.)+.4*pow(z,3)+pow(1.46,z)-1.)
#define toMEL(f) (log(1.+(f)*.001)*1442.695)
#define fromMEL(m) (1000.*exp((m)/1442.695)-1000.)
#endif
......@@ -12,7 +12,7 @@
********************************************************************
function: spectrum envelope and residue code/decode
last mod: $Id: spectrum.c,v 1.8 1999/12/31 12:35:17 xiphmont Exp $
last mod: $Id: spectrum.c,v 1.9 2000/01/04 09:05:03 xiphmont Exp $
********************************************************************/
......@@ -45,19 +45,29 @@ int _vs_spectrum_encode(vorbis_block *vb,double amp,double *lsp){
int bits=rint(log(n)/log(2));
int i;
#if 0
#ifdef TRAIN
if(amp>0){
{
FILE *out=fopen("lspdiff.vqd","a");
for(i=0;i<m;i++)
fprintf(out,"%lf ",lsp[i]);
fprintf(out,"\n");
fclose(out);
}
FILE *out;
if(vb->W)
out=fopen("lspcoeff-long.vqd","a");
else
out=fopen("lspcoeff-short.vqd","a");
for(i=0;i<m;i++)
fprintf(out,"%lf ",lsp[i]);
fprintf(out,"\n");
fclose(out);
if(vb->W)
out=fopen("lspamp-long.vqd","a");
else
out=fopen("lspamp-short.vqd","a");
fprintf(out,"%lf\n",amp);
fclose(out);
}
#endif
_oggpack_write(&vb->opb,amp*327680,18);
_oggpack_write(&vb->opb,amp*32768,18);
for(i=0;i<m;i++){
int val=rint(lsp[i]/M_PI*n-last);
......@@ -83,7 +93,7 @@ int _vs_spectrum_decode(vorbis_block *vb,double *amp,double *lsp){
int i;
double min=M_PI/n/2.;
*amp=_oggpack_read(&vb->opb,18)/327680.;
*amp=_oggpack_read(&vb->opb,18)/32768.;
for(i=0;i<m;i++){
int val=_oggpack_read(&vb->opb,bits);
......@@ -96,6 +106,22 @@ int _vs_spectrum_decode(vorbis_block *vb,double *amp,double *lsp){
return(0);
}
void _vs_residue_train(vorbis_block *vb,double *data,double *curve,int n){
int i;
FILE *out;
if(vb->W)
out=fopen("residue-long.vqd","a");
else
out=fopen("residue-short.vqd","a");
for(i=0;i<n;i++){
double val=0;
if(curve[i]!=0.)val=data[i]/curve[i];
fprintf(out,"%lf ",val);
}
fprintf(out,"\n");
fclose(out);
}
void _vs_residue_quantize(double *data,double *curve,
vorbis_info *vi,int n){
......@@ -105,11 +131,13 @@ void _vs_residue_quantize(double *data,double *curve,
for(i=0;i<n;i++){
int val=0;
if(curve[i]!=0.)val=rint(data[i]/curve[i]);
if(val>16)val=16;
if(val<-16)val=-16;
if(val>31)val=31;
if(val<-31)val=-31;
/*if(val==0 || val==2 || val==-2){