/* Copyright (C) 2002 Jean-Marc Valin
   File: vbr.c

   VBR-related routines

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "vbr.h"
#include <math.h>


/* Square of x. The argument is evaluated twice, so do not pass an
   expression with side effects. */
#define sqr(x) ((x)*(x))

/* Energy floor: added to the frame energy before taking its log and used
   as the lower bound a frame must exceed to update the noise estimate. */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy (pow(ener, NOISE_POW)) for the
   noise-level tracking. */
#define NOISE_POW .3

#ifndef DISABLE_VBR

/* Threshold table with one row per mode (labelled at the end of each row)
   and 11 columns. Note that the rows follow the labels as written, not
   ascending bit-rate: the 4 kbps row is last.
   NOTE(review): the 11 columns are presumably indexed by quality level
   0..10 -- confirm against the code that reads this table. */
const float vbr_nb_thresh[9][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*   CNG   */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,-0.25f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f}, /*  2 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.7f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f}, /*  6 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  4.2f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f}, /*  8 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f, 5.25f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f}, /* 11 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f, 9.25f,  8.0f,  7.0f,  5.0f,  4.0f,  3.0f}, /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  6.2f,  5.2f,  5.0f}, /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 10.0f,  9.8f,  7.5f}, /* 24 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  1.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}  /*  4 kbps */
};


/* Threshold table with one row per mode (labelled at the end of each row)
   and 11 columns. The first two rows hold -1 in every column.
   NOTE(review): the 11 columns are presumably indexed by quality level
   0..10 -- confirm against the code that reads this table. */
const float vbr_hb_thresh[5][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /*  2 kbps */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f}, /*  6 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f}, /* 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}  /* 18 kbps */
};

/* Threshold table with two rows (labelled at the end of each row) and
   11 columns.
   NOTE(review): the 11 columns are presumably indexed by quality level
   0..10 -- confirm against the code that reads this table. */
const float vbr_uhb_thresh[2][11]={
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}, /* silence */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}  /*  2 kbps */
};

/* Set the fields of a VBR state to their starting values.

   vbr: state to initialise; it is dereferenced unconditionally, so it
        must not be NULL.

   The noise accumulator and its count are seeded so that their ratio,
   the initial noise level, equals pow(MIN_ENERGY, NOISE_POW). Every slot
   of the log-energy history starts at log(MIN_ENERGY). */
void vbr_init(VBRState *vbr)
{
   int i;

   vbr->average_energy=1600000;
   vbr->last_energy=1;
   vbr->accum_sum=0;
   vbr->soft_pitch=0;
   vbr->last_pitch_coef=0;
   vbr->last_quality=0;

   vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
   vbr->noise_accum_count=.05;
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
   vbr->consec_noise=0;


   for (i=0;i<VBR_MEMORY_SIZE;i++)
      vbr->last_log_energy[i] = log(MIN_ENERGY);
}


/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  -Attacks (positive energy derivative) should be coded with more bits

  -Stationary voiced segments should receive more bits

  -Segments with (very) low absolute energy should receive fewer bits (maybe
  only shaped noise?)

  -DTX for near-zero energy?

  -Stationary fricative segments should have fewer bits

  -Temporal masking: when energy slope is decreasing, decrease the bit-rate

  -Decrease bit-rate for males (low pitch)?

  -(wideband only) fewer bits in the high-band when signal is very
  non-stationary (harder to notice high-frequency noise)???

*/

/* Analyse one frame and return a quality figure for it, updating the
   VBR state along the way.

   vbr:        persistent state; read and updated (average/last energy,
               noise accumulators, consecutive-noise counter, soft pitch,
               last quality, log-energy history).
   sig:        frame samples; sig[0..len-1] are read.
   len:        number of samples in sig.
   pitch:      currently unused by this function.
   pitch_coef: pitch coefficient for the frame; values above .4 raise the
               voicing measure and the returned quality, values below
               lower them.

   Returns the quality figure, never lower than -1. The same value is
   stored in vbr->last_quality. */
float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
{
   int i;
   float ener=0, ener1=0, ener2=0;
   float qual=7;
   float log_energy;
   float non_st=0;
   float voicing;
   float pow_ener;

   (void)pitch;

   /* Energy of the first and second half of the frame, kept separately so
      that an energy increase inside the frame can be detected below. */
   for (i=0;i<(len>>1);i++)
      ener1 += ((float)sig[i])*sig[i];

   for (i=len>>1;i<len;i++)
      ener2 += ((float)sig[i])*sig[i];
   ener=ener1+ener2;

   /* Non-stationarity: squared distance between this frame's log energy
      and each stored past log energy, scaled and capped at 1. */
   log_energy = log(ener+MIN_ENERGY);
   for (i=0;i<VBR_MEMORY_SIZE;i++)
      non_st += sqr(log_energy-vbr->last_log_energy[i]);
   non_st =  non_st/(30*VBR_MEMORY_SIZE);
   if (non_st>1)
      non_st=1;

   /* Signed square of (pitch_coef - .4): negative below .4, positive above. */
   voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);
   vbr->average_energy = 0.9*vbr->average_energy + .1*ener;
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
   pow_ener = pow(ener,NOISE_POW);
   /* While the accumulator count is still near its initial .05, reseed the
      noise accumulator from the current frame. */
   if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
      vbr->noise_accum = .05*pow_ener;

   /* Frame is counted as noise when it is weakly voiced, stationary and
      close to the tracked noise level (or clearly unvoiced and stationary). */
   if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
       || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<0 && non_st < .05))
   {
      float tmp;
      vbr->consec_noise++;
      /* Limit this frame's contribution to three times the noise level. */
      if (pow_ener > 3*vbr->noise_level)
         tmp = 3*vbr->noise_level;
      else
         tmp = pow_ener;
      /* Only adapt the noise estimate after 4 consecutive noise frames. */
      if (vbr->consec_noise>=4)
      {
         vbr->noise_accum = .95*vbr->noise_accum + .05*tmp;
         vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
      }
   } else {
      vbr->consec_noise=0;
   }

   /* A frame below the current noise level (but above the energy floor)
      always pulls the noise estimate towards it. */
   if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
   {
      vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
      vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
   }

   /* Checking for very low absolute energy */
   if (ener < 30000)
   {
      qual -= .7;
      if (ener < 10000)
         qual-=.7;
      if (ener < 3000)
         qual-=.7;
   } else {
      /* Log energy ratios against the previous frame and the running
         average. */
      float short_diff, long_diff;
      short_diff = log((ener+1)/(1+vbr->last_energy));
      long_diff = log((ener+1)/(1+vbr->average_energy));

      if (long_diff<-5)
         long_diff=-5;
      if (long_diff>2)
         long_diff=2;

      if (long_diff>0)
         qual += .6*long_diff;
      if (long_diff<0)
         qual += .5*long_diff;
      if (short_diff>0)
      {
         if (short_diff>5)
            short_diff=5;
         qual += 1*short_diff;
      }
      /* Checking for energy increases */
      if (ener2 > 1.6*ener1)
         qual += .5;
   }
   vbr->last_energy = ener;
   vbr->soft_pitch = .8*vbr->soft_pitch + .2*pitch_coef;
   qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));

   /* Quality may rise immediately but falls only half-way towards the new
      value per frame. */
   if (qual < vbr->last_quality)
      qual = .5*qual + .5*vbr->last_quality;
   if (qual<4)
      qual=4;
   if (qual>10)
      qual=10;

   /*
   if (vbr->consec_noise>=2)
      qual-=1.3;
   if (vbr->consec_noise>=5)
      qual-=1.3;
   if (vbr->consec_noise>=12)
      qual-=1.3;
   */
   if (vbr->consec_noise>=3)
      qual=4;

   /* Penalty that grows with the logarithm of the noise-run length. */
   if (vbr->consec_noise)
      qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3));
   if (qual<0)
      qual=0;

   /* Extra penalties for frames whose energy is below 1600000. */
   if (ener<1600000)
   {
      if (vbr->consec_noise>2)
         qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
      if (ener<10000&&vbr->consec_noise>2)
         qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
      if (qual<0)
         qual=0;
      qual += .3*log(.0001+ener/1600000.0);
   }
   if (qual<-1)
      qual=-1;

   vbr->last_pitch_coef = pitch_coef;
   vbr->last_quality = qual;

   /* Shift the log-energy history by one and store this frame in front. */
   for (i=VBR_MEMORY_SIZE-1;i>0;i--)
      vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
   vbr->last_log_energy[0] = log_energy;

   return qual;
}

/* Counterpart of vbr_init(). It currently does nothing: the function body
   is empty and vbr is not accessed. */
void vbr_destroy(VBRState *vbr)
{
   (void)vbr;
}

#endif /* #ifndef DISABLE_VBR */