vq.c 10.4 KB
Newer Older
1
/* (C) 2007-2008 Jean-Marc Valin, CSIRO
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:
   
   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   
   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
   
   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

32
33
34
35
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

36
#include "mathops.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
37
#include "cwrs.h"
38
#include "vq.h"
39
#include "arch.h"
40
#include "os_support.h"
41
#include "rate.h"
42

43
44
45
46
47
48
static void exp_rotation(celt_norm_t *X, int len, int dir, int stride, int K)
{
   int i, k, iter;
   celt_word16_t c, s;
   celt_word16_t gain, theta;
   celt_norm_t *Xptr;
49
   gain = celt_div((celt_word32_t)MULT16_16(Q15_ONE,len),(celt_word32_t)(3+len+6*K));
50
51
52
53
54
55
56
57
58
59
   /* FIXME: Make that HALF16 instead of HALF32 */
   theta = SUB16(Q15ONE, HALF32(MULT16_16_Q15(gain,gain)));
   /*if (len==30)
   {
   for (i=0;i<len;i++)
   X[i] = 0;
   X[14] = 1;
}*/ 
   c = celt_cos_norm(EXTEND32(theta));
   s = dir*celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
60
61
   if (len > 8*stride)
      stride *= len/(8*stride);
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
   iter = 1;
   for (k=0;k<iter;k++)
   {
      /* We could use MULT16_16_P15 instead of MULT16_16_Q15 for more accuracy, 
      but at this point, I really don't think it's necessary */
      Xptr = X;
      for (i=0;i<len-stride;i++)
      {
         celt_norm_t x1, x2;
         x1 = Xptr[0];
         x2 = Xptr[stride];
         Xptr[stride] = MULT16_16_Q15(c,x2) + MULT16_16_Q15(s,x1);
         *Xptr++      = MULT16_16_Q15(c,x1) - MULT16_16_Q15(s,x2);
      }
      Xptr = &X[len-2*stride-1];
      for (i=len-2*stride-1;i>=0;i--)
      {
         celt_norm_t x1, x2;
         x1 = Xptr[0];
         x2 = Xptr[stride];
         Xptr[stride] = MULT16_16_Q15(c,x2) + MULT16_16_Q15(s,x1);
         *Xptr--      = MULT16_16_Q15(c,x1) - MULT16_16_Q15(s,x2);
      }
   }
   /*if (len==30)
   {
   for (i=0;i<len;i++)
   printf ("%f ", X[i]);
   printf ("\n");
   exit(0);
}*/
}


96
97
/** Takes the pitch vector and the decoded residual vector, computes the gain
    that will give ||p+g*y||=1 and mixes the residual with the pitch. */
98
static void mix_pitch_and_residual(int * restrict iy, celt_norm_t * restrict X, int N, int K)
99
100
{
   int i;
101
   celt_word32_t Ryy;
102
   celt_word32_t g;
103
   VARDECL(celt_norm_t, y);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
104
105
106
#ifdef FIXED_POINT
   int yshift;
#endif
107
   SAVE_STACK;
108
#ifdef FIXED_POINT
109
   yshift = 13-celt_ilog2(K);
110
111
#endif
   ALLOC(y, N, celt_norm_t);
112

Jean-Marc Valin's avatar
Jean-Marc Valin committed
113
   i=0;
114
   Ryy = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
115
   do {
116
      y[i] = SHL16(iy[i],yshift);
117
      Ryy = MAC16_16(Ryy, y[i], y[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
118
119
   } while (++i < N);

120
   g = MULT16_32_Q15(celt_sqrt(Ryy), celt_rcp(SHR32(Ryy,9)));
121

Jean-Marc Valin's avatar
Jean-Marc Valin committed
122
123
   i=0;
   do 
124
      X[i] = ROUND16(MULT16_16(y[i], g),11);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
125
126
   while (++i < N);

127
   RESTORE_STACK;
128
129
}

130

131
void alg_quant(celt_norm_t *X, int N, int K, int spread, ec_enc *enc)
132
{
133
134
   VARDECL(celt_norm_t, y);
   VARDECL(int, iy);
135
   VARDECL(celt_word16_t, signx);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
136
   int j, is;
137
   celt_word16_t s;
138
   int pulsesLeft;
139
   celt_word32_t sum;
140
   celt_word32_t xy, yy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
141
   int N_1; /* Inverse of N, in Q14 format (even for float) */
142
#ifdef FIXED_POINT
143
144
145
146
   int yshift;
#endif
   SAVE_STACK;

147
   K = get_pulses(K);
148
#ifdef FIXED_POINT
149
   yshift = 13-celt_ilog2(K);
150
#endif
151

152
153
   ALLOC(y, N, celt_norm_t);
   ALLOC(iy, N, int);
154
   ALLOC(signx, N, celt_word16_t);
155
   N_1 = 512/N;
156
157
158
   
   if (spread)
      exp_rotation(X, N, 1, spread, K);
159
160

   sum = 0;
161
   j=0; do {
162
163
      if (X[j]>0)
         signx[j]=1;
164
      else {
165
         signx[j]=-1;
166
167
         X[j]=-X[j];
      }
168
169
      iy[j] = 0;
      y[j] = 0;
170
   } while (++j<N);
171

Jean-Marc Valin's avatar
Jean-Marc Valin committed
172
   celt_assert2(Rpp<=NORM_SCALING, "Rpp should never have a norm greater than unity");
173

174
   xy = yy = 0;
175

176
   pulsesLeft = K;
177
178

   /* Do a pre-search by projecting on the pyramid */
179
180
   if (K > (N>>1))
   {
181
      celt_word16_t rcp;
182
      sum=0;
183
184
185
      j=0; do {
         sum += X[j];
      }  while (++j<N);
186
187
188
189
190
191

#ifdef FIXED_POINT
      if (sum <= K)
#else
      if (sum <= EPSILON)
#endif
192
      {
193
         X[0] = QCONST16(1.f,14);
194
195
196
         j=1; do
            X[j]=0;
         while (++j<N);
197
         sum = QCONST16(1.f,14);
198
199
200
      }
      /* Do we have sufficient accuracy here? */
      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
201
      j=0; do {
202
#ifdef FIXED_POINT
203
         /* It's really important to round *towards zero* here */
204
         iy[j] = MULT16_16_Q15(X[j],rcp);
205
#else
206
         iy[j] = floor(rcp*X[j]);
207
#endif
208
209
210
         y[j] = SHL16(iy[j],yshift);
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
211
         y[j] *= 2;
212
213
214
         pulsesLeft -= iy[j];
      }  while (++j<N);
   }
215
   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
216

217
   while (pulsesLeft > 0)
218
   {
219
      int pulsesAtOnce=1;
220
      int best_id;
221
      celt_word16_t magnitude;
222
223
      celt_word32_t best_num = -VERY_LARGE16;
      celt_word16_t best_den = 0;
224
225
226
#ifdef FIXED_POINT
      int rshift;
#endif
227
      /* Decide on how many pulses to find at once */
228
      pulsesAtOnce = (pulsesLeft*N_1)>>9; /* pulsesLeft/N */
229
230
      if (pulsesAtOnce<1)
         pulsesAtOnce = 1;
231
232
233
#ifdef FIXED_POINT
      rshift = yshift+1+celt_ilog2(K-pulsesLeft+pulsesAtOnce);
#endif
234
      magnitude = SHL16(pulsesAtOnce, yshift);
235

236
      best_id = 0;
237
238
      /* The squared magnitude term gets added anyway, so we might as well 
         add it outside the loop */
239
      yy = MAC16_16(yy, magnitude,magnitude);
240
      /* Choose between fast and accurate strategy depending on where we are in the search */
241
         /* This should ensure that anything we can process will have a better score */
242
243
244
245
      j=0;
      do {
         celt_word16_t Rxy, Ryy;
         /* Select sign based on X[j] alone */
246
         s = magnitude;
247
248
249
250
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(MAC16_16(xy, s,X[j]),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = EXTRACT16(SHR32(MAC16_16(yy, s,y[j]),rshift));
251
            
252
            /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that 
253
254
         Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
255
            /* The idea is to check for num/den >= best_num/best_den, but that way
256
257
258
259
260
261
262
263
264
         we can do it without any division */
         /* OPT: Make sure to use conditional moves here */
         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);
265
      
266
      j = best_id;
267
      is = pulsesAtOnce;
268
269
270
271
      s = SHL16(is, yshift);

      /* Updating the sums of the new pulse(s) */
      xy = xy + MULT16_16(s,X[j]);
272
273
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = yy + MULT16_16(s,y[j]);
274
275

      /* Only now that we've made the final choice, update y/iy */
276
277
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[j] += 2*s;
278
      iy[j] += is;
279
      pulsesLeft -= pulsesAtOnce;
280
   }
281
282
283
284
285
286
   j=0;
   do {
      X[j] = MULT16_16(signx[j],X[j]);
      if (signx[j] < 0)
         iy[j] = -iy[j];
   } while (++j<N);
287
   encode_pulses(iy, N, K, enc);
288
   
289
   /* Recompute the gain in one pass to reduce the encoder-decoder mismatch
290
   due to the recursive computation used in quantisation. */
291
   mix_pitch_and_residual(iy, X, N, K);
292
293
   if (spread)
      exp_rotation(X, N, -1, spread, K);
294
   RESTORE_STACK;
295
296
}

297

298
299
/** Decode pulse vector and combine the result with the pitch vector to produce
    the final normalised signal in the current band. */
300
void alg_unquant(celt_norm_t *X, int N, int K, int spread, ec_dec *dec)
301
{
302
   VARDECL(int, iy);
303
   SAVE_STACK;
304
   K = get_pulses(K);
305
   ALLOC(iy, N, int);
306
   decode_pulses(iy, N, K, dec);
307
   mix_pitch_and_residual(iy, X, N, K);
308
309
   if (spread)
      exp_rotation(X, N, -1, spread, K);
310
   RESTORE_STACK;
311
312
}

313
celt_word16_t renormalise_vector(celt_norm_t *X, celt_word16_t value, int N, int stride)
314
315
316
{
   int i;
   celt_word32_t E = EPSILON;
317
   celt_word16_t rE;
318
319
320
321
322
323
324
325
   celt_word16_t g;
   celt_norm_t *xptr = X;
   for (i=0;i<N;i++)
   {
      E = MAC16_16(E, *xptr, *xptr);
      xptr += stride;
   }

326
   rE = celt_sqrt(E);
327
328
329
330
331
332
#ifdef FIXED_POINT
   if (rE <= 128)
      g = Q15ONE;
   else
#endif
      g = MULT16_16_Q15(value,celt_rcp(SHL32(rE,9)));
333
334
335
336
337
338
   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = PSHR32(MULT16_16(g, *xptr),8);
      xptr += stride;
   }
339
   return rE;
340
341
342
}

static void fold(const CELTMode *m, int N, celt_norm_t *Y, celt_norm_t * restrict P, int N0, int B)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
343
{
344
   int j;
345
   const int C = CHANNELS(m);
346
   int id = (N0*C) % (C*B);
347
   /* Here, we assume that id will never be greater than N0, i.e. that 
348
349
      no band is wider than N0. In the unlikely case it happens, we set
      everything to zero */
350
351
352
353
354
355
356
357
358
359
   /*{
	   int offset = (N0*C - (id+C*N))/2;
	   if (offset > C*N0/16)
		   offset = C*N0/16;
	   offset -= offset % (C*B);
	   if (offset < 0)
		   offset = 0;
	   //printf ("%d\n", offset);
	   id += offset;
   }*/
360
   if (id+C*N>N0*C)
361
362
363
364
365
      for (j=0;j<C*N;j++)
         P[j] = 0;
   else
      for (j=0;j<C*N;j++)
         P[j] = Y[id++];
366
367
}

368
void intra_fold(const CELTMode *m, celt_norm_t * restrict x, int N, celt_norm_t *Y, celt_norm_t * restrict P, int N0, int B)
369
{
370
   int c;
371
   const int C = CHANNELS(m);
372
373

   fold(m, N, Y, P, N0, B);
374
375
   c=0;
   do {
376
      renormalise_vector(P+c, Q15ONE, N, C);
377
   } while (++c < C);
378
379
}