Commit 888d8ce9 authored by Gregory Maxwell, committed by Jean-Marc Valin
Browse files

VBR support. VBR API and VBR support in celtenc.

parent 381d05aa
......@@ -78,6 +78,7 @@ struct CELTEncoder {
int pitch_enabled;
int pitch_available;
int delayedIntra;
int VBR_rate; /* Target number of 16th bits per frame */
celt_word16_t * restrict preemph_memE; /* Input is 16-bit, so why bother with 32 */
celt_sig_t * restrict preemph_memD;
......@@ -108,6 +109,7 @@ CELTEncoder *celt_encoder_create(const CELTMode *mode)
st->block_size = N;
st->overlap = mode->overlap;
st->VBR_rate = 0;
st->pitch_enabled = 1;
st->pitch_available = 1;
st->delayedIntra = 1;
......@@ -439,6 +441,7 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
int pitch_index;
int bits;
int has_fold=1;
unsigned coarse_needed;
ec_byte_buffer buf;
ec_enc enc;
VARDECL(celt_sig_t, in);
......@@ -667,8 +670,29 @@ int celt_encode_float(CELTEncoder * restrict st, const celt_sig_t * pcm, celt_si
/* Bit allocation */
ALLOC(error, C*st->mode->nbEBands, celt_word16_t);
quant_coarse_energy(st->mode, bandE, st->oldBandE, nbCompressedBytes*8/3, intra_ener, st->mode->prob, error, &enc);
coarse_needed = quant_coarse_energy(st->mode, bandE, st->oldBandE, nbCompressedBytes*8/3, intra_ener, st->mode->prob, error, &enc);
coarse_needed = ((coarse_needed*3-1)>>3)+1;
/* Variable bitrate */
if (st->VBR_rate>0)
{
/* The target rate per frame, in units of 1/16th of a bit */
int target=st->VBR_rate;
/* Short blocks get a large boost in bitrate, but since they are uncommon, long blocks are not greatly affected */
if (shortBlocks)
target*=2;
else if (st->mode->nbShortMdcts > 1)
target-=(target+14)/28;
/*The average energy is removed from the target and the actual energy added*/
target=target-588+ec_enc_tell(&enc, 4);
/* In VBR mode the frame size must not be reduced so much that it would result in the coarse energy busting its budget */
target=IMAX(coarse_needed,(target+64)/128);
nbCompressedBytes=IMIN(nbCompressedBytes,target);
}
ALLOC(offsets, st->mode->nbEBands, int);
ALLOC(stereo_mode, st->mode->nbEBands, int);
stereo_decision(st->mode, X, stereo_mode, st->mode->nbEBands);
......@@ -807,7 +831,7 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
{
case CELT_SET_COMPLEXITY_REQUEST:
{
int value = va_arg(ap, int);
int value = va_arg(ap, celt_int32_t);
if (value<0 || value>10)
goto bad_arg;
if (value<=2) {
......@@ -822,7 +846,7 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
break;
case CELT_SET_LTP_REQUEST:
{
int value = va_arg(ap, int);
int value = va_arg(ap, celt_int32_t);
if (value<0 || value>1 || (value==1 && st->pitch_available==0))
goto bad_arg;
if (value==0)
......@@ -831,6 +855,17 @@ int celt_encoder_ctl(CELTEncoder * restrict st, int request, ...)
st->pitch_enabled = 1;
}
break;
case CELT_SET_VBR_RATE_REQUEST:
{
int value = va_arg(ap, celt_int32_t);
if (value<0)
goto bad_arg;
if (value>3072000)
value = 3072000;
st->VBR_rate = ((st->mode->Fs<<3)+(st->block_size>>1))/st->block_size;
st->VBR_rate = ((value<<7)+(st->VBR_rate>>1))/st->VBR_rate;
}
break;
default:
goto bad_request;
}
......
......@@ -51,7 +51,7 @@ extern "C" {
#define EXPORT
#endif
#define _celt_check_int(x) (((void)((x) == (int)0)), (int)(x))
#define _celt_check_int(x) (((void)((x) == (celt_int32_t)0)), (celt_int32_t)(x))
/* Error codes */
/** No error */
......@@ -71,9 +71,12 @@ extern "C" {
#define CELT_SET_COMPLEXITY_REQUEST 2
/** Controls the complexity from 0-10 (int) */
#define CELT_SET_COMPLEXITY(x) CELT_SET_COMPLEXITY_REQUEST, _celt_check_int(x)
#define CELT_SET_LTP_REQUEST 3
#define CELT_SET_LTP_REQUEST 4
/** Activate (1) or deactivate (0) the use of the long term predictor (PITCH) (int) */
#define CELT_SET_LTP(x) CELT_SET_LTP_REQUEST, _celt_check_int(x)
#define CELT_SET_VBR_RATE_REQUEST 6
/** Set the target VBR rate in bits per second (int); 0=CBR (default) */
#define CELT_SET_VBR_RATE(x) CELT_SET_VBR_RATE_REQUEST, _celt_check_int(x)
/** GET the frame size used in the current mode */
#define CELT_GET_FRAME_SIZE 1000
......
......@@ -122,10 +122,11 @@ void quant_prob_free(int *freq)
celt_free(freq);
}
static void quant_coarse_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, unsigned budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc)
static unsigned quant_coarse_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, unsigned budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc)
{
int i;
unsigned bits;
unsigned bits_used = 0;
celt_word16_t prev = 0;
celt_word16_t coef = m->ePredCoef;
celt_word16_t beta;
......@@ -159,7 +160,8 @@ static void quant_coarse_energy_mono(const CELTMode *m, celt_ener_t *eBands, cel
#endif
/* If we don't have enough bits to encode all the energy, just assume something safe.
We allow slightly busting the budget here */
if (ec_enc_tell(enc, 0) - bits > budget)
bits_used=ec_enc_tell(enc, 0) - bits;
if (bits_used > budget)
{
qi = -1;
error[i] = 128;
......@@ -174,6 +176,7 @@ static void quant_coarse_energy_mono(const CELTMode *m, celt_ener_t *eBands, cel
oldEBands[i] = -QCONST16(12.f,8);
prev = mean+prev+MULT16_16_Q15(Q15ONE-beta,q);
}
return bits_used;
}
static void quant_fine_energy_mono(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, celt_word16_t *error, int *fine_quant, ec_enc *enc)
......@@ -279,27 +282,31 @@ static void unquant_fine_energy_mono(const CELTMode *m, celt_ener_t *eBands, cel
/* Coarse energy quantisation entry point: dispatches to quant_coarse_energy_mono()
   once per channel and reports how many bits the coarse energy needed.

   Returns: with a single channel, whatever quant_coarse_energy_mono() returns;
   with several channels, the largest per-channel value multiplied by the
   channel count C.

   NOTE(review): this listing appears to be a diff whose +/- markers were lost,
   so each changed statement shows up twice (the old form, then the new form
   that returns/captures the bit count) -- confirm against the real file. */
void quant_coarse_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc)
unsigned quant_coarse_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc)
{
int C;
C = m->nbChannels;
if (C==1)
{
/* Single channel: the band energies can be passed straight through. */
quant_coarse_energy_mono(m, eBands, oldEBands, budget, intra, prob, error, enc);
return quant_coarse_energy_mono(m, eBands, oldEBands, budget, intra, prob, error, enc);
} else {
int c;
/* Largest bit count reported by any single channel so far. */
unsigned maxBudget=0;
for (c=0;c<C;c++)
{
int i;
unsigned coarse_needed;
VARDECL(celt_ener_t, E);
SAVE_STACK;
ALLOC(E, m->nbEBands, celt_ener_t);
/* eBands is read with channels interleaved per band (index C*i+c);
   gather channel c into the contiguous scratch array E. */
for (i=0;i<m->nbEBands;i++)
E[i] = eBands[C*i+c];
/* oldEBands and error are addressed as one nbEBands-long run per channel;
   each channel is given an equal budget/C share of the budget. */
quant_coarse_energy_mono(m, E, oldEBands+c*m->nbEBands, budget/C, intra, prob, error+c*m->nbEBands, enc);
coarse_needed=quant_coarse_energy_mono(m, E, oldEBands+c*m->nbEBands, budget/C, intra, prob, error+c*m->nbEBands, enc);
maxBudget=IMAX(maxBudget,coarse_needed);
RESTORE_STACK;
}
/* Scale the worst per-channel count by the number of channels. */
return maxBudget*C;
}
}
......
......@@ -44,7 +44,7 @@ void compute_fine_allocation(const CELTMode *m, int *bits, int budget);
int intra_decision(celt_ener_t *eBands, celt_word16_t *oldEBands, int len);
void quant_coarse_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc);
unsigned quant_coarse_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, int budget, int intra, int *prob, celt_word16_t *error, ec_enc *enc);
void quant_fine_energy(const CELTMode *m, celt_ener_t *eBands, celt_word16_t *oldEBands, celt_word16_t *error, int *fine_quant, ec_enc *enc);
......
......@@ -80,12 +80,14 @@ int oe_write_page(ogg_page *page, FILE *fp)
}
#define MAX_FRAME_SIZE 2000
#define MAX_FRAME_BYTES 2000
#define MAX_FRAME_BYTES 300
#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */
/* Convert input audio bits, endians and channels */
static int read_samples(FILE *fin,int frame_size, int bits, int channels, int lsb, short * input, char *buff, celt_int32_t *size)
{
unsigned char in[MAX_FRAME_BYTES*2];
unsigned char in[MAX_FRAME_SIZE*2];
int i;
short *s;
int nb_read;
......@@ -212,6 +214,7 @@ void usage(void)
printf ("\n");
printf ("Options:\n");
printf (" --bitrate n Encoding bit-rate in kbit/sec\n");
printf (" --vbr Use variable bitrate encoding\n");
printf (" --comp n Encoding complexity (0-10)\n");
printf (" --framesize n Frame size (Default: 256)\n");
printf (" --skeleton Outputs ogg skeleton metadata (may cause incompatibilities)\n");
......@@ -248,10 +251,14 @@ int main(int argc, char **argv)
CELTMode *mode;
void *st;
unsigned char bits[MAX_FRAME_BYTES];
int with_vbr = 0;
int with_skeleton = 0;
int total_bytes = 0;
int peak_bytes = 0;
struct option long_options[] =
{
{"bitrate", required_argument, NULL, 0},
{"vbr",no_argument,NULL, 0},
{"comp", required_argument, NULL, 0},
{"framesize", required_argument, NULL, 0},
{"skeleton",no_argument,NULL, 0},
......@@ -312,6 +319,9 @@ int main(int argc, char **argv)
if (strcmp(long_options[option_index].name,"bitrate")==0)
{
bitrate = atof (optarg);
} else if (strcmp(long_options[option_index].name,"vbr")==0)
{
with_vbr=1;
} else if (strcmp(long_options[option_index].name,"skeleton")==0)
{
with_skeleton=1;
......@@ -444,26 +454,30 @@ int main(int argc, char **argv)
}
}
if (bitrate<0)
if (bitrate<=0.005)
if (chan==1)
bitrate=64.0;
else
bitrate=128.0;
if (chan>2) {
}
bytes_per_packet = (bitrate*1000*frame_size/rate+4)/8;
if (bytes_per_packet < 8) {
bytes_per_packet=8;
fprintf (stderr, "Warning: Requested bitrate (%0.3fkbit/sec) is too low. Setting CELT to 8 bytes/frame.\n",bitrate);
} else if (bytes_per_packet > 300) {
bytes_per_packet=300;
fprintf (stderr, "Warning: Requested bitrate (%0.3fkbit/sec) is too high. Setting CELT to 300 bytes/frame.\n",bitrate);
} else if (bytes_per_packet > MAX_FRAME_BYTES) {
bytes_per_packet=MAX_FRAME_BYTES;
fprintf (stderr, "Warning: Requested bitrate (%0.3fkbit/sec) is too high. Setting CELT to %d bytes/frame.\n",bitrate,MAX_FRAME_BYTES);
}
bitrate = ((rate/(float)frame_size)*8*bytes_per_packet)/1000.0;
if (with_vbr)
{
/* In VBR mode the bytes_per_packet argument becomes a hard maximum. 3x the average rate is just an arbitrary choice. */
bytes_per_packet=IMIN(bytes_per_packet*3,MAX_FRAME_BYTES);
} else {
bitrate = ((rate/(float)frame_size)*8*bytes_per_packet)/1000.0;
}
mode = celt_mode_create(rate, chan, frame_size, NULL);
if (!mode)
return 1;
......@@ -483,13 +497,27 @@ int main(int argc, char **argv)
if (chan==2)
st_string="stereo";
if (!quiet)
fprintf (stderr, "Encoding %d Hz %s audio in %d sample packets at %0.3fkbit/sec (%d bytes per packet) with bitstream version %d\n",
if (with_vbr)
fprintf (stderr, "Encoding %d Hz %s audio in %d sample packets at %0.3fkbit/sec (%d maximum bytes per packet) with bitstream version %d\n",
header.sample_rate, st_string, frame_size, bitrate, bytes_per_packet,bitstream);
else
fprintf (stderr, "Encoding %d Hz %s audio in %d sample packets at %0.3fkbit/sec (%d bytes per packet) with bitstream version %d\n",
header.sample_rate, st_string, frame_size, bitrate, bytes_per_packet,bitstream);
}
/*Initialize CELT encoder*/
st = celt_encoder_create(mode);
if (with_vbr)
{
int tmp = (bitrate*1000);
if (celt_encoder_ctl(st, CELT_SET_VBR_RATE(tmp)) != CELT_OK)
{
fprintf (stderr, "VBR request failed\n");
return 1;
}
}
if (complexity!=-127) {
if (celt_encoder_ctl(st, CELT_SET_COMPLEXITY(complexity)) != CELT_OK)
{
......@@ -624,6 +652,8 @@ int main(int argc, char **argv)
break;
}
nb_encoded += frame_size;
total_bytes += nbBytes;
peak_bytes=IMAX(nbBytes,peak_bytes);
if (wave_input)
{
......@@ -682,6 +712,9 @@ int main(int argc, char **argv)
bytes_written += ret;
}
if (with_vbr && !quiet)
fprintf (stderr, "Average rate %0.3fkbit/sec, %d peak bytes per packet\n", (total_bytes*8.0/((float)nb_encoded/header.sample_rate))/1000.0, peak_bytes);
celt_encoder_destroy(st);
celt_mode_destroy(mode);
ogg_stream_clear(&os);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment