diff --git a/src/opus.h b/src/opus.h index 37c1129e77997d5c6380dac8e1581de6b9b5ba37..e62bedd3151c6279da4983d206001882063008f0 100644 --- a/src/opus.h +++ b/src/opus.h @@ -65,13 +65,12 @@ extern "C" { #define OPUS_BITRATE_AUTO -1 -#define OPUS_MODE_AUTO 2000 -#define OPUS_MODE_VOICE 2001 -#define OPUS_MODE_AUDIO 2002 +#define OPUS_MODE_VOICE 2000 +#define OPUS_MODE_AUDIO 2001 -#define MODE_SILK_ONLY 1000 -#define MODE_HYBRID 1001 -#define MODE_CELT_ONLY 1002 +#define MODE_SILK_ONLY 1000 +#define MODE_HYBRID 1001 +#define MODE_CELT_ONLY 1002 #define BANDWIDTH_AUTO 1100 #define BANDWIDTH_NARROWBAND 1101 @@ -140,25 +139,65 @@ extern "C" { typedef struct OpusEncoder OpusEncoder; typedef struct OpusDecoder OpusDecoder; -OPUS_EXPORT OpusEncoder *opus_encoder_init(OpusEncoder* st, int Fs, int channels); - -OPUS_EXPORT OpusEncoder *opus_encoder_create(int Fs, int channels); +/* + * There are two coding modes: + * OPUS_MODE_VOICE gives best quality at a given bitrate for voice signals. It enhances the + * input signal by high-pass filtering and emphasizing formants and harmonics. Optionally + * it includes in-band forward error correction to protect against packet loss. Use this + * mode for typical VoIP applications. Because of the enhancement, even at high bitrates + * the output may sound different from the input. + * OPUS_MODE_AUDIO gives best quality at a given bitrate for most non-voice signals like music. + * Use this mode for music and mixed (music/voice) content, and applications requiring less + * than 15 ms of coding delay. 
+ */ + +/* Returns initialized encoder state */ +OPUS_EXPORT OpusEncoder *opus_encoder_create( + int Fs, /* Sampling rate of input signal (Hz) */ + int channels, /* Number of channels (1/2) in input signal */ + int mode /* Coding mode (OPUS_MODE_VOICE/OPUS_MODE_AUDIO) */ +); + +OPUS_EXPORT OpusEncoder *opus_encoder_init( + OpusEncoder *st, /* Encoder state */ + int Fs, /* Sampling rate of input signal (Hz) */ + int channels, /* Number of channels (1/2) in input signal */ + int mode /* Coding mode (OPUS_MODE_VOICE/OPUS_MODE_AUDIO) */ +); /* returns length of data payload (in bytes) */ -OPUS_EXPORT int opus_encode(OpusEncoder *st, const short *pcm, int frame_size, - unsigned char *data, int max_data_bytes); +OPUS_EXPORT int opus_encode( + OpusEncoder *st, /* Encoder state */ + const short *pcm, /* Input signal (interleaved if 2 channels). length is frame_size*channels */ + int frame_size, /* Number of samples per frame of input signal */ + unsigned char *data, /* Output payload (no more than max_data_bytes long) */ + int max_data_bytes /* Allocated memory for payload; don't use for controlling bitrate */ +); OPUS_EXPORT void opus_encoder_destroy(OpusEncoder *st); OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...); -OPUS_EXPORT OpusDecoder *opus_decoder_init(OpusDecoder *st, int Fs, int channels); +OPUS_EXPORT OpusDecoder *opus_decoder_create( + int Fs, /* Sampling rate of output signal (Hz) */ + int channels /* Number of channels (1/2) in output signal */ +); -OPUS_EXPORT OpusDecoder *opus_decoder_create(int Fs, int channels); +OPUS_EXPORT OpusDecoder *opus_decoder_init(OpusDecoder *st, + int Fs, /* Sampling rate of output signal (Hz) */ + int channels /* Number of channels (1/2) in output signal */ +); /* returns (CELT) error code */ -OPUS_EXPORT int opus_decode(OpusDecoder *st, const unsigned char *data, int len, - short *pcm, int frame_size, int decode_fec); +OPUS_EXPORT int opus_decode( + OpusDecoder *st, /* Decoder state */ + const unsigned char 
*data, /* Input payload. Use a NULL pointer to indicate packet loss */ + int len, /* Number of bytes in payload */ + short *pcm, /* Output signal (interleaved if 2 channels). length is frame_size*channels */ + int frame_size, /* Number of samples per frame of output signal */ + int decode_fec /* Flag (0/1) to request that any in-band forward error correction data be */ + /* decoded. If no such data is available the frame is decoded as if it were lost. */ +); OPUS_EXPORT int opus_decoder_ctl(OpusDecoder *st, int request, ...); @@ -170,7 +209,7 @@ OPUS_EXPORT int opus_packet_get_nb_channels(const unsigned char *data); OPUS_EXPORT int opus_packet_get_nb_frames(const unsigned char packet[], int len); OPUS_EXPORT int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], int len); - +/* For testing purposes: the encoder and decoder state should always be identical after coding a payload */ #if OPUS_TEST_RANGE_CODER_STATE OPUS_EXPORT int opus_encoder_get_final_range(OpusEncoder *st); OPUS_EXPORT int opus_decoder_get_final_range(OpusDecoder *st); diff --git a/src/opus_decoder.c b/src/opus_decoder.c index c784233685f2cd9cacb40358756b8fd02254c911..add9226ebf9d69f3d8ee14c118d37f8c36666d32 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -103,7 +103,7 @@ failure: OpusDecoder *opus_decoder_create(int Fs, int channels) { - char *raw_state = malloc(opus_decoder_get_size(channels)); + char *raw_state = (char*)malloc(opus_decoder_get_size(channels)); if (raw_state == NULL) return NULL; return opus_decoder_init((OpusDecoder*)raw_state, Fs, channels); diff --git a/src/opus_encoder.c b/src/opus_encoder.c index f306606a74e14281028804339d91835d658db535..b4fcd93ebd64994add528b7a91150a7d041c789d 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -75,7 +75,7 @@ int opus_encoder_get_size(int channels) } -OpusEncoder *opus_encoder_init(OpusEncoder* st, int Fs, int channels) +OpusEncoder *opus_encoder_init(OpusEncoder* st, int Fs, int channels,
int mode) { void *silk_enc; CELTEncoder *celt_enc; @@ -115,6 +115,7 @@ OpusEncoder *opus_encoder_init(OpusEncoder* st, int Fs, int channels) st->silk_mode.useInBandFEC = 0; st->silk_mode.useDTX = 0; st->silk_mode.useCBR = 0; + st->silk_mode.HP_cutoff_Hz = 0; st->hybrid_stereo_width_Q14 = 1 << 14; @@ -130,7 +131,7 @@ OpusEncoder *opus_encoder_init(OpusEncoder* st, int Fs, int channels) st->use_vbr = 0; st->user_bitrate_bps = OPUS_BITRATE_AUTO; st->bitrate_bps = 3000+Fs*channels; - st->user_mode = OPUS_MODE_AUTO; + st->user_mode = mode; st->user_bandwidth = BANDWIDTH_AUTO; st->voice_ratio = 90; st->first = 1; @@ -145,12 +146,12 @@ failure: return NULL; } -OpusEncoder *opus_encoder_create(int Fs, int channels) +OpusEncoder *opus_encoder_create(int Fs, int channels, int mode) { char *raw_state = (char *)malloc(opus_encoder_get_size(channels)); if (raw_state == NULL) return NULL; - return opus_encoder_init((OpusEncoder*)raw_state, Fs, channels); + return opus_encoder_init((OpusEncoder*)raw_state, Fs, channels, mode); } int opus_encode(OpusEncoder *st, const short *pcm, int frame_size, @@ -160,7 +161,7 @@ int opus_encode(OpusEncoder *st, const short *pcm, int frame_size, CELTEncoder *celt_enc; int i; int ret=0; - SKP_int32 nBytes; + int nBytes; ec_enc enc; int framerate, period; int silk_internal_bandwidth=-1; @@ -187,8 +188,9 @@ int opus_encode(OpusEncoder *st, const short *pcm, int frame_size, /* Rate-dependent mono-stereo decision */ if (st->force_mono) + { st->stream_channels = 1; - if (st->mode == MODE_CELT_ONLY && st->channels == 2) + } else if (st->mode == MODE_CELT_ONLY && st->channels == 2) { celt_int32 decision_rate; decision_rate = st->bitrate_bps + st->voice_ratio*st->voice_ratio; @@ -213,21 +215,7 @@ int opus_encode(OpusEncoder *st, const short *pcm, int frame_size, mono_rate -= 60*(st->Fs/frame_size - 50); /* Mode selection */ - if (st->user_mode==OPUS_MODE_AUTO) - { - celt_int32 decision_rate; - /* SILK/CELT threshold is higher for voice than for music */ 
- decision_rate = mono_rate - 3*st->voice_ratio*st->voice_ratio; - /* Hysteresis */ - if (st->prev_mode == MODE_CELT_ONLY) - decision_rate += 4000; - else if (st->prev_mode>0) - decision_rate -= 4000; - if (decision_rate>24000) - st->mode = MODE_CELT_ONLY; - else - st->mode = MODE_SILK_ONLY; - } else if (st->user_mode==OPUS_MODE_VOICE) + if (st->user_mode==OPUS_MODE_VOICE) { st->mode = MODE_SILK_ONLY; } else {/* OPUS_AUDIO_MODE */ @@ -525,7 +513,6 @@ int opus_encode(OpusEncoder *st, const short *pcm, int frame_size, if (st->mode != MODE_SILK_ONLY) { - /* Encode high band with CELT */ ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); } diff --git a/src/opus_encoder.h b/src/opus_encoder.h index ca1e28aedc6f546b27139cb4dec195bec4c9be80..87910b4376c4b0349bbd1a5e3aba91b6c4e84944 100644 --- a/src/opus_encoder.h +++ b/src/opus_encoder.h @@ -36,20 +36,20 @@ #define MAX_ENCODER_BUFFER 480 struct OpusEncoder { - int celt_enc_offset; - int silk_enc_offset; - silk_EncControlStruct silk_mode; + int celt_enc_offset; + int silk_enc_offset; + silk_EncControlStruct silk_mode; int hybrid_stereo_width_Q14; - int channels; - int stream_channels; + int channels; + int stream_channels; + int force_mono; int mode; int user_mode; - int force_mono; int prev_mode; - int bandwidth; - int user_bandwidth; - int voice_ratio; + int bandwidth; + int user_bandwidth; + int voice_ratio; /* Sampling rate (at the API level) */ int Fs; int use_vbr; diff --git a/src/test_opus.c b/src/test_opus.c index 19165840ab3f6c607c8dccd30daf2a3ee5158550..f2d3db576d94baa336dd8d182b92ea6fa5bcb98f 100644 --- a/src/test_opus.c +++ b/src/test_opus.c @@ -42,11 +42,12 @@ void print_usage( char* argv[] ) { - fprintf(stderr, "Usage: %s <mode (0/1/2)> <sampling rate (Hz)> <channels> " + fprintf(stderr, "Usage: %s <mode (0/1)> <sampling rate (Hz)> <channels (1/2)> " "<bits per second> [options] <input> <output>\n\n", argv[0]); - fprintf(stderr, "mode: 0 for auto, 1 for voice, 2 for 
audio:\n" ); + fprintf(stderr, "mode: 0 for voice, 1 for audio:\n" ); fprintf(stderr, "options:\n" ); - fprintf(stderr, "-cbr : enable constant bitrate; default: VBR\n" ); + fprintf(stderr, "-cbr : enable constant bitrate; default: variable bitrate\n" ); + fprintf(stderr, "-cvbr : enable constraint variable bitrate; default: unconstraint\n" ); fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" ); fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" ); fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1024\n" ); @@ -106,7 +107,7 @@ int main(int argc, char *argv[]) return 1; } - mode = atoi(argv[1]) + OPUS_MODE_AUTO; + mode = atoi(argv[1]) + OPUS_MODE_VOICE; sampling_rate = atoi(argv[2]); channels = atoi(argv[3]); bitrate_bps = atoi(argv[4]); @@ -199,8 +200,8 @@ int main(int argc, char *argv[]) } } - if( mode < OPUS_MODE_AUTO || mode > OPUS_MODE_AUDIO) { - fprintf (stderr, "mode must be: 0, 1 or 2\n"); + if( mode < OPUS_MODE_VOICE || mode > OPUS_MODE_AUDIO) { + fprintf (stderr, "mode must be: 0 or 1\n"); return 1; } @@ -232,7 +233,7 @@ int main(int argc, char *argv[]) return 1; } - enc = opus_encoder_create(sampling_rate, channels); + enc = opus_encoder_create(sampling_rate, channels, mode); dec = opus_decoder_create(sampling_rate, channels); if (enc==NULL) @@ -246,7 +247,6 @@ int main(int argc, char *argv[]) exit(1); } - opus_encoder_ctl(enc, OPUS_SET_MODE(mode)); opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps)); opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(bandwidth)); opus_encoder_ctl(enc, OPUS_SET_VBR_FLAG(use_vbr));