/*
 * iaxclient: a cross-platform IAX softphone library
 *
 * Copyrights:
 * Copyright (C) 2003-2006, Horizon Wimba, Inc.
 * Copyright (C) 2007, Wimba, Inc.
 *
 * Contributors:
 * Steve Kann
 * Michael Van Donselaar
 * Shawn Lawrence
 *
 * This program is free software, distributed under the terms of
 * the GNU Lesser (Library) General Public License.
 */

#include "audio_encode.h"
#include "iaxclient_lib.h"
#include "libiax2/src/iax-client.h"
#ifdef CODEC_GSM
#include "codec_gsm.h"
#endif
#include "codec_ulaw.h"
#include "codec_alaw.h"

#include "codec_speex.h"
/* NOTE(review): the header name on this include was missing in the text
 * under review; speex_preprocess.h is the header that declares the
 * SpeexPreprocessState API used below -- confirm the path for this tree. */
#include <speex/speex_preprocess.h>

#ifdef CODEC_ILBC
#include "codec_ilbc.h"
#endif

#include <math.h>   /* log10f */
#include <stdio.h>  /* fprintf */
#include <stdlib.h> /* abs */

/* Intervals used by do_level_callback(), in the units of iaxci_usecdiff() */
enum
{
	LEVEL_CALLBACK_INTERVAL_USEC = 100000, /* min gap between level callbacks */
	LEVEL_STALE_USEC = 1000000             /* audio older than this reads as silence */
};

/*
 * Silence detection threshold. As used by input_postprocess():
 *   > 0  : the speex preprocessor's voice-activity decision is used
 *   <= 0 : the measured input level (in dB) is compared against this value
 */
float iaxci_silence_threshold = AUDIO_ENCODE_SILENCE_DB;

/* smoothed peak levels, as maintained by calculate_level() */
static float input_level = 0.0f;
static float output_level = 0.0f;

/* speex preprocessor state and the frame size / rate it was created for */
static SpeexPreprocessState *st = NULL;
static int speex_state_size = 0;
static int speex_state_rate = 0;

int iaxci_filters = IAXC_FILTER_AGC|IAXC_FILTER_DENOISE|IAXC_FILTER_AAGC|IAXC_FILTER_CN;

/* use to measure time since last audio was processed */
static struct timeval timeLastInput;
static struct timeval timeLastOutput;

static struct iaxc_speex_settings speex_settings =
{
	1,    /* decode_enhance */
	-1,   /* float quality */
	-1,   /* bitrate */
	0,    /* vbr */
	0,    /* abr */
	3     /* complexity */
};

/*
 * Convert a linear volume to decibels (20 * log10(vol)).
 * Non-positive volumes map to AUDIO_ENCODE_SILENCE_DB.
 */
static float vol_to_db(float vol)
{
	/* avoid calling log10() on zero which yields inf or
	 * negative numbers which yield nan */
	if ( vol <= 0.0f )
		return AUDIO_ENCODE_SILENCE_DB;

	return log10f(vol) * 20.0f;
}

/*
 * Report the current input and output levels (in dB) through
 * iaxci_do_levels_callback(), at most once per
 * LEVEL_CALLBACK_INTERVAL_USEC. Always returns 0.
 */
static int do_level_callback(void)
{
	static struct timeval last = {0, 0};
	struct timeval now;
	float input_db;
	float output_db;

	now = iax_tvnow();

	/* rate-limit: skip if the previous report was too recent */
	if ( last.tv_sec != 0 &&
			iaxci_usecdiff(&now, &last) < LEVEL_CALLBACK_INTERVAL_USEC )
		return 0;

	last = now;

	/* if input has not been processed in the last second, set to silent */
	input_db = iaxci_usecdiff(&now, &timeLastInput) < LEVEL_STALE_USEC ?
			vol_to_db(input_level) : AUDIO_ENCODE_SILENCE_DB;

	/* if output has not been processed in the last second, set to silent */
	output_db = iaxci_usecdiff(&now, &timeLastOutput) < LEVEL_STALE_USEC ?
			vol_to_db(output_level) : AUDIO_ENCODE_SILENCE_DB;

	iaxci_do_levels_callback(input_db, output_db);

	return 0;
}

/*
 * Push the current iaxci_filters selection into the speex preprocessor.
 * Does nothing when no preprocessor state exists yet.
 */
static void set_speex_filters(void)
{
	int i;

	if ( !st )
		return;

	i = 1; /* always make VAD decision */
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &i);

	i = (iaxci_filters & IAXC_FILTER_AGC) ? 1 : 0;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i);

	i = (iaxci_filters & IAXC_FILTER_DENOISE) ? 1 : 0;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i);

	/*
	 * We can tweak these parameters to play with VAD sensitivity.
	 * For now, we use the default values since it seems they are a good starting point.
	 * However, if need be, this is the code that needs to change
	 */
	i = 35;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_PROB_START, &i);
	i = 20;
	speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_PROB_CONTINUE, &i);
}

/*
 * Update a smoothed level from a frame of samples.
 *
 * Finds the largest absolute sample among the len samples in audio,
 * scales it by 1/32767, and moves *level one fifth of the way towards
 * that value.
 */
static void calculate_level(const short *audio, int len, float *level)
{
	int big_sample = 0;
	int i;

	for ( i = 0; i < len; i++ )
	{
		const int sample = abs(audio[i]);

		if ( sample > big_sample )
			big_sample = sample;
	}

	*level += ((float)big_sample / 32767.0f - *level) / 5.0f;
}

/*
 * Run captured audio through level metering and the speex preprocessor.
 *
 * audio: frame of samples, processed in place by speex_preprocess()
 * len:   number of samples in the frame
 * rate:  sample rate passed to speex_preprocess_state_init()
 *
 * Returns non-zero when the frame is considered silent.
 */
static int input_postprocess(void *audio, int len, int rate)
{
	float volume;
	int silent = 0;

	/* (re)create the preprocessor whenever the frame size or rate changes */
	if ( !st || speex_state_size != len || speex_state_rate != rate )
	{
		if ( st )
			speex_preprocess_state_destroy(st);
		st = speex_preprocess_state_init(len, rate);
		speex_state_size = len;
		speex_state_rate = rate;
		set_speex_filters();
	}

	calculate_level((short *)audio, len, &input_level);

	/* only preprocess if we're interested in VAD, AGC, or DENOISE.
	 * Skipped when no preprocessor state is available, in which case
	 * the frame is treated as not silent by the VAD path. */
	if ( st &&
			( (iaxci_filters & (IAXC_FILTER_DENOISE | IAXC_FILTER_AGC)) ||
			  iaxci_silence_threshold > 0.0f ) )
		silent = !speex_preprocess(st, (spx_int16_t *)audio, NULL);

	/* Analog AGC: Bring speex AGC gain out to mixer, with lots of hysteresis */
	/* use a higher continuation threshold for AAGC than for VAD itself */
	if ( st && !silent &&
			iaxci_silence_threshold != 0.0f &&
			(iaxci_filters & IAXC_FILTER_AGC) &&
			(iaxci_filters & IAXC_FILTER_AAGC) )
	{
		static int agc_tick = 0;

		agc_tick++;

		/* only look at the AGC loudness every 64th qualifying frame */
		if ( (agc_tick & 0x3f) == 0 )
		{
			float loudness;
#ifdef SPEEX_PREPROCESS_GET_AGC_LOUDNESS
			speex_preprocess_ctl(st, SPEEX_PREPROCESS_GET_AGC_LOUDNESS, &loudness);
#else
			loudness = st->loudness2;
#endif
			if ( loudness > 8000.0f || loudness < 4000.0f )
			{
				const float level = iaxc_input_level_get();

				if ( loudness > 16000.0f && level > 0.5f )
				{
					/* lower quickly if we're really too hot */
					iaxc_input_level_set(level - 0.2f);
				}
				else if ( loudness > 8000.0f && level >= 0.15f )
				{
					/* lower less quickly if we're a bit too hot */
					iaxc_input_level_set(level - 0.1f);
				}
				else if ( loudness < 4000.0f && level <= 0.9f )
				{
					/* raise slowly if we're cold */
					iaxc_input_level_set(level + 0.1f);
				}
			}
		}
	}

	/* This is ugly. Basically just don't get volume level if speex thought
	 * we were silent. Just set it to 0 in that case */
	if ( iaxci_silence_threshold > 0.0f && silent )
		input_level = 0.0f;

	do_level_callback();

	volume = vol_to_db(input_level);

	if ( iaxci_silence_threshold > 0.0f )
		return silent;

	return volume < iaxci_silence_threshold;
}

/*
 * Meter a frame of decoded audio and report levels. Always returns 0.
 */
static int output_postprocess(void *audio, int len)
{
	calculate_level((short *)audio, len, &output_level);

	do_level_callback();

	return 0;
}

/*
 * Create a codec instance for the audio format selected by
 * (format & IAXC_AUDIO_FORMAT_MASK).
 * Returns NULL (after logging to stderr) for unsupported formats.
 */
static struct iaxc_audio_codec *create_codec(int format)
{
	switch ( format & IAXC_AUDIO_FORMAT_MASK )
	{
#ifdef CODEC_GSM
	case IAXC_FORMAT_GSM:
		return codec_audio_gsm_new();
#endif
	case IAXC_FORMAT_ULAW:
		return codec_audio_ulaw_new();
	case IAXC_FORMAT_ALAW:
		return codec_audio_alaw_new();
	case IAXC_FORMAT_SPEEX:
		return codec_audio_speex_new(&speex_settings);
#ifdef CODEC_ILBC
	case IAXC_FORMAT_ILBC:
		return codec_audio_ilbc_new();
#endif
	default:
		/* ERROR: codec not supported */
		fprintf(stderr, "ERROR: Codec not supported: %d\n", format);
		return NULL;
	}
}

/*
 * Store the speex settings handed to codec_audio_speex_new() the next
 * time create_codec() builds a speex codec.
 */
EXPORT void iaxc_set_speex_settings(int decode_enhance, float quality,
		int bitrate, int vbr, int abr, int complexity)
{
	speex_settings.decode_enhance = decode_enhance;
	speex_settings.quality = quality;
	speex_settings.bitrate = bitrate;
	speex_settings.vbr = vbr;
	speex_settings.abr = abr;
	speex_settings.complexity = complexity;
}

/*
 * Post-process, encode and send one frame of captured audio on a call.
 *
 * call:    call whose encoder and session are used
 * callNo:  call number passed through to the audio callback
 * data:    frame of samples (processed in place)
 * format:  audio format to encode with; 0 means "not negotiated yet"
 * samples: number of samples in data
 *
 * Returns 0 when the frame was sent or deliberately skipped (silence,
 * no format yet, no codec, encoder error), -1 when the encoder consumed
 * no samples or iax_send_voice() failed.
 */
int audio_send_encoded_audio(struct iaxc_call *call, int callNo,
		void *data, int format, int samples)
{
	unsigned char outbuf[1024];
	int outsize = 1024;
	int silent;
	int insize = samples;
	int encoded_len;

	/* update last input timestamp */
	timeLastInput = iax_tvnow();

	silent = input_postprocess(data, insize, 8000);

	if ( silent )
	{
		if ( !call->tx_silent )
		{
			/* send a Comfort Noise Frame */
			call->tx_silent = 1;
			if ( iaxci_filters & IAXC_FILTER_CN )
				iax_send_cng(call->session, 10, NULL, 0);
		}
		return 0;  /* poof! no encoding! */
	}

	/* we're going to send voice now */
	call->tx_silent = 0;

	/* destroy encoder if it is incorrect type */
	if ( call->encoder && call->encoder->format != format )
	{
		call->encoder->destroy(call->encoder);
		call->encoder = NULL;
	}

	/* just break early if there's no format defined: this happens for the
	 * first couple of frames of new calls */
	if ( format == 0 )
		return 0;

	/* create encoder if necessary */
	if ( !call->encoder )
	{
		call->encoder = create_codec(format);
	}

	if ( !call->encoder )
	{
		/* ERROR: no codec */
		fprintf(stderr, "ERROR: Codec could not be created: %d\n", format);
		return 0;
	}

	if ( call->encoder->encode(call->encoder, &insize, (short *)data,
				&outsize, outbuf) )
	{
		/* ERROR: codec error */
		fprintf(stderr, "ERROR: encode error: %d\n", format);
		return 0;
	}

	if ( samples - insize == 0 )
	{
		fprintf(stderr, "ERROR encoding (no samples output (samples=%d)\n", samples);
		return -1;
	}

	/* As used here, the encoder leaves in outsize the unused part of
	 * outbuf, so the encoded length is the difference. */
	encoded_len = (int)sizeof(outbuf) - outsize;

	/* Send the encoded audio data back to the app if required */
	/* TODO: fix the stupid way in which the encoded audio size is returned */
	if ( iaxc_get_audio_prefs() & IAXC_AUDIO_PREF_RECV_LOCAL_ENCODED )
		iaxci_do_audio_callback(callNo, 0, IAXC_SOURCE_LOCAL, 1,
				call->encoder->format & IAXC_AUDIO_FORMAT_MASK,
				encoded_len, outbuf);

	if ( iax_send_voice(call->session, format, outbuf,
				encoded_len, samples - insize) == -1 )
	{
		fprintf(stderr, "Failed to send voice! %s\n", iax_errstr);
		return -1;
	}

	return 0;
}

/*
 * Decode encoded audio.
 *
 * call:    call whose decoder is used (created or replaced as needed)
 * out:     destination buffer for decoded samples
 * data:    encoded input, len bytes
 * format:  audio format of the input; must be non-zero
 * samples: in: value handed to the decoder as the output size;
 *          out: the decoder's updated output size (presumably the
 *          remaining capacity -- confirm against the codec contract)
 *
 * Returns the number of bytes decoded; negative indicates error.
 */
int audio_decode_audio(struct iaxc_call * call, void * out, void * data, int len,
		int format, int * samples)
{
	int insize = len;
	int outsize = *samples;

	/* update last output timestamp */
	timeLastOutput = iax_tvnow();

	if ( format == 0 )
	{
		fprintf(stderr, "audio_decode_audio: Format is zero (should't happen)!\n");
		return -1;
	}

	/* destroy decoder if it is incorrect type */
	if ( call->decoder && call->decoder->format != format )
	{
		call->decoder->destroy(call->decoder);
		call->decoder = NULL;
	}

	/* create decoder if necessary */
	if ( !call->decoder )
	{
		call->decoder = create_codec(format);
	}

	if ( !call->decoder )
	{
		fprintf(stderr, "ERROR: Codec could not be created: %d\n",
				format);
		return -1;
	}

	if ( call->decoder->decode(call->decoder,
				&insize, (unsigned char *)data,
				&outsize, (short *)out) )
	{
		fprintf(stderr, "ERROR: decode error: %d\n", format);
		return -1;
	}

	/* meter only the samples the decoder actually produced */
	output_postprocess(out, *samples - outsize);

	*samples = outsize;
	return len - insize;
}

/* Return the currently selected IAXC_FILTER_* bitmask. */
EXPORT int iaxc_get_filters(void)
{
	return iaxci_filters;
}

/* Replace the IAXC_FILTER_* bitmask and apply it to the preprocessor. */
EXPORT void iaxc_set_filters(int filters)
{
	iaxci_filters = filters;
	set_speex_filters();
}

/* Set the silence threshold and re-apply the preprocessor filters. */
EXPORT void iaxc_set_silence_threshold(float thr)
{
	iaxci_silence_threshold = thr;
	set_speex_filters();
}