在一般的VOIP软件或视频会议系统中,假设我们只有A和B两个人在通话,首先,A的声音传给B,B然后用喇叭放出来,而这时B的MIC呢则会采集到喇叭放出来的声音,然后传回给A,如果这个传输的过程中时延足够大,A就会听到一个和自己刚才说过的话一样的声音,这就是回声,声学回声消除器的作用就是在B端对B采集到的声音进行处理,把采集到声音包含的A的声音去掉再传给A,这样,A就不会听到自己说过的话了。 声学回声消除的原理我就不说了,这在网上有很多文档,网上缺少的是实现,所以,我在这把一个开源的声学回声消除器介绍一下,希望对有些有人用,如果有人知道怎么把这消除器用的基于实时流的VOIP软件中,希望能一起分享一下。 这个声学回声消除器是一个著名的音频编解码器speex中的一部分,1.1.9版本后的回声消除器才起作用,以前版本的都不行,我用的也是这个版本,测试表明,用同一个模拟文件,它有效果比INTEL IPP库4.1版中的声学回声消除器的还要好。 先说编译。首先,从 上下载speex1.1.9的源代码,解压,打开speex/win32/libspeex中的libspeex.dsw,这个工作区里有两个工程,一个是libspeex,另一个是libspeex_dynamic。然后,将libspeex中的mdf.c文件添加到工程libspeex中,编译即可。 以下是我根据文档封装的一个类,里面有一个测试程序: //file name: speexEC.h #ifndef SPEEX_EC_H #define SPEEX_EC_H #include < stdio.h > #include < stdlib.h > #include " speex/speex_echo.h " #include " speex/speex_preprocess.h " class CSpeexEC { public :CSpeexEC(); ~ CSpeexEC(); void Init( int frame_size = 160 , int filter_length = 1280 , int sampling_rate = 8000 ); void DoAEC( short * mic, short * ref , short * out ); protected : void Reset(); private : bool m_bHasInit;SpeexEchoState * m_pState; SpeexPreprocessState * m_pPreprocessorState; int m_nFrameSize; int m_nFilterLen; int m_nSampleRate; float * m_pfNoise;} ; #endif //fine name:speexEC.cpp
#include " SpeexEC.h " CSpeexEC::CSpeexEC() { m_bHasInit = false ;m_pState = NULL;m_pPreprocessorState = NULL;m_nFrameSize = 160 ;m_nFilterLen = 160 * 8 ;m_nSampleRate = 8000 ;m_pfNoise = NULL;} CSpeexEC:: ~ CSpeexEC() { Reset();} void CSpeexEC::Init( int frame_size, int filter_length, int sampling_rate) { Reset(); if (frame_size <= 0 || filter_length <= 0 || sampling_rate <= 0 ) { m_nFrameSize = 160 ; m_nFilterLen = 160 * 8 ; m_nSampleRate = 8000 ;} else { m_nFrameSize = frame_size; m_nFilterLen = filter_length; m_nSampleRate = sampling_rate;} m_pState = speex_echo_state_init(m_nFrameSize, m_nFilterLen);m_pPreprocessorState = speex_preprocess_state_init(m_nFrameSize, m_nSampleRate);m_pfNoise = new float [m_nFrameSize + 1 ];m_bHasInit = true ;} void CSpeexEC::Reset() { if (m_pState != NULL) { speex_echo_state_destroy(m_pState); m_pState = NULL;} if (m_pPreprocessorState != NULL) { speex_preprocess_state_destroy(m_pPreprocessorState); m_pPreprocessorState = NULL;} if (m_pfNoise != NULL) { delete []m_pfNoise; m_pfNoise = NULL;} m_bHasInit = false ;} void CSpeexEC:DoAEC( short * mic, short * ref , short * out ) { if ( ! m_bHasInit) return ;speex_echo_cancel(m_pState, mic, ref , out , m_pfNoise); speex_preprocess(m_pPreprocessorState, (__int16 * ) out , m_pfNoise); } 可以看出,这个回声消除器类很简单,只要初始化一下就可以调用了。但是,要注意的是,传给回声消除器的两个声音信号,必须同步得非常的好,就是说,在B端,接收到A说的话以后,要把这些话音数据传给回声消除器做参考,然后再传给声卡,声卡再放出来,这有一段延时,这时,B再采集,然后传给回声消除器,与那个参考数据比较,从采集到的数据中把频域和参考数据相同的部分消除掉。如果传给消除器的两个信号同步得不好,即两个信号找不到频域相同的部分,就没有办法进行消除了。
测试程序:
#define NN 160 void main() { FILE * ref_fd, * mic_fd, * out_fd; short ref [NN], mic[NN], out [NN];ref_fd = fopen ( " ref.pcm " , " rb " ); // 打开参考文件,即要消除的声音 mic_fd = fopen ( " mic.pcm " , " rb " ); // 打开mic采集到的声音文件,包含回声在里面 out_fd = fopen ( " echo.pcm " , " wb " ); // 消除了回声以后的文件 CSpeexEC ec;ec.Init(); while (fread(mic, 1 , NN * 2 , mic_fd)) { fread( ref , 1 , NN * 2 , ref_fd); ec.DoAEC(mic, ref , out ); fwrite( out , 1 , NN * 2 , out_fd); } fclose(ref_fd); fclose(mic_fd); fclose(out_fd);} 以上的程序是用文件来模拟回声和MIC,但在实时流中是大不一样的,在一般的VOIP软件中,接收对方的声音并传到声卡中播放是在一个线程中进行的,而采集本地的声音并传送到对方又是在另一个线程中进行的,而声学回声消除器在对采集到的声音进行回声消除的同时,还需要播放线程中的数据作为参考,而要同步这两个线程中的数据是非常困难的,因为稍稍有些不同步,声学回声消除器中的自适应滤波器就会发散,不但消除不了回声,还会破坏原始采集到的声音,使被破坏的声音难以分辨。我做过好多尝试,始终无法用软件来实现对这两个线程中的数据进行同步,导致实现失败,希望有经验的网友们一起分享一下这方面的经验。
示例代码:
Sample code
This section shows sample code for encoding and decoding speech using the Speex API. The commands can be used to encode and decode a file by calling:
% sampleenc in_file.sw | sampledec out_file.sw

where both files are raw (no header) files encoded at 16 bits per sample (in the machine's natural endianness).
sampleenc takes a raw 16 bits/sample file, encodes it and outputs a Speex stream to stdout. Note that the packing used is NOT compatible with that of speexenc/speexdec.
#include < speex / speex.h > #include < stdio.h > /* The frame size in hardcoded for this sample code but it doesn't have to be */ #define FRAME_SIZE 160 int main( int argc, char ** argv) { char * inFile;FILE * fin; short in [FRAME_SIZE]; float input[FRAME_SIZE]; char cbits[ 200 ]; int nbBytes; /* Holds the state of the encoder */ void * state; /* Holds bits so they can be read and written to by the Speex routines */ SpeexBits bits; int i, tmp; /* Create a new encoder state in narrowband mode */ state = speex_encoder_init( & speex_nb_mode); /* Set the quality to 8 (15 kbps) */ tmp = 8 ;speex_encoder_ctl(state, SPEEX_SET_QUALITY, & tmp);inFile = argv[ 1 ];fin = fopen(inFile, " r " ); /* Initialization of the structure that holds the bits */ speex_bits_init( & bits); while ( 1 ) { /* Read a 16 bits/sample audio frame */ fread( in , sizeof ( short ), FRAME_SIZE, fin); if (feof(fin)) break ; /* Copy the 16 bits values to float so Speex can work on them */ for (i = 0 ;i < FRAME_SIZE;i ++ )input[i] = in [i]; /* Flush all the bits in the struct so we can encode a new frame */ speex_bits_reset( & bits); /* Encode the frame */ speex_encode(state, input, & bits); /* Copy the bits to an array of char that can be written */ nbBytes = speex_bits_write( & bits, cbits, 200 ); /* Write the size of the frame first. This is what sampledec expects butit's likely to be different in your own application */ fwrite( & nbBytes, sizeof ( int ), 1 , stdout); /* Write the compressed data */ fwrite(cbits, 1 , nbBytes, stdout);} /* Destroy the encoder state */ speex_encoder_destroy(state); /* Destroy the bit-packing struct */ speex_bits_destroy( & bits);fclose(fin); return 0 ;}
sampledec reads a Speex stream from stdin, decodes it and outputs it to a raw 16 bits/sample file. Note that the packing used is NOT compatible with that of speexenc/speexdec.
#include < speex / speex.h > #include < stdio.h > /* The frame size in hardcoded for this sample code but it doesn't have to be */ #define FRAME_SIZE 160 int main( int argc, char ** argv) { char * outFile;FILE * fout; /* Holds the audio that will be written to file (16 bits per sample) */ short out [FRAME_SIZE]; /* Speex handle samples as float, so we need an array of floats */ float output[FRAME_SIZE]; char cbits[ 200 ]; int nbBytes; /* Holds the state of the decoder */ void * state; /* Holds bits so they can be read and written to by the Speex routines */ SpeexBits bits; int i, tmp; /* Create a new decoder state in narrowband mode */ state = speex_decoder_init( & speex_nb_mode); /* Set the perceptual enhancement on */ tmp = 1 ;speex_decoder_ctl(state, SPEEX_SET_ENH, & tmp);outFile = argv[ 1 ];fout = fopen(outFile, " w " ); /* Initialization of the structure that holds the bits */ speex_bits_init( & bits); while ( 1 ) { /* Read the size encoded by sampleenc, this part will likely bedifferent in your application */ fread( & nbBytes, sizeof ( int ), 1 , stdin);fprintf (stderr, " nbBytes: %d/n " , nbBytes); if (feof(stdin)) break ; /* Read the "packet" encoded by sampleenc */ fread(cbits, 1 , nbBytes, stdin); /* Copy the data into the bit-stream struct */ speex_bits_read_from( & bits, cbits, nbBytes); /* Decode the data */ speex_decode(state, & bits, output); /* Copy from float to short (16 bits) for output */ for (i = 0 ;i < FRAME_SIZE;i ++ ) out [i] = output[i]; /* Write the decoded audio to file */ fwrite( out , sizeof ( short ), FRAME_SIZE, fout);} /* Destroy the decoder state */ speex_decoder_destroy(state); /* Destroy the bit-stream truct */ speex_bits_destroy( & bits);fclose(fout); return 0 ;}