| | |
| | | return 0; |
| | | } |
| | | |
// Constructs an Audio from a data_type only.
//
// data_type is stored in the member of the same name. In the second signature
// row the target sample rate (dest_sample_rate) is set to MODEL_SAMPLE_RATE.
//
// NOTE(review): two signature rows precede a single body here; the first has
// no dest_sample_rate initializer. This looks like the before/after pair of a
// diff rather than two real definitions - confirm which row is current.
| | | Audio::Audio(int data_type) : data_type(data_type) |
| | | Audio::Audio(int data_type) : dest_sample_rate(MODEL_SAMPLE_RATE), data_type(data_type) |
| | | { |
// Both buffer pointers start out as NULL.
| | | speech_buff = NULL; |
| | | speech_data = NULL; |
// Fixed alignment size used when the caller does not supply one.
| | | align_size = 1360; |
// Integer division of the target rate by 1000 (presumably samples per
// millisecond - TODO confirm against the users of seg_sample).
| | | seg_sample = dest_sample_rate / 1000; |
| | | } |
| | | |
// Constructs an Audio with a caller-supplied target sample rate.
//
// model_sample_rate is stored in dest_sample_rate and data_type in the member
// of the same name (second signature row).
//
// NOTE(review): the first signature row takes (int data_type, int size), but
// nothing in this body reads `size`. It looks like the removed side of a diff
// hunk - confirm before relying on it.
| | | Audio::Audio(int data_type, int size) : data_type(data_type) |
| | | Audio::Audio(int model_sample_rate, int data_type) : dest_sample_rate(model_sample_rate), data_type(data_type) |
| | | { |
// Both buffer pointers start out as NULL.
| | | speech_buff = NULL; |
| | | speech_data = NULL; |
// Fixed alignment size; this overload takes no size argument of its own.
| | | align_size = 1360; |
// Integer division of the target rate by 1000 (presumably samples per
// millisecond - TODO confirm against the users of seg_sample).
| | | seg_sample = dest_sample_rate / 1000; |
| | | } |
| | | |
// Constructs an Audio with a caller-supplied target sample rate and
// alignment size.
//
// model_sample_rate is stored in dest_sample_rate, data_type in the member of
// the same name, and size (cast to float) in align_size.
| | | Audio::Audio(int model_sample_rate, int data_type, int size) : dest_sample_rate(model_sample_rate), data_type(data_type) |
| | | { |
// Both buffer pointers start out as NULL.
| | | speech_buff = NULL; |
| | | speech_data = NULL; |
// The int argument is converted to float before being stored; the declared
// type of align_size is not visible in this excerpt.
| | | align_size = (float)size; |
// Integer division of the target rate by 1000 (presumably samples per
// millisecond - TODO confirm against the users of seg_sample).
| | | seg_sample = dest_sample_rate / 1000; |
| | | } |
| | | |
| | | Audio::~Audio() |
| | |
| | | |
// Logs, at INFO level, the audio duration in seconds (speech_len divided by
// a sample rate) together with the raw speech_len value.
//
// NOTE(review): two LOG rows appear, one dividing by MODEL_SAMPLE_RATE and
// one by dest_sample_rate. They look like the before/after pair of a diff;
// taken literally the message is emitted twice - confirm which row is current.
| | | void Audio::Disp() |
| | | { |
| | | LOG(INFO) << "Audio time is " << (float)speech_len / MODEL_SAMPLE_RATE << " s. len is " << speech_len; |
| | | LOG(INFO) << "Audio time is " << (float)speech_len / dest_sample_rate << " s. len is " << speech_len; |
| | | } |
| | | |
// Returns speech_len divided by a sample rate, as a float (presumably the
// duration in seconds, since Disp() logs the same expression with an "s"
// unit).
//
// NOTE(review): two return rows appear. Taken literally, the first one
// (MODEL_SAMPLE_RATE) always wins and the dest_sample_rate row is
// unreachable. They look like the before/after pair of a diff - confirm
// which row is current.
| | | float Audio::GetTimeLen() |
| | | { |
| | | return (float)speech_len / MODEL_SAMPLE_RATE; |
| | | return (float)speech_len / dest_sample_rate; |
| | | } |
| | | |
| | | void Audio::WavResample(int32_t sampling_rate, const float *waveform, |
| | |
| | | << " in_sample_rate: "<< sampling_rate << "\n" |
| | | << " output_sample_rate: " << static_cast<int32_t>(MODEL_SAMPLE_RATE); |
| | | float min_freq = |
| | | std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE); |
| | | std::min<int32_t>(sampling_rate, dest_sample_rate); |
| | | float lowpass_cutoff = 0.99 * 0.5 * min_freq; |
| | | |
| | | int32_t lowpass_filter_width = 6; |
| | | |
| | | auto resampler = std::make_unique<LinearResample>( |
| | | sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width); |
| | | sampling_rate, dest_sample_rate, lowpass_cutoff, lowpass_filter_width); |
| | | std::vector<float> samples; |
| | | resampler->Resample(waveform, n, true, &samples); |
| | | //reset speech_data |
| | |
| | | nullptr, // allocate a new context |
| | | AV_CH_LAYOUT_MONO, // output channel layout (mono) |
| | | AV_SAMPLE_FMT_S16, // output sample format (signed 16-bit) |
| | | 16000, // output sample rate (same as input) |
| | | dest_sample_rate, // output sample rate (same as input) |
| | | av_get_default_channel_layout(codecContext->channels), // input channel layout |
| | | codecContext->sample_fmt, // input sample format |
| | | codecContext->sample_rate, // input sample rate |
| | |
| | | int in_samples = frame->nb_samples; |
| | | uint8_t **in_data = frame->extended_data; |
| | | int out_samples = av_rescale_rnd(in_samples, |
| | | 16000, |
| | | dest_sample_rate, |
| | | codecContext->sample_rate, |
| | | AV_ROUND_DOWN); |
| | | |
| | |
| | | nullptr, // allocate a new context |
| | | AV_CH_LAYOUT_MONO, // output channel layout (mono) |
| | | AV_SAMPLE_FMT_S16, // output sample format (signed 16-bit) |
| | | 16000, // output sample rate (same as input) |
| | | dest_sample_rate, // output sample rate (same as input) |
| | | av_get_default_channel_layout(codecContext->channels), // input channel layout |
| | | codecContext->sample_fmt, // input sample format |
| | | codecContext->sample_rate, // input sample rate |
| | |
| | | int in_samples = frame->nb_samples; |
| | | uint8_t **in_data = frame->extended_data; |
| | | int out_samples = av_rescale_rnd(in_samples, |
| | | 16000, |
| | | dest_sample_rate, |
| | | codecContext->sample_rate, |
| | | AV_ROUND_DOWN); |
| | | |
| | |
| | | } |
| | | |
| | | //resample |
| | | if(*sampling_rate != MODEL_SAMPLE_RATE){ |
| | | if(*sampling_rate != dest_sample_rate){ |
| | | WavResample(*sampling_rate, speech_data, speech_len); |
| | | } |
| | | |
| | |
| | | } |
| | | |
| | | //resample |
| | | if(*sampling_rate != MODEL_SAMPLE_RATE){ |
| | | if(*sampling_rate != dest_sample_rate){ |
| | | WavResample(*sampling_rate, speech_data, speech_len); |
| | | } |
| | | |
| | |
| | | } |
| | | |
| | | //resample |
| | | if(*sampling_rate != MODEL_SAMPLE_RATE){ |
| | | if(*sampling_rate != dest_sample_rate){ |
| | | WavResample(*sampling_rate, speech_data, speech_len); |
| | | } |
| | | |
| | |
| | | } |
| | | |
| | | //resample |
| | | if(*sampling_rate != MODEL_SAMPLE_RATE){ |
| | | if(*sampling_rate != dest_sample_rate){ |
| | | WavResample(*sampling_rate, speech_data, speech_len); |
| | | } |
| | | |
| | |
| | | } |
| | | |
| | | //resample |
| | | if(*sampling_rate != MODEL_SAMPLE_RATE){ |
| | | if(*sampling_rate != dest_sample_rate){ |
| | | WavResample(*sampling_rate, speech_data, speech_len); |
| | | } |
| | | |
| | |
| | | AudioFrame *frame = frame_queue.front(); |
| | | frame_queue.pop(); |
| | | |
| | | start_time = (float)(frame->GetStart())/MODEL_SAMPLE_RATE; |
| | | start_time = (float)(frame->GetStart())/ dest_sample_rate; |
| | | dout = speech_data + frame->GetStart(); |
| | | len = frame->GetLen(); |
| | | delete frame; |
| | |
| | | } |
| | | |
| | | // erase all_samples |
| | | int vector_cache = MODEL_SAMPLE_RATE*2; |
| | | int vector_cache = dest_sample_rate*2; |
| | | if(speech_offline_start == -1){ |
| | | if(all_samples.size() > vector_cache){ |
| | | int erase_num = all_samples.size() - vector_cache; |