| | |
| | | len = end - start; |
| | | } |
| | | AudioFrame::~AudioFrame(){ |
| | | if(data != NULL){ |
| | | if(data != nullptr){ |
| | | free(data); |
| | | } |
| | | } |
| | |
| | | |
| | | Audio::Audio(int data_type) : dest_sample_rate(MODEL_SAMPLE_RATE), data_type(data_type) |
| | | { |
| | | speech_buff = NULL; |
| | | speech_data = NULL; |
| | | speech_buff = nullptr; |
| | | speech_data = nullptr; |
| | | align_size = 1360; |
| | | seg_sample = dest_sample_rate / 1000; |
| | | } |
| | | |
| | | Audio::Audio(int model_sample_rate, int data_type) : dest_sample_rate(model_sample_rate), data_type(data_type) |
| | | { |
| | | speech_buff = NULL; |
| | | speech_data = NULL; |
| | | speech_buff = nullptr; |
| | | speech_data = nullptr; |
| | | align_size = 1360; |
| | | seg_sample = dest_sample_rate / 1000; |
| | | } |
| | | |
| | | Audio::Audio(int model_sample_rate, int data_type, int size) : dest_sample_rate(model_sample_rate), data_type(data_type) |
| | | { |
| | | speech_buff = NULL; |
| | | speech_data = NULL; |
| | | speech_buff = nullptr; |
| | | speech_data = nullptr; |
| | | align_size = (float)size; |
| | | seg_sample = dest_sample_rate / 1000; |
| | | } |
| | | |
| | | Audio::~Audio() |
| | | { |
| | | if (speech_buff != NULL) { |
| | | if (speech_buff != nullptr) { |
| | | free(speech_buff); |
| | | } |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | if (speech_char != NULL) { |
| | | if (speech_char != nullptr) { |
| | | free(speech_char); |
| | | } |
| | | ClearQueue(frame_queue); |
| | |
| | | resampler->Resample(waveform, n, true, &samples); |
| | | //reset speech_data |
| | | speech_len = samples.size(); |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | speech_data = (float*)malloc(sizeof(float) * speech_len); |
| | |
| | | #else |
| | | // from file |
| | | AVFormatContext* formatContext = avformat_alloc_context(); |
| | | if (avformat_open_input(&formatContext, filename, NULL, NULL) != 0) { |
| | | if (avformat_open_input(&formatContext, filename, nullptr, nullptr) != 0) { |
| | | LOG(ERROR) << "Error: Could not open input file."; |
| | | avformat_close_input(&formatContext); |
| | | avformat_free_context(formatContext); |
| | | return false; |
| | | } |
| | | |
| | | if (avformat_find_stream_info(formatContext, NULL) < 0) { |
| | | if (avformat_find_stream_info(formatContext, nullptr) < 0) { |
| | | LOG(ERROR) << "Error: Could not open input file."; |
| | | avformat_close_input(&formatContext); |
| | | avformat_free_context(formatContext); |
| | | return false; |
| | | } |
| | | const AVCodec* codec = NULL; |
| | | AVCodecParameters* codecParameters = NULL; |
| | | const AVCodec* codec = nullptr; |
| | | AVCodecParameters* codecParameters = nullptr; |
| | | int audioStreamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0); |
| | | if (audioStreamIndex >= 0) { |
| | | codecParameters = formatContext->streams[audioStreamIndex]->codecpar; |
| | |
| | | avcodec_free_context(&codecContext); |
| | | return false; |
| | | } |
| | | if (avcodec_open2(codecContext, codec, NULL) < 0) { |
| | | if (avcodec_open2(codecContext, codec, nullptr) < 0) { |
| | | LOG(ERROR) << "Error: Could not open audio decoder."; |
| | | avformat_close_input(&formatContext); |
| | | avformat_free_context(formatContext); |
| | |
| | | av_packet_free(&packet); |
| | | av_frame_free(&frame); |
| | | |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | if (speech_char != NULL) { |
| | | if (speech_char != nullptr) { |
| | | free(speech_char); |
| | | } |
| | | offset = 0; |
| | |
| | | } |
| | | AVFormatContext* formatContext = avformat_alloc_context(); |
| | | formatContext->pb = avio_ctx; |
| | | if (avformat_open_input(&formatContext, "", NULL, NULL) != 0) { |
| | | if (avformat_open_input(&formatContext, "", nullptr, nullptr) != 0) { |
| | | LOG(ERROR) << "Error: Could not open input file."; |
| | | avio_context_free(&avio_ctx); |
| | | avformat_close_input(&formatContext); |
| | |
| | | return false; |
| | | } |
| | | |
| | | if (avformat_find_stream_info(formatContext, NULL) < 0) { |
| | | if (avformat_find_stream_info(formatContext, nullptr) < 0) { |
| | | LOG(ERROR) << "Error: Could not find stream information."; |
| | | avio_context_free(&avio_ctx); |
| | | avformat_close_input(&formatContext); |
| | | avformat_free_context(formatContext); |
| | | return false; |
| | | } |
| | | const AVCodec* codec = NULL; |
| | | AVCodecParameters* codecParameters = NULL; |
| | | const AVCodec* codec = nullptr; |
| | | AVCodecParameters* codecParameters = nullptr; |
| | | int audioStreamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0); |
| | | if (audioStreamIndex >= 0) { |
| | | codecParameters = formatContext->streams[audioStreamIndex]->codecpar; |
| | |
| | | avcodec_free_context(&codecContext); |
| | | return false; |
| | | } |
| | | if (avcodec_open2(codecContext, codec, NULL) < 0) { |
| | | if (avcodec_open2(codecContext, codec, nullptr) < 0) { |
| | | LOG(ERROR) << "Error: Could not open audio decoder."; |
| | | avio_context_free(&avio_ctx); |
| | | avformat_close_input(&formatContext); |
| | |
| | | av_packet_free(&packet); |
| | | av_frame_free(&frame); |
| | | |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | |
| | |
| | | bool Audio::LoadWav(const char *filename, int32_t* sampling_rate, bool resample) |
| | | { |
| | | WaveHeader header; |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | if (speech_buff != NULL) { |
| | | if (speech_buff != nullptr) { |
| | | free(speech_buff); |
| | | } |
| | | |
| | |
| | | bool Audio::LoadWav2Char(const char *filename, int32_t* sampling_rate) |
| | | { |
| | | WaveHeader header; |
| | | if (speech_char != NULL) { |
| | | if (speech_char != nullptr) { |
| | | free(speech_char); |
| | | } |
| | | offset = 0; |
| | |
| | | bool Audio::LoadWav(const char* buf, int n_file_len, int32_t* sampling_rate) |
| | | { |
| | | WaveHeader header; |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | if (speech_buff != NULL) { |
| | | if (speech_buff != nullptr) { |
| | | free(speech_buff); |
| | | } |
| | | |
| | |
| | | |
| | | bool Audio::LoadPcmwav(const char* buf, int n_buf_len, int32_t* sampling_rate) |
| | | { |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | |
| | |
| | | |
| | | bool Audio::LoadPcmwavOnline(const char* buf, int n_buf_len, int32_t* sampling_rate) |
| | | { |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | |
| | |
| | | |
| | | bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample) |
| | | { |
| | | if (speech_data != NULL) { |
| | | if (speech_data != nullptr) { |
| | | free(speech_data); |
| | | } |
| | | if (speech_buff != NULL) { |
| | | if (speech_buff != nullptr) { |
| | | free(speech_buff); |
| | | } |
| | | offset = 0; |
| | |
| | | |
| | | bool Audio::LoadPcmwav2Char(const char* filename, int32_t* sampling_rate) |
| | | { |
| | | if (speech_char != NULL) { |
| | | if (speech_char != nullptr) { |
| | | free(speech_char); |
| | | } |
| | | offset = 0; |
| | |
| | | |
| | | bool Audio::LoadOthers2Char(const char* filename) |
| | | { |
| | | if (speech_char != NULL) { |
| | | if (speech_char != nullptr) { |
| | | free(speech_char); |
| | | } |
| | | |
| | |
| | | frame_queue.pop(); |
| | | int sp_len = frame->GetLen(); |
| | | delete frame; |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | |
| | | std::vector<float> pcm_data(speech_data, speech_data+sp_len); |
| | | vector<std::vector<int>> vad_segments = (offline_stream->vad_handle)->Infer(pcm_data); |
| | |
| | | frame->SetStart(start); |
| | | frame->SetEnd(end); |
| | | frame_queue.push(frame); |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | } |
| | | } |
| | | |
| | |
| | | frame_queue.pop(); |
| | | int sp_len = frame->GetLen(); |
| | | delete frame; |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | |
| | | std::vector<float> pcm_data(speech_data, speech_data+sp_len); |
| | | vad_segments = vad_obj->Infer(pcm_data, input_finished); |
| | |
| | | frame_queue.pop(); |
| | | int sp_len = frame->GetLen(); |
| | | delete frame; |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | |
| | | std::vector<float> pcm_data(speech_data, speech_data+sp_len); |
| | | vector<std::vector<int>> vad_segments = vad_obj->Infer(pcm_data, input_finished); |
| | |
| | | frame->data = (float*)malloc(sizeof(float) * step); |
| | | memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float)); |
| | | asr_online_queue.push(frame); |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | speech_start += step/seg_sample; |
| | | } |
| | | } |
| | |
| | | frame->data = (float*)malloc(sizeof(float) * (end-start)); |
| | | memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float)); |
| | | asr_online_queue.push(frame); |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | } |
| | | |
| | | if(asr_mode != ASR_ONLINE){ |
| | |
| | | frame->data = (float*)malloc(sizeof(float) * (end-start)); |
| | | memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float)); |
| | | asr_offline_queue.push(frame); |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | } |
| | | |
| | | speech_start = -1; |
| | |
| | | frame->data = (float*)malloc(sizeof(float) * step); |
| | | memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float)); |
| | | asr_online_queue.push(frame); |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | speech_start += step/seg_sample; |
| | | } |
| | | } |
| | |
| | | frame->data = (float*)malloc(sizeof(float) * (end-offline_start)); |
| | | memcpy(frame->data, all_samples.data()+offline_start-offset, (end-offline_start)*sizeof(float)); |
| | | asr_offline_queue.push(frame); |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | } |
| | | |
| | | if(asr_mode != ASR_OFFLINE){ |
| | |
| | | frame->data = (float*)malloc(sizeof(float) * step); |
| | | memcpy(frame->data, all_samples.data()+start-offset+sample_offset, step*sizeof(float)); |
| | | asr_online_queue.push(frame); |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | } |
| | | }else{ |
| | | frame = new AudioFrame(0); |
| | |
| | | frame->global_start = speech_start; // in this case start >= end |
| | | frame->global_end = speech_end_i; |
| | | asr_online_queue.push(frame); |
| | | frame = NULL; |
| | | frame = nullptr; |
| | | } |
| | | } |
| | | speech_start = -1; |