游雁
2024-01-14 8912e0696af069de47646fdb8a9d9c4e086e88b3
runtime/onnxruntime/src/audio.cpp
@@ -228,6 +228,17 @@
    if (speech_char != NULL) {
        free(speech_char);
    }
    ClearQueue(frame_queue);
    ClearQueue(asr_online_queue);
    ClearQueue(asr_offline_queue);
}
void Audio::ClearQueue(std::queue<AudioFrame*>& q) {
    while (!q.empty()) {
        AudioFrame* frame = q.front();
        delete frame;
        q.pop();
    }
}
void Audio::Disp()
@@ -243,9 +254,9 @@
void Audio::WavResample(int32_t sampling_rate, const float *waveform,
                          int32_t n)
{
    LOG(INFO) << "Creating a resampler:\n"
              << "   in_sample_rate: "<< sampling_rate << "\n"
              << "   output_sample_rate: " << static_cast<int32_t>(MODEL_SAMPLE_RATE);
    LOG(INFO) << "Creating a resampler: "
              << " in_sample_rate: "<< sampling_rate
              << " output_sample_rate: " << static_cast<int32_t>(dest_sample_rate);
    float min_freq =
        std::min<int32_t>(sampling_rate, dest_sample_rate);
    float lowpass_cutoff = 0.99 * 0.5 * min_freq;
@@ -354,9 +365,7 @@
                while (avcodec_receive_frame(codecContext, frame) >= 0) {
                    // Resample audio if necessary
                    std::vector<uint8_t> resampled_buffer;
                    int in_samples = frame->nb_samples;
                    uint8_t **in_data = frame->extended_data;
                    int out_samples = av_rescale_rnd(in_samples,
                    int out_samples = av_rescale_rnd(swr_get_delay(swr_ctx, codecContext->sample_rate) + frame->nb_samples,
                                                    dest_sample_rate,
                                                    codecContext->sample_rate,
                                                    AV_ROUND_DOWN);
@@ -364,20 +373,20 @@
                    int resampled_size = out_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
                    if (resampled_buffer.size() < resampled_size) {
                        resampled_buffer.resize(resampled_size);
                    }
                    }
                    uint8_t *resampled_data = resampled_buffer.data();
                    int ret = swr_convert(
                        swr_ctx,
                        &resampled_data, // output buffer
                        resampled_size, // output buffer size
                        (const uint8_t **)(frame->data), //(const uint8_t **)(frame->extended_data)
                        in_samples // input buffer size
                        out_samples, // output buffer size
                        (const uint8_t **)(frame->data), // choose channel
                        frame->nb_samples // input buffer size
                    );
                    if (ret < 0) {
                        LOG(ERROR) << "Error resampling audio";
                        break;
                    }
                    std::copy(resampled_buffer.begin(), resampled_buffer.end(), std::back_inserter(resampled_buffers));
                    resampled_buffers.insert(resampled_buffers.end(), resampled_buffer.begin(), resampled_buffer.begin() + resampled_size);
                }
            }
        }
@@ -453,6 +462,10 @@
        nullptr, // write callback (not used here)
        nullptr // seek callback (not used here)
    );
    if (!avio_ctx) {
        av_free(buf_copy);
        return false;
    }
    AVFormatContext* formatContext = avformat_alloc_context();
    formatContext->pb = avio_ctx;
    if (avformat_open_input(&formatContext, "", NULL, NULL) != 0) {
@@ -539,9 +552,7 @@
                while (avcodec_receive_frame(codecContext, frame) >= 0) {
                    // Resample audio if necessary
                    std::vector<uint8_t> resampled_buffer;
                    int in_samples = frame->nb_samples;
                    uint8_t **in_data = frame->extended_data;
                    int out_samples = av_rescale_rnd(in_samples,
                    int out_samples = av_rescale_rnd(swr_get_delay(swr_ctx, codecContext->sample_rate) + frame->nb_samples,
                                                    dest_sample_rate,
                                                    codecContext->sample_rate,
                                                    AV_ROUND_DOWN);
@@ -549,27 +560,29 @@
                    int resampled_size = out_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
                    if (resampled_buffer.size() < resampled_size) {
                        resampled_buffer.resize(resampled_size);
                    }
                    }
                    uint8_t *resampled_data = resampled_buffer.data();
                    int ret = swr_convert(
                        swr_ctx,
                        &resampled_data, // output buffer
                        resampled_size, // output buffer size
                        (const uint8_t **)(frame->data), //(const uint8_t **)(frame->extended_data)
                        in_samples // input buffer size
                        out_samples, // output buffer size
                        (const uint8_t **)(frame->data), // choose channel: channel_data
                        frame->nb_samples // input buffer size
                    );
                    if (ret < 0) {
                        LOG(ERROR) << "Error resampling audio";
                        break;
                    }
                    std::copy(resampled_buffer.begin(), resampled_buffer.end(), std::back_inserter(resampled_buffers));
                    resampled_buffers.insert(resampled_buffers.end(), resampled_buffer.begin(), resampled_buffer.begin() + resampled_size);
                }
            }
        }
        av_packet_unref(packet);
    }
    avio_context_free(&avio_ctx);
    //avio_context_free(&avio_ctx);
    av_freep(&avio_ctx ->buffer);
    av_freep(&avio_ctx);
    avformat_close_input(&formatContext);
    avformat_free_context(formatContext);
    avcodec_free_context(&codecContext);
@@ -614,7 +627,7 @@
}
bool Audio::LoadWav(const char *filename, int32_t* sampling_rate)
bool Audio::LoadWav(const char *filename, int32_t* sampling_rate, bool resample)
{
    WaveHeader header;
    if (speech_data != NULL) {
@@ -676,7 +689,7 @@
        }
        //resample
        if(*sampling_rate != dest_sample_rate){
        if(resample && *sampling_rate != dest_sample_rate){
            WavResample(*sampling_rate, speech_data, speech_len);
        }
@@ -867,7 +880,7 @@
        return false;
}
bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate)
bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample)
{
    if (speech_data != NULL) {
        free(speech_data);
@@ -908,7 +921,7 @@
        }
        //resample
        if(*sampling_rate != dest_sample_rate){
        if(resample && *sampling_rate != dest_sample_rate){
            WavResample(*sampling_rate, speech_data, speech_len);
        }