雾聪
2023-09-01 beceb14feca0befb9a0f46a9fe3350546874f867
fix fsmn-vad
2个文件已修改
50 ■■■■■ 已修改文件
funasr/runtime/onnxruntime/src/fsmn-vad.cpp 5 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/onnxruntime/src/funasrruntime.cpp 45 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/onnxruntime/src/fsmn-vad.cpp
@@ -279,12 +279,15 @@
FsmnVad::Infer(std::vector<float> &waves, bool input_finished) {
    std::vector<std::vector<float>> vad_feats;
    std::vector<std::vector<float>> vad_probs;
    std::vector<std::vector<int>> vad_segments;
    FbankKaldi(vad_sample_rate_, vad_feats, waves);
    if(vad_feats.size() == 0){
      return vad_segments;
    }
    LfrCmvn(vad_feats);
    Forward(vad_feats, &vad_probs, &in_cache_, input_finished);
    E2EVadModel vad_scorer = E2EVadModel();
    std::vector<std::vector<int>> vad_segments;
    vad_segments = vad_scorer(vad_probs, waves, true, false, vad_silence_duration_, vad_max_len_,
                              vad_speech_noise_thres_, vad_sample_rate_);
    return vad_segments;
funasr/runtime/onnxruntime/src/funasrruntime.cpp
@@ -224,12 +224,18 @@
            return nullptr;
        funasr::Audio audio(1);
        if(wav_format == "pcm" || wav_format == "PCM"){
            if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
                return nullptr;
        }else{
            if (!audio.FfmpegLoad(sz_buf, n_len))
                return nullptr;
        try{
            if(wav_format == "pcm" || wav_format == "PCM"){
                if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
                    return nullptr;
            }else{
                if (!audio.FfmpegLoad(sz_buf, n_len))
                    return nullptr;
            }
        }catch (std::exception const &e)
        {
            LOG(ERROR)<<e.what();
            return nullptr;
        }
        funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
@@ -288,17 +294,24 @@
            return nullptr;
        
        funasr::Audio audio(1);
        if(funasr::is_target_file(sz_filename, "wav")){
            int32_t sampling_rate_ = -1;
            if(!audio.LoadWav(sz_filename, &sampling_rate_))
                return nullptr;
        }else if(funasr::is_target_file(sz_filename, "pcm")){
            if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
                return nullptr;
        }else{
            if (!audio.FfmpegLoad(sz_filename))
                return nullptr;
        try{
            if(funasr::is_target_file(sz_filename, "wav")){
                int32_t sampling_rate_ = -1;
                if(!audio.LoadWav(sz_filename, &sampling_rate_))
                    return nullptr;
            }else if(funasr::is_target_file(sz_filename, "pcm")){
                if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
                    return nullptr;
            }else{
                if (!audio.FfmpegLoad(sz_filename))
                    return nullptr;
            }
        }catch (std::exception const &e)
        {
            LOG(ERROR)<<e.what();
            return nullptr;
        }
        funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
        p_result->snippet_time = audio.GetTimeLen();
        if(p_result->snippet_time == 0){