游雁
2024-01-14 8912e0696af069de47646fdb8a9d9c4e086e88b3
runtime/onnxruntime/src/funasrruntime.cpp
@@ -57,7 +57,7 @@
      if (!recog_obj)
         return nullptr;
      funasr::Audio audio(1);
      funasr::Audio audio(recog_obj->GetAsrSampleRate(),1);
      if(wav_format == "pcm" || wav_format == "PCM"){
         if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
            return nullptr;
@@ -93,7 +93,7 @@
      if (!recog_obj)
         return nullptr;
      funasr::Audio audio(1);
      funasr::Audio audio(recog_obj->GetAsrSampleRate(),1);
      if(funasr::is_target_file(sz_filename, "wav")){
         int32_t sampling_rate_ = -1;
         if(!audio.LoadWav(sz_filename, &sampling_rate_))
@@ -134,7 +134,7 @@
      if (!vad_obj)
         return nullptr;
      funasr::Audio audio(1);
      funasr::Audio audio(vad_obj->GetVadSampleRate(),1);
      if(wav_format == "pcm" || wav_format == "PCM"){
         if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
            return nullptr;
@@ -146,6 +146,7 @@
      funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
      p_result->snippet_time = audio.GetTimeLen();
      if(p_result->snippet_time == 0){
         p_result->segments = new vector<std::vector<int>>();
            return p_result;
        }
      
@@ -162,7 +163,7 @@
      if (!vad_obj)
         return nullptr;
      funasr::Audio audio(1);
      funasr::Audio audio(vad_obj->GetVadSampleRate(),1);
      if(funasr::is_target_file(sz_filename, "wav")){
         int32_t sampling_rate_ = -1;
         if(!audio.LoadWav(sz_filename, &sampling_rate_))
@@ -178,6 +179,7 @@
      funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
      p_result->snippet_time = audio.GetTimeLen();
      if(p_result->snippet_time == 0){
         p_result->segments = new vector<std::vector<int>>();
            return p_result;
        }
      
@@ -222,7 +224,7 @@
      if (!offline_stream)
         return nullptr;
      funasr::Audio audio(1);
      funasr::Audio audio(offline_stream->asr_handle->GetAsrSampleRate(),1);
      try{
         if(wav_format == "pcm" || wav_format == "PCM"){
            if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
@@ -266,7 +268,7 @@
         if(msg_vec.size()==0){
            continue;
         }
         if(lang == "en-bpe" and p_result->msg != ""){
         if(lang == "en-bpe" && p_result->msg != ""){
            p_result->msg += " ";
         }
         p_result->msg += msg_vec[0];
@@ -294,10 +296,18 @@
#if !defined(__APPLE__)
      if(offline_stream->UseITN() && itn){
         string msg_itn = offline_stream->itn_handle->Normalize(p_result->msg);
         if(!(p_result->stamp).empty()){
            std::string new_stamp = funasr::TimestampSmooth(p_result->msg, msg_itn, p_result->stamp);
            if(!new_stamp.empty()){
               p_result->stamp = new_stamp;
            }
         }
         p_result->msg = msg_itn;
      }
#endif
      if (!(p_result->stamp).empty()){
         p_result->stamp_sents = funasr::TimestampSentence(p_result->msg, p_result->stamp);
      }
      return p_result;
   }
@@ -308,7 +318,7 @@
      if (!offline_stream)
         return nullptr;
      
      funasr::Audio audio(1);
      funasr::Audio audio((offline_stream->asr_handle)->GetAsrSampleRate(),1);
      try{
         if(funasr::is_target_file(sz_filename, "wav")){
            int32_t sampling_rate_ = -1;
@@ -355,7 +365,7 @@
         if(msg_vec.size()==0){
            continue;
         }
         if(lang == "en-bpe" and p_result->msg != ""){
         if(lang == "en-bpe" && p_result->msg != ""){
            p_result->msg += " ";
         }
         p_result->msg += msg_vec[0];
@@ -384,9 +394,18 @@
#if !defined(__APPLE__)
      if(offline_stream->UseITN() && itn){
         string msg_itn = offline_stream->itn_handle->Normalize(p_result->msg);
         if(!(p_result->stamp).empty()){
            std::string new_stamp = funasr::TimestampSmooth(p_result->msg, msg_itn, p_result->stamp);
            if(!new_stamp.empty()){
               p_result->stamp = new_stamp;
            }
         }
         p_result->msg = msg_itn;
      }
#endif
      if (!(p_result->stamp).empty()){
         p_result->stamp_sents = funasr::TimestampSentence(p_result->msg, p_result->stamp);
      }
      return p_result;
   }
@@ -420,7 +439,7 @@
   _FUNASRAPI FUNASR_RESULT FunTpassInferBuffer(FUNASR_HANDLE handle, FUNASR_HANDLE online_handle, const char* sz_buf, 
                                     int n_len, std::vector<std::vector<std::string>> &punc_cache, bool input_finished, 
                                     int sampling_rate, std::string wav_format, ASR_TYPE mode, 
                                     const std::vector<std::vector<float>> &hw_emb, bool itn)
                                     const std::vector<std::vector<float>> &hw_emb, bool itn, FUNASR_DEC_HANDLE dec_handle)
   {
      funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
      funasr::TpassOnlineStream* tpass_online_stream = (funasr::TpassOnlineStream*)online_handle;
@@ -463,7 +482,7 @@
      funasr::AudioFrame* frame = NULL;
      while(audio->FetchChunck(frame) > 0){
         string msg = asr_online_handle->Forward(frame->data, frame->len, frame->is_final);
         string msg = ((funasr::ParaformerOnline*)asr_online_handle)->Forward(frame->data, frame->len, frame->is_final);
         if(mode == ASR_ONLINE){
            ((funasr::ParaformerOnline*)asr_online_handle)->online_res += msg;
            if(frame->is_final){
@@ -494,7 +513,12 @@
      // timestamp
      std::string cur_stamp = "[";      
      while(audio->FetchTpass(frame) > 0){
         string msg = asr_handle->Forward(frame->data, frame->len, frame->is_final, hw_emb);
         // dec reset
         funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
         if (wfst_decoder){
            wfst_decoder->StartUtterance();
         }
         string msg = ((funasr::Paraformer*)asr_handle)->Forward(frame->data, frame->len, frame->is_final, hw_emb, dec_handle);
         std::vector<std::string> msg_vec = funasr::split(msg, '|');  // split with timestamp
         if(msg_vec.size()==0){
@@ -524,10 +548,19 @@
#if !defined(__APPLE__)
         if(tpass_stream->UseITN() && itn){
            string msg_itn = tpass_stream->itn_handle->Normalize(msg_punc);
            // TimestampSmooth
            if(!(p_result->stamp).empty()){
               std::string new_stamp = funasr::TimestampSmooth(p_result->tpass_msg, msg_itn, p_result->stamp);
               if(!new_stamp.empty()){
                  p_result->stamp = new_stamp;
               }
            }
            p_result->tpass_msg = msg_itn;
         }
#endif
         if (!(p_result->stamp).empty()){
            p_result->stamp_sents = funasr::TimestampSentence(p_result->tpass_msg, p_result->stamp);
         }
         if(frame != NULL){
            delete frame;
            frame = NULL;
@@ -582,6 +615,15 @@
         return nullptr;
      return p_result->stamp.c_str();
   }
      _FUNASRAPI const char* FunASRGetStampSents(FUNASR_RESULT result)
   {
      funasr::FUNASR_RECOG_RESULT * p_result = (funasr::FUNASR_RECOG_RESULT*)result;
      if(!p_result)
         return nullptr;
      return p_result->stamp_sents.c_str();
   }
   _FUNASRAPI const char* FunASRGetTpassResult(FUNASR_RESULT result,int n_index)
@@ -727,8 +769,14 @@
         funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
         funasr::Paraformer* paraformer = (funasr::Paraformer*)offline_stream->asr_handle.get();
         if (paraformer->lm_)
            mm = new funasr::WfstDecoder(paraformer->lm_.get(),
               paraformer->GetPhoneSet(), paraformer->GetLmVocab(), glob_beam, lat_beam, am_scale);
      } else if (asr_type == ASR_TWO_PASS){
         funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
         funasr::Paraformer* paraformer = (funasr::Paraformer*)tpass_stream->asr_handle.get();
         if (paraformer->lm_)
            mm = new funasr::WfstDecoder(paraformer->lm_.get(), 
               paraformer->GetPhoneSet(), paraformer->GetVocab(), glob_beam, lat_beam, am_scale);
               paraformer->GetPhoneSet(), paraformer->GetLmVocab(), glob_beam, lat_beam, am_scale);
      }
      return mm;
   }