runtime/onnxruntime/src/funasrruntime.cpp
@@ -207,7 +207,8 @@
   // APIs for offline-stream inference
   _FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, 
                                       FUNASR_MODE mode, QM_CALLBACK fn_callback, const std::vector<std::vector<float>> &hw_emb, 
                                       int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle)
                                       int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle,
                                       std::string svs_lang, bool svs_itn)
   {
      funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
      if (!offline_stream)
@@ -250,13 +251,18 @@
      std::string cur_stamp = "[";
      std::string lang = (offline_stream->asr_handle)->GetLang();
      while (audio.Fetch(buff, len, flag, start_time, batch_size, batch_in) > 0) {
      while (audio.FetchDynamic(buff, len, flag, start_time, batch_size, batch_in) > 0) {
         // dec reset
         funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
         if (wfst_decoder){
            wfst_decoder->StartUtterance();
         }
         vector<string> msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in);
         vector<string> msg_batch;
         if(offline_stream->GetModelType() == MODEL_SVS){
            msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, svs_lang, svs_itn, batch_in);
         }else{
            msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in);
         }
         for(int idx=0; idx<batch_in; idx++){
            string msg = msg_batch[idx];
            if(msg_idx < index_vector.size()){
@@ -280,7 +286,7 @@
      }
      for(int idx=0; idx<msgs.size(); idx++){
         string msg = msgs[idx];
         std::vector<std::string> msg_vec = funasr::split(msg, '|');
         std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");
         if(msg_vec.size()==0){
            continue;
         }
@@ -291,10 +297,12 @@
         //timestamp
         if(msg_vec.size() > 1){
            std::vector<std::string> msg_stamp = funasr::split(msg_vec[1], ',');
            for(int i=0; i<msg_stamp.size()-1; i+=2){
               float begin = std::stof(msg_stamp[i])+msg_stimes[idx];
               float end = std::stof(msg_stamp[i+1])+msg_stimes[idx];
               cur_stamp += "["+std::to_string((int)(1000*begin))+","+std::to_string((int)(1000*end))+"],";
            if (msg_stamp.size() > 1) {
               for(int i=0; i<msg_stamp.size()-1; i+=2){
                  float begin = std::stof(msg_stamp[i])+msg_stimes[idx];
                  float end = std::stof(msg_stamp[i+1])+msg_stimes[idx];
                  cur_stamp += "["+std::to_string((int)(1000*begin))+","+std::to_string((int)(1000*end))+"],";
               }
            }
         }
      }
@@ -372,7 +380,7 @@
      std::string cur_stamp = "[";
      std::string lang = (offline_stream->asr_handle)->GetLang();
      while (audio.Fetch(buff, len, flag, start_time, batch_size, batch_in) > 0) {
      while (audio.FetchDynamic(buff, len, flag, start_time, batch_size, batch_in) > 0) {
         // dec reset
         funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
         if (wfst_decoder){
@@ -402,7 +410,7 @@
      }
      for(int idx=0; idx<msgs.size(); idx++){
         string msg = msgs[idx];
         std::vector<std::string> msg_vec = funasr::split(msg, '|');
         std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");
         if(msg_vec.size()==0){
            continue;
         }
@@ -446,7 +454,7 @@
      return p_result;
   }
#if !defined(__APPLE__)
//#if !defined(__APPLE__)
   _FUNASRAPI const std::vector<std::vector<float>> CompileHotwordEmbedding(FUNASR_HANDLE handle, std::string &hotwords, ASR_TYPE mode)
   {
      if (mode == ASR_OFFLINE){
@@ -470,13 +478,14 @@
      }
      
   }
#endif
//#endif
   // APIs for 2-pass stream inference
   _FUNASRAPI FUNASR_RESULT FunTpassInferBuffer(FUNASR_HANDLE handle, FUNASR_HANDLE online_handle, const char* sz_buf, 
                                     int n_len, std::vector<std::vector<std::string>> &punc_cache, bool input_finished, 
                                     int sampling_rate, std::string wav_format, ASR_TYPE mode, 
                                     const std::vector<std::vector<float>> &hw_emb, bool itn, FUNASR_DEC_HANDLE dec_handle)
                                     const std::vector<std::vector<float>> &hw_emb, bool itn, FUNASR_DEC_HANDLE dec_handle,
                                     std::string svs_lang, bool svs_itn)
   {
      funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
      funasr::TpassOnlineStream* tpass_online_stream = (funasr::TpassOnlineStream*)online_handle;
@@ -519,7 +528,7 @@
      funasr::AudioFrame* frame = nullptr;
      while(audio->FetchChunck(frame) > 0){
         string msg = ((funasr::ParaformerOnline*)asr_online_handle)->Forward(frame->data, frame->len, frame->is_final);
         string msg = (asr_online_handle)->Forward(frame->data, frame->len, frame->is_final);
         if(mode == ASR_ONLINE){
            ((funasr::ParaformerOnline*)asr_online_handle)->online_res += msg;
            if(frame->is_final){
@@ -561,9 +570,14 @@
           len = new int[1];
         buff[0] = frame->data;
         len[0] = frame->len;
         vector<string> msgs = ((funasr::Paraformer*)asr_handle)->Forward(buff, len, frame->is_final, hw_emb, dec_handle);
         vector<string> msgs;
         if(tpass_stream->GetModelType() == MODEL_SVS){
            msgs = (tpass_stream->asr_handle)->Forward(buff, len, true, svs_lang, svs_itn, 1);
         }else{
            msgs = (tpass_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, 1);
         }
         string msg = msgs.size()>0?msgs[0]:"";
         std::vector<std::string> msg_vec = funasr::split(msg, '|');  // split with timestamp
         std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");  // split with timestamp
         if(msg_vec.size()==0){
            continue;
         }
@@ -583,24 +597,29 @@
            p_result->stamp += cur_stamp + "]";
         }
         string msg_punc = punc_online_handle->AddPunc(msg.c_str(), punc_cache[1]);
         if(input_finished){
            msg_punc += "。";
         }
         p_result->tpass_msg = msg_punc;
#if !defined(__APPLE__)
         if(tpass_stream->UseITN() && itn){
            string msg_itn = tpass_stream->itn_handle->Normalize(msg_punc);
            // TimestampSmooth
            if(!(p_result->stamp).empty()){
               std::string new_stamp = funasr::TimestampSmooth(p_result->tpass_msg, msg_itn, p_result->stamp);
               if(!new_stamp.empty()){
                  p_result->stamp = new_stamp;
               }
         if (tpass_stream->GetModelType() == MODEL_PARA){
            string msg_punc = punc_online_handle->AddPunc(msg.c_str(), punc_cache[1]);
            if(input_finished){
               msg_punc += "。";
            }
            p_result->tpass_msg = msg_itn;
         }
            p_result->tpass_msg = msg_punc;
#if !defined(__APPLE__)
            if(tpass_stream->UseITN() && itn){
               string msg_itn = tpass_stream->itn_handle->Normalize(msg_punc);
               // TimestampSmooth
               if(!(p_result->stamp).empty()){
                  std::string new_stamp = funasr::TimestampSmooth(p_result->tpass_msg, msg_itn, p_result->stamp);
                  if(!new_stamp.empty()){
                     p_result->stamp = new_stamp;
                  }
               }
               p_result->tpass_msg = msg_itn;
            }
#endif
         }else{
            p_result->tpass_msg = msg;
         }
         if (!(p_result->stamp).empty()){
            p_result->stamp_sents = funasr::TimestampSentence(p_result->tpass_msg, p_result->stamp);
         }