From d62d237a76e423fd1eec31e662162c135d2f93f5 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Wed, 25 Sep 2024 23:46:47 +0800
Subject: [PATCH] add sensevoice in offline-stream

---
 runtime/onnxruntime/src/precomp.h          |    1 +
 runtime/onnxruntime/src/util.cpp           |   15 +++++++++++++++
 runtime/onnxruntime/src/funasrruntime.cpp  |   16 +++++++++++-----
 runtime/onnxruntime/src/offline-stream.cpp |   12 +++++++++++-
 runtime/onnxruntime/src/util.h             |    1 +
 5 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/runtime/onnxruntime/src/funasrruntime.cpp b/runtime/onnxruntime/src/funasrruntime.cpp
index 93b89a5..88a3970 100644
--- a/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/runtime/onnxruntime/src/funasrruntime.cpp
@@ -207,7 +207,8 @@
 	// APIs for Offline-stream Infer
 	_FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, 
 												   FUNASR_MODE mode, QM_CALLBACK fn_callback, const std::vector<std::vector<float>> &hw_emb, 
-												   int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle)
+												   int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle,
+												   std::string svs_lang, bool svs_itn)
 	{
 		funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
 		if (!offline_stream)
@@ -256,7 +257,12 @@
 			if (wfst_decoder){
 				wfst_decoder->StartUtterance();
 			}
-			vector<string> msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in);
+			vector<string> msg_batch;
+			if(offline_stream->GetModelType() == MODEL_SVS){
+				msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, svs_lang, svs_itn, batch_in);
+			}else{
+				msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in);
+			}
 			for(int idx=0; idx<batch_in; idx++){
 				string msg = msg_batch[idx];
 				if(msg_idx < index_vector.size()){
@@ -280,7 +286,7 @@
 		}
 		for(int idx=0; idx<msgs.size(); idx++){
 			string msg = msgs[idx];
-			std::vector<std::string> msg_vec = funasr::split(msg, '|');
+			std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");
 			if(msg_vec.size()==0){
 				continue;
 			}
@@ -402,7 +408,7 @@
 		}
 		for(int idx=0; idx<msgs.size(); idx++){
 			string msg = msgs[idx];
-			std::vector<std::string> msg_vec = funasr::split(msg, '|');
+			std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");
 			if(msg_vec.size()==0){
 				continue;
 			}
@@ -563,7 +569,7 @@
 			len[0] = frame->len;
 			vector<string> msgs = ((funasr::Paraformer*)asr_handle)->Forward(buff, len, frame->is_final, hw_emb, dec_handle);
 			string msg = msgs.size()>0?msgs[0]:"";
-			std::vector<std::string> msg_vec = funasr::split(msg, '|');  // split with timestamp
+			std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");  // split with timestamp
 			if(msg_vec.size()==0){
 				continue;
 			}
diff --git a/runtime/onnxruntime/src/offline-stream.cpp b/runtime/onnxruntime/src/offline-stream.cpp
index 166d3c9..b436025 100644
--- a/runtime/onnxruntime/src/offline-stream.cpp
+++ b/runtime/onnxruntime/src/offline-stream.cpp
@@ -47,7 +47,13 @@
             use_gpu = false;
             #endif
         }else{
-            asr_handle = make_unique<Paraformer>();
+            if (model_path.at(MODEL_DIR).find(MODEL_SVS) != std::string::npos)
+            {
+                asr_handle = make_unique<SenseVoiceSmall>();
+                model_type = MODEL_SVS;
+            }else{
+                asr_handle = make_unique<Paraformer>();
+            }
         }
 
         bool enable_hotword = false;
@@ -138,6 +144,10 @@
         }
     }
 #endif
+    if(model_type == MODEL_SVS){
+        use_itn = false;
+        use_punc = false;
+    }
 }
 
 OfflineStream *CreateOfflineStream(std::map<std::string, std::string>& model_path, int thread_num, bool use_gpu, int batch_size)
diff --git a/runtime/onnxruntime/src/precomp.h b/runtime/onnxruntime/src/precomp.h
index 1a98852..d525c65 100644
--- a/runtime/onnxruntime/src/precomp.h
+++ b/runtime/onnxruntime/src/precomp.h
@@ -64,6 +64,7 @@
 #include "seg_dict.h"
 #include "resample.h"
 #include "paraformer.h"
+#include "sensevoice-small.h"
 #ifdef USE_GPU
 #include "paraformer-torch.h"
 #endif
diff --git a/runtime/onnxruntime/src/util.cpp b/runtime/onnxruntime/src/util.cpp
index 483795e..50c9c82 100644
--- a/runtime/onnxruntime/src/util.cpp
+++ b/runtime/onnxruntime/src/util.cpp
@@ -646,6 +646,21 @@
   return elems;
 }
 
+// Splits `s` on each occurrence of `delimiter`; always returns at least one token.
+std::vector<std::string> SplitStr(const std::string &s, std::string delimiter) {
+    std::vector<std::string> tokens;
+    size_t start = 0;
+    // An empty delimiter matches at `start` forever, so treat it as "no match" (s is returned whole).
+    size_t end = delimiter.empty() ? std::string::npos : s.find(delimiter);
+    while (end != std::string::npos) {
+        tokens.push_back(s.substr(start, end - start));
+        start = end + delimiter.length();
+        end = s.find(delimiter, start);
+    }
+    tokens.push_back(s.substr(start));  // trailing piece (possibly empty)
+    return tokens;
+}
+
 template<typename T>
 void PrintMat(const std::vector<std::vector<T>> &mat, const std::string &name) {
   std::cout << name << ":" << std::endl;
diff --git a/runtime/onnxruntime/src/util.h b/runtime/onnxruntime/src/util.h
index bb4e21a..89dacf6 100644
--- a/runtime/onnxruntime/src/util.h
+++ b/runtime/onnxruntime/src/util.h
@@ -49,6 +49,7 @@
 std::string TimestampSmooth(std::string &text, std::string &text_itn, std::string &str_time);
 std::string TimestampSentence(std::string &text, std::string &str_time);
 std::vector<std::string> split(const std::string &s, char delim);
+std::vector<std::string> SplitStr(const std::string &s, string delimiter);
 
 template<typename T>
 void PrintMat(const std::vector<std::vector<T>> &mat, const std::string &name);

--
Gitblit v1.9.1