From 1819303f5e8cfc03f4c0ec2495571a54a186d34b Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: 星期二, 29 十月 2024 11:40:18 +0800
Subject: [PATCH] support SenseVoiceSmall in 2pass mode

---
 runtime/onnxruntime/src/funasrruntime.cpp |   49 ++++++++++++++++++++++++++++++-------------------
 1 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/runtime/onnxruntime/src/funasrruntime.cpp b/runtime/onnxruntime/src/funasrruntime.cpp
index 88a3970..6286412 100644
--- a/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/runtime/onnxruntime/src/funasrruntime.cpp
@@ -482,7 +482,8 @@
 	_FUNASRAPI FUNASR_RESULT FunTpassInferBuffer(FUNASR_HANDLE handle, FUNASR_HANDLE online_handle, const char* sz_buf, 
 												 int n_len, std::vector<std::vector<std::string>> &punc_cache, bool input_finished, 
 												 int sampling_rate, std::string wav_format, ASR_TYPE mode, 
-												 const std::vector<std::vector<float>> &hw_emb, bool itn, FUNASR_DEC_HANDLE dec_handle)
+												 const std::vector<std::vector<float>> &hw_emb, bool itn, FUNASR_DEC_HANDLE dec_handle,
+												 std::string svs_lang, bool svs_itn)
 	{
 		funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
 		funasr::TpassOnlineStream* tpass_online_stream = (funasr::TpassOnlineStream*)online_handle;
@@ -525,7 +526,7 @@
 
 		funasr::AudioFrame* frame = nullptr;
 		while(audio->FetchChunck(frame) > 0){
-			string msg = ((funasr::ParaformerOnline*)asr_online_handle)->Forward(frame->data, frame->len, frame->is_final);
+			string msg = (asr_online_handle)->Forward(frame->data, frame->len, frame->is_final);
 			if(mode == ASR_ONLINE){
 				((funasr::ParaformerOnline*)asr_online_handle)->online_res += msg;
 				if(frame->is_final){
@@ -567,7 +568,12 @@
         	len = new int[1];
 			buff[0] = frame->data;
 			len[0] = frame->len;
-			vector<string> msgs = ((funasr::Paraformer*)asr_handle)->Forward(buff, len, frame->is_final, hw_emb, dec_handle);
+			vector<string> msgs;
+			if(tpass_stream->GetModelType() == MODEL_SVS){
+				msgs = (tpass_stream->asr_handle)->Forward(buff, len, true, svs_lang, svs_itn, 1);
+			}else{
+				msgs = (tpass_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, 1);
+			}
 			string msg = msgs.size()>0?msgs[0]:"";
 			std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");  // split with timestamp
 			if(msg_vec.size()==0){
@@ -589,24 +595,29 @@
 				p_result->stamp += cur_stamp + "]";
 			}
 
-			string msg_punc = punc_online_handle->AddPunc(msg.c_str(), punc_cache[1]);
-			if(input_finished){
-				msg_punc += "。";
-			}
-			p_result->tpass_msg = msg_punc;
-#if !defined(__APPLE__)
-			if(tpass_stream->UseITN() && itn){
-				string msg_itn = tpass_stream->itn_handle->Normalize(msg_punc);
-				// TimestampSmooth
-				if(!(p_result->stamp).empty()){
-					std::string new_stamp = funasr::TimestampSmooth(p_result->tpass_msg, msg_itn, p_result->stamp);
-					if(!new_stamp.empty()){
-						p_result->stamp = new_stamp;
-					}
+			if (tpass_stream->GetModelType() == MODEL_PARA){
+				string msg_punc = punc_online_handle->AddPunc(msg.c_str(), punc_cache[1]);
+				if(input_finished){
+					msg_punc += "。";
 				}
-				p_result->tpass_msg = msg_itn;
-			}
+				p_result->tpass_msg = msg_punc;
+
+#if !defined(__APPLE__)
+				if(tpass_stream->UseITN() && itn){
+					string msg_itn = tpass_stream->itn_handle->Normalize(msg_punc);
+					// TimestampSmooth
+					if(!(p_result->stamp).empty()){
+						std::string new_stamp = funasr::TimestampSmooth(p_result->tpass_msg, msg_itn, p_result->stamp);
+						if(!new_stamp.empty()){
+							p_result->stamp = new_stamp;
+						}
+					}
+					p_result->tpass_msg = msg_itn;
+				}
 #endif
+			}else{
+				p_result->tpass_msg = msg;
+			}
 			if (!(p_result->stamp).empty()){
 				p_result->stamp_sents = funasr::TimestampSentence(p_result->tpass_msg, p_result->stamp);
 			}

--
Gitblit v1.9.1