From c2dee5e3c29eba79e591d9e9caebaef15ea4e56b Mon Sep 17 00:00:00 2001
From: hnluo <haoneng.lhn@alibaba-inc.com>
Date: Thu, 29 Jun 2023 11:09:28 +0800
Subject: [PATCH] Merge pull request #687 from alibaba-damo-academy/dev_lhn

---
 funasr/runtime/onnxruntime/src/funasrruntime.cpp |  267 +++++++++++++++++++++++++++++++----------------------
 1 files changed, 155 insertions(+), 112 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/funasrruntime.cpp b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
index b8508fd..82fdd70 100644
--- a/funasr/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
@@ -17,9 +17,15 @@
 		return mm;
 	}
 
-	_FUNASRAPI FUNASR_HANDLE  FunPuncInit(std::map<std::string, std::string>& model_path, int thread_num)
+	_FUNASRAPI FUNASR_HANDLE  FsmnVadOnlineInit(FUNASR_HANDLE fsmnvad_handle)	// Creates a VadModel from fsmnvad_handle and returns it as a FUNASR_HANDLE.
 	{
-		funasr::PuncModel* mm = funasr::CreatePuncModel(model_path, thread_num);
+		funasr::VadModel* mm = funasr::CreateVadModel(fsmnvad_handle);	// NOTE(review): presumably fsmnvad_handle is an existing FSMN-VAD handle - confirm against CreateVadModel
+		return mm;	// whatever CreateVadModel produced is returned unchecked
+	}
+
+	_FUNASRAPI FUNASR_HANDLE  CTTransformerInit(std::map<std::string, std::string>& model_path, int thread_num, PUNC_TYPE type)
+	{	// Creates a punctuation model through funasr::CreatePuncModel and returns it as a FUNASR_HANDLE.
+		funasr::PuncModel* mm = funasr::CreatePuncModel(model_path, thread_num, type);	// all three arguments are forwarded unchanged
 		return mm;
 	}
 
@@ -30,36 +36,7 @@
 	}
 
 	// APIs for ASR Infer
-	_FUNASRAPI FUNASR_RESULT FunASRRecogBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback)
-	{
-		funasr::Model* recog_obj = (funasr::Model*)handle;
-		if (!recog_obj)
-			return nullptr;
-
-		int32_t sampling_rate = -1;
-		funasr::Audio audio(1);
-		if (!audio.LoadWav(sz_buf, n_len, &sampling_rate))
-			return nullptr;
-
-		float* buff;
-		int len;
-		int flag=0;
-		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
-		p_result->snippet_time = audio.GetTimeLen();
-		int n_step = 0;
-		int n_total = audio.GetQueueSize();
-		while (audio.Fetch(buff, len, flag) > 0) {
-			string msg = recog_obj->Forward(buff, len, flag);
-			p_result->msg += msg;
-			n_step++;
-			if (fn_callback)
-				fn_callback(n_step, n_total);
-		}
-
-		return p_result;
-	}
-
-	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	_FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
 	{
 		funasr::Model* recog_obj = (funasr::Model*)handle;
 		if (!recog_obj)
@@ -74,6 +51,9 @@
 		int flag = 0;
 		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
 		p_result->snippet_time = audio.GetTimeLen();
+		if(p_result->snippet_time == 0){
+            return p_result;
+        }
 		int n_step = 0;
 		int n_total = audio.GetQueueSize();
 		while (audio.Fetch(buff, len, flag) > 0) {
@@ -87,23 +67,35 @@
 		return p_result;
 	}
 
-	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* sz_filename, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	_FUNASRAPI FUNASR_RESULT FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
 	{
 		funasr::Model* recog_obj = (funasr::Model*)handle;
 		if (!recog_obj)
 			return nullptr;
 
 		funasr::Audio audio(1);
-		if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
-			return nullptr;
+		if(funasr::is_target_file(sz_filename, "wav")){
+			int32_t sampling_rate_ = -1;
+			if(!audio.LoadWav(sz_filename, &sampling_rate_))
+				return nullptr;
+		}else if(funasr::is_target_file(sz_filename, "pcm")){
+			if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
+				return nullptr;
+		}else{
+			LOG(ERROR)<<"Wrong wav extension";
+			exit(-1);
+		}
 
 		float* buff;
 		int len;
 		int flag = 0;
-		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
-		p_result->snippet_time = audio.GetTimeLen();
 		int n_step = 0;
 		int n_total = audio.GetQueueSize();
+		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
+		p_result->snippet_time = audio.GetTimeLen();
+		if(p_result->snippet_time == 0){
+            return p_result;
+        }
 		while (audio.Fetch(buff, len, flag) > 0) {
 			string msg = recog_obj->Forward(buff, len, flag);
 			p_result->msg += msg;
@@ -112,109 +104,92 @@
 				fn_callback(n_step, n_total);
 		}
 
-		return p_result;
-	}
-
-	_FUNASRAPI FUNASR_RESULT FunASRRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback)
-	{
-		funasr::Model* recog_obj = (funasr::Model*)handle;
-		if (!recog_obj)
-			return nullptr;
-		
-		int32_t sampling_rate = -1;
-		funasr::Audio audio(1);
-		if(!audio.LoadWav(sz_wavfile, &sampling_rate))
-			return nullptr;
-
-		float* buff;
-		int len;
-		int flag = 0;
-		int n_step = 0;
-		int n_total = audio.GetQueueSize();
-		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
-		p_result->snippet_time = audio.GetTimeLen();
-		while (audio.Fetch(buff, len, flag) > 0) {
-			string msg = recog_obj->Forward(buff, len, flag);
-			p_result->msg+= msg;
-			n_step++;
-			if (fn_callback)
-				fn_callback(n_step, n_total);
-		}
-	
 		return p_result;
 	}
 
 	// APIs for VAD Infer
-	_FUNASRAPI FUNASR_RESULT FsmnVadWavFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	_FUNASRAPI FUNASR_RESULT FsmnVadInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, QM_CALLBACK fn_callback, bool input_finished, int sampling_rate)
 	{
 		funasr::VadModel* vad_obj = (funasr::VadModel*)handle;
 		if (!vad_obj)
 			return nullptr;
-		
-		int32_t sampling_rate = -1;
+		// Runs VAD over an in-memory buffer; nullptr is returned for a null handle or a failed load. fn_callback is not used in this body.
 		funasr::Audio audio(1);
-		if(!audio.LoadWav(sz_wavfile, &sampling_rate))
+		if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))	// buffer and length go through Audio::LoadPcmwav; sampling_rate is passed by address
 			return nullptr;
 
 		funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
 		p_result->snippet_time = audio.GetTimeLen();
+		if(p_result->snippet_time == 0){	// zero-length audio: return the result without calling Split
+            return p_result;	// segments is not assigned on this path
+        }
 		
 		vector<std::vector<int>> vad_segments;
-		audio.Split(vad_obj, vad_segments);
+		audio.Split(vad_obj, vad_segments, input_finished);	// input_finished is forwarded as the third argument
+		p_result->segments = new vector<std::vector<int>>(vad_segments);	// heap-allocated copy of the local vector, stored on the result
+
+		return p_result;
+	}
+
+	_FUNASRAPI FUNASR_RESULT FsmnVadInfer(FUNASR_HANDLE handle, const char* sz_filename, QM_CALLBACK fn_callback, int sampling_rate)
+	{	// Runs VAD over a "wav" or "pcm" file; returns nullptr for a null handle, an unsupported extension, or a failed load.
+		funasr::VadModel* vad_obj = (funasr::VadModel*)handle;
+		if (!vad_obj)
+			return nullptr;
+
+		funasr::Audio audio(1);
+		if(funasr::is_target_file(sz_filename, "wav")){
+			int32_t sampling_rate_ = -1;	// LoadWav receives this local, not the sampling_rate argument
+			if(!audio.LoadWav(sz_filename, &sampling_rate_))
+				return nullptr;
+		}else if(funasr::is_target_file(sz_filename, "pcm")){
+			if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
+				return nullptr;
+		}else{
+			LOG(ERROR)<<"Wrong wav extension";
+			return nullptr;	// fail like the load errors above instead of terminating the host process
+		}
+
+		funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
+		p_result->snippet_time = audio.GetTimeLen();
+		if(p_result->snippet_time == 0){
+			return p_result;	// zero-length audio: segments is not assigned on this path
+		}
+
+		vector<std::vector<int>> vad_segments;
+		audio.Split(vad_obj, vad_segments, true);	// third argument fixed to true (FsmnVadInferBuffer passes its input_finished flag here)
 		p_result->segments = new vector<std::vector<int>>(vad_segments);
 
 		return p_result;
 	}
 
 	// APIs for PUNC Infer
-	_FUNASRAPI const std::string FunPuncInfer(FUNASR_HANDLE handle, const char* sz_sentence, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	_FUNASRAPI FUNASR_RESULT CTTransformerInfer(FUNASR_HANDLE handle, const char* sz_sentence, FUNASR_MODE mode, QM_CALLBACK fn_callback, PUNC_TYPE type, FUNASR_RESULT pre_result)
 	{
 		funasr::PuncModel* punc_obj = (funasr::PuncModel*)handle;
 		if (!punc_obj)
 			return nullptr;
-
-		string punc_res = punc_obj->AddPunc(sz_sentence);
-		return punc_res;
-	}
-
-	// APIs for Offline-stream Infer
-	_FUNASRAPI FUNASR_RESULT FunOfflineRecogFile(FUNASR_HANDLE handle, const char* sz_wavfile, FUNASR_MODE mode, QM_CALLBACK fn_callback)
-	{
-		funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
-		if (!offline_stream)
-			return nullptr;
 		
-		int32_t sampling_rate = -1;
-		funasr::Audio audio(1);
-		if(!audio.LoadWav(sz_wavfile, &sampling_rate))
-			return nullptr;
-		if(offline_stream->UseVad()){
-			audio.Split(offline_stream);
+		FUNASR_RESULT p_result = nullptr;	// Punctuates sz_sentence into a FUNASR_PUNC_RESULT; nullptr is returned for a null handle or an unknown type.
+		if (type==PUNC_OFFLINE){
+			p_result = (FUNASR_RESULT)new funasr::FUNASR_PUNC_RESULT;	// offline: always a fresh result; pre_result is not consulted
+			((funasr::FUNASR_PUNC_RESULT*)p_result)->msg = punc_obj->AddPunc(sz_sentence);
+		}else if(type==PUNC_ONLINE){
+			if (!pre_result)
+				p_result = (FUNASR_RESULT)new funasr::FUNASR_PUNC_RESULT;
+			else
+				p_result = pre_result;	// reuse the caller's result, so its arr_cache is what AddPunc receives below
+			((funasr::FUNASR_PUNC_RESULT*)p_result)->msg = punc_obj->AddPunc(sz_sentence, ((funasr::FUNASR_PUNC_RESULT*)p_result)->arr_cache);
+		}else{
+			LOG(ERROR) << "Wrong PUNC_TYPE";
+			return nullptr;	// nothing was allocated on this path; report failure instead of terminating the host process
 		}
 
-		float* buff;
-		int len;
-		int flag = 0;
-		int n_step = 0;
-		int n_total = audio.GetQueueSize();
-		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
-		p_result->snippet_time = audio.GetTimeLen();
-		while (audio.Fetch(buff, len, flag) > 0) {
-			string msg = (offline_stream->asr_handle)->Forward(buff, len, flag);
-			p_result->msg+= msg;
-			n_step++;
-			if (fn_callback)
-				fn_callback(n_step, n_total);
-		}
-		if(offline_stream->UsePunc()){
-			string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str());
-			p_result->msg = punc_res;
-		}
-	
 		return p_result;
 	}
 
-	_FUNASRAPI FUNASR_RESULT FunOfflineRecogPCMBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, int sampling_rate, FUNASR_MODE mode, QM_CALLBACK fn_callback)
+	// APIs for Offline-stream Infer
+	_FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
 	{
 		funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
 		if (!offline_stream)
@@ -223,6 +198,11 @@
 		funasr::Audio audio(1);
 		if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
 			return nullptr;
+		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
+		p_result->snippet_time = audio.GetTimeLen();
+		if(p_result->snippet_time == 0){
+            return p_result;
+        }
 		if(offline_stream->UseVad()){
 			audio.Split(offline_stream);
 		}
@@ -230,8 +210,7 @@
 		float* buff;
 		int len;
 		int flag = 0;
-		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
-		p_result->snippet_time = audio.GetTimeLen();
+
 		int n_step = 0;
 		int n_total = audio.GetQueueSize();
 		while (audio.Fetch(buff, len, flag) > 0) {
@@ -246,6 +225,53 @@
 			p_result->msg = punc_res;
 		}
 
+		return p_result;
+	}
+
+	_FUNASRAPI FUNASR_RESULT FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
+	{	// Recognizes a "wav" or "pcm" file through an OfflineStream; returns nullptr for a null handle, an unsupported extension, or a failed load.
+		funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
+		if (!offline_stream)
+			return nullptr;
+
+		funasr::Audio audio(1);
+		if(funasr::is_target_file(sz_filename, "wav")){
+			int32_t sampling_rate_ = -1;	// LoadWav receives this local, not the sampling_rate argument
+			if(!audio.LoadWav(sz_filename, &sampling_rate_))
+				return nullptr;
+		}else if(funasr::is_target_file(sz_filename, "pcm")){
+			if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
+				return nullptr;
+		}else{
+			LOG(ERROR)<<"Wrong wav extension";
+			return nullptr;	// fail like the load errors above instead of terminating the host process
+		}
+		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
+		p_result->snippet_time = audio.GetTimeLen();
+		if(p_result->snippet_time == 0){
+			return p_result;	// zero-length audio: msg is never assigned on this path
+		}
+		if(offline_stream->UseVad()){
+			audio.Split(offline_stream);
+		}
+
+		float* buff;
+		int len;
+		int flag = 0;
+		int n_step = 0;
+		int n_total = audio.GetQueueSize();	// read once, after the optional Split, and passed to fn_callback as the total
+		while (audio.Fetch(buff, len, flag) > 0) {
+			string msg = (offline_stream->asr_handle)->Forward(buff, len, flag);
+			p_result->msg+= msg;
+			n_step++;
+			if (fn_callback)
+				fn_callback(n_step, n_total);
+		}
+		if(offline_stream->UsePunc()){
+			string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str());
+			p_result->msg = punc_res;	// the AddPunc output replaces the concatenated Forward output
+		}
+
 		return p_result;
 	}
 
@@ -284,6 +310,15 @@
 		return p_result->msg.c_str();
 	}
 
+	_FUNASRAPI const char* CTTransformerGetResult(FUNASR_RESULT result,int n_index)
+	{
+		// Returns the C string of the punctuation result's msg, or nullptr when result is null.
+		// n_index is not read by this function.
+		funasr::FUNASR_PUNC_RESULT* punc_result = (funasr::FUNASR_PUNC_RESULT*)result;
+
+		return punc_result ? punc_result->msg.c_str() : nullptr;
+	}
+
 	_FUNASRAPI vector<std::vector<int>>* FsmnVadGetResult(FUNASR_RESULT result,int n_index)
 	{
 		funasr::FUNASR_VAD_RESULT * p_result = (funasr::FUNASR_VAD_RESULT*)result;
@@ -299,6 +334,14 @@
 		if (result)
 		{
 			delete (funasr::FUNASR_RECOG_RESULT*)result;
+		}
+	}
+
+	_FUNASRAPI void CTTransformerFreeResult(FUNASR_RESULT result)
+	{	// Deletes a punctuation result; a null result is ignored.
+		if (result)
+		{
+			delete (funasr::FUNASR_PUNC_RESULT*)result;	// the handle is cast back so it is deleted as a funasr::FUNASR_PUNC_RESULT
 		}
 	}
 
@@ -335,7 +378,7 @@
 		delete recog_obj;
 	}
 
-	_FUNASRAPI void FunPuncUninit(FUNASR_HANDLE handle)
+	_FUNASRAPI void CTTransformerUninit(FUNASR_HANDLE handle)
 	{
 		funasr::PuncModel* punc_obj = (funasr::PuncModel*)handle;
 

--
Gitblit v1.9.1