From d62d237a76e423fd1eec31e662162c135d2f93f5 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Wed, 25 Sep 2024 23:46:47 +0800
Subject: [PATCH] add sensevoice in offline-stream
---
runtime/onnxruntime/src/precomp.h | 1 +
runtime/onnxruntime/src/util.cpp | 15 +++++++++++++++
runtime/onnxruntime/src/funasrruntime.cpp | 16 +++++++++++-----
runtime/onnxruntime/src/offline-stream.cpp | 12 +++++++++++-
runtime/onnxruntime/src/util.h | 1 +
5 files changed, 39 insertions(+), 6 deletions(-)
diff --git a/runtime/onnxruntime/src/funasrruntime.cpp b/runtime/onnxruntime/src/funasrruntime.cpp
index 93b89a5..88a3970 100644
--- a/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/runtime/onnxruntime/src/funasrruntime.cpp
@@ -207,7 +207,8 @@
// APIs for Offline-stream Infer
_FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len,
FUNASR_MODE mode, QM_CALLBACK fn_callback, const std::vector<std::vector<float>> &hw_emb,
- int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle)
+ int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle,
+ std::string svs_lang, bool svs_itn)
{
funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
if (!offline_stream)
@@ -256,7 +257,12 @@
if (wfst_decoder){
wfst_decoder->StartUtterance();
}
- vector<string> msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in);
+ vector<string> msg_batch;
+ if(offline_stream->GetModelType() == MODEL_SVS){
+ msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, svs_lang, svs_itn, batch_in);
+ }else{
+ msg_batch = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle, batch_in);
+ }
for(int idx=0; idx<batch_in; idx++){
string msg = msg_batch[idx];
if(msg_idx < index_vector.size()){
@@ -280,7 +286,7 @@
}
for(int idx=0; idx<msgs.size(); idx++){
string msg = msgs[idx];
- std::vector<std::string> msg_vec = funasr::split(msg, '|');
+ std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");
if(msg_vec.size()==0){
continue;
}
@@ -402,7 +408,7 @@
}
for(int idx=0; idx<msgs.size(); idx++){
string msg = msgs[idx];
- std::vector<std::string> msg_vec = funasr::split(msg, '|');
+ std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | ");
if(msg_vec.size()==0){
continue;
}
@@ -563,7 +569,7 @@
len[0] = frame->len;
vector<string> msgs = ((funasr::Paraformer*)asr_handle)->Forward(buff, len, frame->is_final, hw_emb, dec_handle);
string msg = msgs.size()>0?msgs[0]:"";
- std::vector<std::string> msg_vec = funasr::split(msg, '|'); // split with timestamp
+ std::vector<std::string> msg_vec = funasr::SplitStr(msg, " | "); // split with timestamp
if(msg_vec.size()==0){
continue;
}
diff --git a/runtime/onnxruntime/src/offline-stream.cpp b/runtime/onnxruntime/src/offline-stream.cpp
index 166d3c9..b436025 100644
--- a/runtime/onnxruntime/src/offline-stream.cpp
+++ b/runtime/onnxruntime/src/offline-stream.cpp
@@ -47,7 +47,13 @@
use_gpu = false;
#endif
}else{
- asr_handle = make_unique<Paraformer>();
+ if (model_path.at(MODEL_DIR).find(MODEL_SVS) != std::string::npos)
+ {
+ asr_handle = make_unique<SenseVoiceSmall>();
+ model_type = MODEL_SVS;
+ }else{
+ asr_handle = make_unique<Paraformer>();
+ }
}
bool enable_hotword = false;
@@ -138,6 +144,10 @@
}
}
#endif
+ if(model_type == MODEL_SVS){
+ use_itn = false;
+ use_punc = false;
+ }
}
OfflineStream *CreateOfflineStream(std::map<std::string, std::string>& model_path, int thread_num, bool use_gpu, int batch_size)
diff --git a/runtime/onnxruntime/src/precomp.h b/runtime/onnxruntime/src/precomp.h
index 1a98852..d525c65 100644
--- a/runtime/onnxruntime/src/precomp.h
+++ b/runtime/onnxruntime/src/precomp.h
@@ -64,6 +64,7 @@
#include "seg_dict.h"
#include "resample.h"
#include "paraformer.h"
+#include "sensevoice-small.h"
#ifdef USE_GPU
#include "paraformer-torch.h"
#endif
diff --git a/runtime/onnxruntime/src/util.cpp b/runtime/onnxruntime/src/util.cpp
index 483795e..50c9c82 100644
--- a/runtime/onnxruntime/src/util.cpp
+++ b/runtime/onnxruntime/src/util.cpp
@@ -646,6 +646,21 @@
return elems;
}
+// Splits `s` at every occurrence of the multi-character `delimiter`, keeping empty
+// fields, so the result always holds at least one token (the whole of `s` if no match).
+std::vector<std::string> SplitStr(const std::string &s, string delimiter) {
+    // find("") matches at `start` without advancing: the scan would never terminate.
+    if (delimiter.empty()) return {s};
+    std::vector<std::string> tokens;
+    size_t start = 0;
+    for (size_t end = s.find(delimiter); end != std::string::npos; end = s.find(delimiter, start)) {
+        tokens.push_back(s.substr(start, end - start));
+        start = end + delimiter.length();  // resume just past the matched delimiter
+    }
+    tokens.push_back(s.substr(start));  // trailing field after the last delimiter
+    return tokens;
+}
+
template<typename T>
void PrintMat(const std::vector<std::vector<T>> &mat, const std::string &name) {
std::cout << name << ":" << std::endl;
diff --git a/runtime/onnxruntime/src/util.h b/runtime/onnxruntime/src/util.h
index bb4e21a..89dacf6 100644
--- a/runtime/onnxruntime/src/util.h
+++ b/runtime/onnxruntime/src/util.h
@@ -49,6 +49,7 @@
std::string TimestampSmooth(std::string &text, std::string &text_itn, std::string &str_time);
std::string TimestampSentence(std::string &text, std::string &str_time);
std::vector<std::string> split(const std::string &s, char delim);
+std::vector<std::string> SplitStr(const std::string &s, string delimiter);
template<typename T>
void PrintMat(const std::vector<std::vector<T>> &mat, const std::string &name);
--
Gitblit v1.9.1