From f974935484d5d8eb37b36eb2646816c02a41184c Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 10 Oct 2023 16:31:11 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add
---
funasr/runtime/onnxruntime/include/punc-model.h | 4 +-
funasr/runtime/onnxruntime/src/ct-transformer.cpp | 24 +++++++++++-
funasr/runtime/onnxruntime/src/ct-transformer-online.cpp | 2
funasr/runtime/onnxruntime/src/funasrruntime.cpp | 6 ++-
funasr/runtime/onnxruntime/include/model.h | 1
funasr/runtime/onnxruntime/src/ct-transformer-online.h | 2
funasr/runtime/onnxruntime/src/vocab.h | 1
funasr/runtime/onnxruntime/src/paraformer.h | 4 +
funasr/runtime/onnxruntime/src/vocab.cpp | 17 ++++++++
funasr/runtime/onnxruntime/src/ct-transformer.h | 2
funasr/runtime/websocket/bin/funasr-wss-server.cpp | 15 +++++--
11 files changed, 63 insertions(+), 15 deletions(-)
diff --git a/funasr/runtime/onnxruntime/include/model.h b/funasr/runtime/onnxruntime/include/model.h
index 8019a07..7f1e0ac 100644
--- a/funasr/runtime/onnxruntime/include/model.h
+++ b/funasr/runtime/onnxruntime/include/model.h
@@ -18,6 +18,7 @@
virtual void InitHwCompiler(const std::string &hw_model, int thread_num){};
virtual void InitSegDict(const std::string &seg_dict_model){};
virtual std::vector<std::vector<float>> CompileHotwordEmbedding(std::string &hotwords){return std::vector<std::vector<float>>();};
+ virtual std::string GetLang(){return "";}; // Language tag of the model; this base-class default returns an empty string.
};
Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_num=1, ASR_TYPE type=ASR_OFFLINE);
diff --git a/funasr/runtime/onnxruntime/include/punc-model.h b/funasr/runtime/onnxruntime/include/punc-model.h
index 4266eea..214c770 100644
--- a/funasr/runtime/onnxruntime/include/punc-model.h
+++ b/funasr/runtime/onnxruntime/include/punc-model.h
@@ -12,8 +12,8 @@
public:
virtual ~PuncModel(){};
virtual void InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num)=0;
- virtual std::string AddPunc(const char* sz_input){return "";};
- virtual std::string AddPunc(const char* sz_input, std::vector<std::string>& arr_cache){return "";};
+ virtual std::string AddPunc(const char* sz_input, std::string language="zh-cn"){return "";}; // Base default: ignores sz_input/language and returns "". NOTE(review): default arguments of virtual functions are taken from the static type at the call site, so overrides should keep the same "zh-cn" default.
+ virtual std::string AddPunc(const char* sz_input, std::vector<std::string>& arr_cache, std::string language="zh-cn"){return "";}; // Cache-taking overload; base default ignores all arguments (arr_cache is left untouched) and returns "". NOTE(review): keep the "zh-cn" default identical in overrides, since virtual-call defaults come from the static type.
};
PuncModel *CreatePuncModel(std::map<std::string, std::string>& model_path, int thread_num, PUNC_TYPE type=PUNC_OFFLINE);
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer-online.cpp b/funasr/runtime/onnxruntime/src/ct-transformer-online.cpp
index 5fe692b..51f2a6a 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer-online.cpp
+++ b/funasr/runtime/onnxruntime/src/ct-transformer-online.cpp
@@ -50,7 +50,7 @@
{
}
-string CTTransformerOnline::AddPunc(const char* sz_input, vector<string> &arr_cache)
+string CTTransformerOnline::AddPunc(const char* sz_input, vector<string> &arr_cache, std::string language)
{
string strResult;
vector<string> strOut;
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer-online.h b/funasr/runtime/onnxruntime/src/ct-transformer-online.h
index 5db183a..ea7edb7 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer-online.h
+++ b/funasr/runtime/onnxruntime/src/ct-transformer-online.h
@@ -29,7 +29,7 @@
void InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num);
~CTTransformerOnline();
vector<int> Infer(vector<int32_t> input_data, int nCacheSize);
- string AddPunc(const char* sz_input, vector<string> &arr_cache);
+ string AddPunc(const char* sz_input, vector<string> &arr_cache, std::string language="zh-cn");
void Transport(vector<float>& In, int nRows, int nCols);
void VadMask(int size, int vad_pos,vector<float>& Result);
void Triangle(int text_length, vector<float>& Result);
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.cpp b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
index a6c75fb..64a70da 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer.cpp
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
@@ -46,7 +46,7 @@
{
}
-string CTTransformer::AddPunc(const char* sz_input)
+string CTTransformer::AddPunc(const char* sz_input, std::string language)
{
string strResult;
vector<string> strOut;
@@ -139,8 +139,28 @@
}
}
}
- for (auto& item : NewSentenceOut)
+
+ for (auto& item : NewSentenceOut){
strResult += item;
+ }
+
+ if(language == "en-bpe"){
+ std::vector<std::string> chineseSymbols;
+ chineseSymbols.push_back("，");
+ chineseSymbols.push_back("。");
+ chineseSymbols.push_back("、");
+ chineseSymbols.push_back("？");
+
+ std::string englishSymbols = ",.,?";
+ for (size_t i = 0; i < chineseSymbols.size(); i++) {
+ size_t pos = 0;
+ while ((pos = strResult.find(chineseSymbols[i], pos)) != std::string::npos) {
+ strResult.replace(pos, 3, 1, englishSymbols[i]);
+ pos++;
+ }
+ }
+ }
+
return strResult;
}
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.h b/funasr/runtime/onnxruntime/src/ct-transformer.h
index 49ed1b7..b33dcf5 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer.h
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.h
@@ -29,6 +29,6 @@
void InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num);
~CTTransformer();
vector<int> Infer(vector<int32_t> input_data);
- string AddPunc(const char* sz_input);
+ string AddPunc(const char* sz_input, std::string language="zh-cn");
};
} // namespace funasr
\ No newline at end of file
diff --git a/funasr/runtime/onnxruntime/src/funasrruntime.cpp b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
index 0d4af5c..73738c7 100644
--- a/funasr/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
@@ -282,7 +282,8 @@
p_result->stamp += cur_stamp + "]";
}
if(offline_stream->UsePunc()){
- string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str());
+ string lang = (offline_stream->asr_handle)->GetLang();
+ string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str(), lang);
p_result->msg = punc_res;
}
#if !defined(__APPLE__)
@@ -363,7 +364,8 @@
p_result->stamp += cur_stamp + "]";
}
if(offline_stream->UsePunc()){
- string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str());
+ string lang = (offline_stream->asr_handle)->GetLang();
+ string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str(), lang);
p_result->msg = punc_res;
}
#if !defined(__APPLE__)
diff --git a/funasr/runtime/onnxruntime/src/paraformer.h b/funasr/runtime/onnxruntime/src/paraformer.h
index bac8fad..455078e 100644
--- a/funasr/runtime/onnxruntime/src/paraformer.h
+++ b/funasr/runtime/onnxruntime/src/paraformer.h
@@ -33,7 +33,6 @@
vector<const char*> hw_m_szInputNames;
vector<const char*> hw_m_szOutputNames;
bool use_hotword;
- std::string language="zh-cn";
public:
Paraformer();
@@ -55,6 +54,7 @@
string PostProcess(std::vector<string> &raw_char, std::vector<std::vector<float>> &timestamp_list);
string Rescoring();
+ string GetLang(){return language;}; // Returns a copy of the `language` member, which this header initializes to "zh-cn".
knf::FbankOptions fbank_opts_;
vector<float> means_list_;
@@ -71,6 +71,8 @@
vector<const char*> m_szInputNames;
vector<const char*> m_szOutputNames;
+ std::string language="zh-cn";
+
// paraformer-online
std::shared_ptr<Ort::Session> encoder_session_ = nullptr;
std::shared_ptr<Ort::Session> decoder_session_ = nullptr;
diff --git a/funasr/runtime/onnxruntime/src/vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
index 3f51911..2babc40 100644
--- a/funasr/runtime/onnxruntime/src/vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -75,6 +75,21 @@
return false;
}
+string Vocab::WordFormat(std::string word) // Capitalizes the pronoun "i" and three of its contractions; every other word is returned unchanged.
+{
+ if(word == "i"){ // whole-word, case-sensitive comparison; no trimming or lowercasing is done here
+ return "I";
+ }else if(word == "i'm"){
+ return "I'm";
+ }else if(word == "i've"){
+ return "I've";
+ }else if(word == "i'll"){
+ return "I'll";
+ }else{
+ return word; // not one of the four handled forms: pass the input through as-is
+ }
+}
+
string Vocab::Vector2StringV2(vector<int> in, std::string language)
{
int i;
@@ -94,6 +109,7 @@
size_t found = word.find(unicodeChar);
if(found != std::string::npos){
if (combine != ""){
+ combine = WordFormat(combine);
if (words.size() != 0){
combine = " " + combine;
}
@@ -164,6 +180,7 @@
}
if (language == "en-bpe" and combine != ""){
+ combine = WordFormat(combine);
if (words.size() != 0){
combine = " " + combine;
}
diff --git a/funasr/runtime/onnxruntime/src/vocab.h b/funasr/runtime/onnxruntime/src/vocab.h
index eecb9c8..23b4bd6 100644
--- a/funasr/runtime/onnxruntime/src/vocab.h
+++ b/funasr/runtime/onnxruntime/src/vocab.h
@@ -23,6 +23,7 @@
bool IsChinese(string ch);
void Vector2String(vector<int> in, std::vector<std::string> &preds);
string Vector2StringV2(vector<int> in, std::string language="");
+ string WordFormat(std::string word);
int GetIdByToken(const std::string &token);
};
diff --git a/funasr/runtime/websocket/bin/funasr-wss-server.cpp b/funasr/runtime/websocket/bin/funasr-wss-server.cpp
index e64667b..eb1402b 100644
--- a/funasr/runtime/websocket/bin/funasr-wss-server.cpp
+++ b/funasr/runtime/websocket/bin/funasr-wss-server.cpp
@@ -195,11 +195,16 @@
size_t found = s_asr_path.find("speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404");
if (found != std::string::npos) {
model_path["model-revision"]="v1.2.4";
- }else{
- found = s_asr_path.find("speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404");
- if (found != std::string::npos) {
- model_path["model-revision"]="v1.0.5";
- }
+ }
+
+ found = s_asr_path.find("speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404");
+ if (found != std::string::npos) {
+ model_path["model-revision"]="v1.0.5";
+ }
+
+ found = s_asr_path.find("speech_paraformer-large_asr_nat-en-16k-common-vocab10020");
+ if (found != std::string::npos) {
+ model_path["model-revision"]="v1.0.0";
}
// modelscope
--
Gitblit v1.9.1