From f974935484d5d8eb37b36eb2646816c02a41184c Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 10 Oct 2023 16:31:11 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add

---
 funasr/runtime/onnxruntime/include/punc-model.h          |    4 +-
 funasr/runtime/onnxruntime/src/ct-transformer.cpp        |   24 +++++++++++-
 funasr/runtime/onnxruntime/src/ct-transformer-online.cpp |    2 
 funasr/runtime/onnxruntime/src/funasrruntime.cpp         |    6 ++-
 funasr/runtime/onnxruntime/include/model.h               |    1 
 funasr/runtime/onnxruntime/src/ct-transformer-online.h   |    2 
 funasr/runtime/onnxruntime/src/vocab.h                   |    1 
 funasr/runtime/onnxruntime/src/paraformer.h              |    4 +
 funasr/runtime/onnxruntime/src/vocab.cpp                 |   17 ++++++++
 funasr/runtime/onnxruntime/src/ct-transformer.h          |    2 
 funasr/runtime/websocket/bin/funasr-wss-server.cpp       |   15 +++++--
 11 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/funasr/runtime/onnxruntime/include/model.h b/funasr/runtime/onnxruntime/include/model.h
index 8019a07..7f1e0ac 100644
--- a/funasr/runtime/onnxruntime/include/model.h
+++ b/funasr/runtime/onnxruntime/include/model.h
@@ -18,6 +18,7 @@
     virtual void InitHwCompiler(const std::string &hw_model, int thread_num){};
     virtual void InitSegDict(const std::string &seg_dict_model){};
     virtual std::vector<std::vector<float>> CompileHotwordEmbedding(std::string &hotwords){return std::vector<std::vector<float>>();};
+    virtual std::string GetLang(){return "";}; // Base default: reports no language (empty string); virtual so a concrete model can return its own language tag.
 };
 
 Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_num=1, ASR_TYPE type=ASR_OFFLINE);
diff --git a/funasr/runtime/onnxruntime/include/punc-model.h b/funasr/runtime/onnxruntime/include/punc-model.h
index 4266eea..214c770 100644
--- a/funasr/runtime/onnxruntime/include/punc-model.h
+++ b/funasr/runtime/onnxruntime/include/punc-model.h
@@ -12,8 +12,8 @@
   public:
     virtual ~PuncModel(){};
 	  virtual void InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num)=0;
-	  virtual std::string AddPunc(const char* sz_input){return "";};
-	  virtual std::string AddPunc(const char* sz_input, std::vector<std::string>& arr_cache){return "";};
+	  virtual std::string AddPunc(const char* sz_input, std::string language="zh-cn"){return "";}; // Base default: ignores both arguments and returns an empty string; `language` defaults to "zh-cn".
+	  virtual std::string AddPunc(const char* sz_input, std::vector<std::string>& arr_cache, std::string language="zh-cn"){return "";}; // Cache-taking overload; base default ignores all arguments (arr_cache is left untouched) and returns an empty string.
 };
 
 PuncModel *CreatePuncModel(std::map<std::string, std::string>& model_path, int thread_num, PUNC_TYPE type=PUNC_OFFLINE);
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer-online.cpp b/funasr/runtime/onnxruntime/src/ct-transformer-online.cpp
index 5fe692b..51f2a6a 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer-online.cpp
+++ b/funasr/runtime/onnxruntime/src/ct-transformer-online.cpp
@@ -50,7 +50,7 @@
 {
 }
 
-string CTTransformerOnline::AddPunc(const char* sz_input, vector<string> &arr_cache)
+string CTTransformerOnline::AddPunc(const char* sz_input, vector<string> &arr_cache, std::string language)
 {
     string strResult;
     vector<string> strOut;
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer-online.h b/funasr/runtime/onnxruntime/src/ct-transformer-online.h
index 5db183a..ea7edb7 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer-online.h
+++ b/funasr/runtime/onnxruntime/src/ct-transformer-online.h
@@ -29,7 +29,7 @@
 	void InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num);
 	~CTTransformerOnline();
 	vector<int>  Infer(vector<int32_t> input_data, int nCacheSize);
-	string AddPunc(const char* sz_input, vector<string> &arr_cache);
+	string AddPunc(const char* sz_input, vector<string> &arr_cache, std::string language="zh-cn");
 	void Transport(vector<float>& In, int nRows, int nCols);
 	void VadMask(int size, int vad_pos,vector<float>& Result);
 	void Triangle(int text_length, vector<float>& Result);
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.cpp b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
index a6c75fb..64a70da 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer.cpp
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.cpp
@@ -46,7 +46,7 @@
 {
 }
 
-string CTTransformer::AddPunc(const char* sz_input)
+string CTTransformer::AddPunc(const char* sz_input, std::string language)
 {
     string strResult;
     vector<string> strOut;
@@ -139,8 +139,28 @@
             }
         }
     }
-    for (auto& item : NewSentenceOut)
+
+    for (auto& item : NewSentenceOut){
         strResult += item;
+    }
+    
+    if(language == "en-bpe"){
+        std::vector<std::string> chineseSymbols;
+        chineseSymbols.push_back("，");
+        chineseSymbols.push_back("。");
+        chineseSymbols.push_back("、");
+        chineseSymbols.push_back("？");
+
+        std::string englishSymbols = ",.,?";
+        for (size_t i = 0; i < chineseSymbols.size(); i++) {
+            size_t pos = 0;
+            while ((pos = strResult.find(chineseSymbols[i], pos)) != std::string::npos) {
+                strResult.replace(pos, 3, 1, englishSymbols[i]);
+                pos++;
+            }
+        }
+    }
+
     return strResult;
 }
 
diff --git a/funasr/runtime/onnxruntime/src/ct-transformer.h b/funasr/runtime/onnxruntime/src/ct-transformer.h
index 49ed1b7..b33dcf5 100644
--- a/funasr/runtime/onnxruntime/src/ct-transformer.h
+++ b/funasr/runtime/onnxruntime/src/ct-transformer.h
@@ -29,6 +29,6 @@
 	void InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num);
 	~CTTransformer();
 	vector<int>  Infer(vector<int32_t> input_data);
-	string AddPunc(const char* sz_input);
+	string AddPunc(const char* sz_input, std::string language="zh-cn");
 };
 } // namespace funasr
\ No newline at end of file
diff --git a/funasr/runtime/onnxruntime/src/funasrruntime.cpp b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
index 0d4af5c..73738c7 100644
--- a/funasr/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
@@ -282,7 +282,8 @@
 			p_result->stamp += cur_stamp + "]";
 		}
 		if(offline_stream->UsePunc()){
-			string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str());
+			string lang = (offline_stream->asr_handle)->GetLang();
+			string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str(), lang);
 			p_result->msg = punc_res;
 		}
 #if !defined(__APPLE__)
@@ -363,7 +364,8 @@
 			p_result->stamp += cur_stamp + "]";
 		}
 		if(offline_stream->UsePunc()){
-			string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str());
+			string lang = (offline_stream->asr_handle)->GetLang();
+			string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str(), lang);
 			p_result->msg = punc_res;
 		}
 #if !defined(__APPLE__)
diff --git a/funasr/runtime/onnxruntime/src/paraformer.h b/funasr/runtime/onnxruntime/src/paraformer.h
index bac8fad..455078e 100644
--- a/funasr/runtime/onnxruntime/src/paraformer.h
+++ b/funasr/runtime/onnxruntime/src/paraformer.h
@@ -33,7 +33,6 @@
         vector<const char*> hw_m_szInputNames;
         vector<const char*> hw_m_szOutputNames;
         bool use_hotword;
-        std::string language="zh-cn";
 
     public:
         Paraformer();
@@ -55,6 +54,7 @@
         string PostProcess(std::vector<string> &raw_char, std::vector<std::vector<float>> &timestamp_list);
 
         string Rescoring();
+        string GetLang(){return language;}; // Returns a copy of the `language` member (declared below with default "zh-cn"). NOTE(review): presumably overrides Model::GetLang — confirm the base class.
 
         knf::FbankOptions fbank_opts_;
         vector<float> means_list_;
@@ -71,6 +71,8 @@
         vector<const char*> m_szInputNames;
         vector<const char*> m_szOutputNames;
 
+        std::string language="zh-cn";
+
         // paraformer-online
         std::shared_ptr<Ort::Session> encoder_session_ = nullptr;
         std::shared_ptr<Ort::Session> decoder_session_ = nullptr;
diff --git a/funasr/runtime/onnxruntime/src/vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
index 3f51911..2babc40 100644
--- a/funasr/runtime/onnxruntime/src/vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -75,6 +75,21 @@
     return false;
 }
 
+string Vocab::WordFormat(std::string word) // Capitalizes the pronoun "i" and three of its contractions; any other word is returned unchanged.
+{
+    if(word == "i"){ // exact, case-sensitive comparison against the whole word
+        return "I";
+    }else if(word == "i'm"){
+        return "I'm";
+    }else if(word == "i've"){
+        return "I've";
+    }else if(word == "i'll"){
+        return "I'll";
+    }else{
+        return word; // no match (this includes forms not listed above, e.g. "i'd"): pass through as-is
+    }
+}
+
 string Vocab::Vector2StringV2(vector<int> in, std::string language)
 {
     int i;
@@ -94,6 +109,7 @@
             size_t found = word.find(unicodeChar);
             if(found != std::string::npos){
                 if (combine != ""){
+                    combine = WordFormat(combine);
                     if (words.size() != 0){
                         combine = " " + combine;
                     }
@@ -164,6 +180,7 @@
     }
 
     if (language == "en-bpe" and combine != ""){
+        combine = WordFormat(combine);
         if (words.size() != 0){
             combine = " " + combine;
         }
diff --git a/funasr/runtime/onnxruntime/src/vocab.h b/funasr/runtime/onnxruntime/src/vocab.h
index eecb9c8..23b4bd6 100644
--- a/funasr/runtime/onnxruntime/src/vocab.h
+++ b/funasr/runtime/onnxruntime/src/vocab.h
@@ -23,6 +23,7 @@
     bool IsChinese(string ch);
     void Vector2String(vector<int> in, std::vector<std::string> &preds);
     string Vector2StringV2(vector<int> in, std::string language="");
+    string WordFormat(std::string word);
     int GetIdByToken(const std::string &token);
 };
 
diff --git a/funasr/runtime/websocket/bin/funasr-wss-server.cpp b/funasr/runtime/websocket/bin/funasr-wss-server.cpp
index e64667b..eb1402b 100644
--- a/funasr/runtime/websocket/bin/funasr-wss-server.cpp
+++ b/funasr/runtime/websocket/bin/funasr-wss-server.cpp
@@ -195,11 +195,16 @@
                 size_t found = s_asr_path.find("speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404");
                 if (found != std::string::npos) {
                     model_path["model-revision"]="v1.2.4";
-                }else{
-                    found = s_asr_path.find("speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404");
-                    if (found != std::string::npos) {
-                        model_path["model-revision"]="v1.0.5";
-                    }
+                }
+
+                found = s_asr_path.find("speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404");
+                if (found != std::string::npos) {
+                    model_path["model-revision"]="v1.0.5";
+                }
+
+                found = s_asr_path.find("speech_paraformer-large_asr_nat-en-16k-common-vocab10020");
+                if (found != std::string::npos) {
+                    model_path["model-revision"]="v1.0.0";
                 }
 
                 // modelscope

--
Gitblit v1.9.1