From c2e4e3c2e9be855277d9f4fa9cd0544892ff829a Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期三, 30 八月 2023 09:57:30 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add

---
 funasr/runtime/wss-client/FunASRWSClient_Offline/README.md                       |   20 +++---
 egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py |    4 
 funasr/runtime/onnxruntime/src/funasrruntime.cpp                                 |    2 
 funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs              |   75 ++++++++++++++++++++++---
 funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs                      |   30 ++++++++++
 funasr/runtime/wss-client/confg/hotword.txt                                      |    3 +
 funasr/runtime/html5/h5Server.py                                                 |    6 +-
 funasr/export/models/__init__.py                                                 |    5 +
 8 files changed, 119 insertions(+), 26 deletions(-)

diff --git a/egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py b/egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py
index d3674a8..7399ee2 100644
--- a/egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py
+++ b/egs_modelscope/speech_separation/speech_separation_mossformer_8k_pytorch/demo.py
@@ -5,10 +5,10 @@
 
 input = 'https://modelscope.cn/api/v1/models/damo/speech_separation_mossformer_8k_pytorch/repo?Revision=master&FilePath=examples/mix_speech1.wav'
 separation = pipeline(
-   Tasks.funasr_speech_separation,
+   Tasks.speech_separation,
    model='damo/speech_separation_mossformer_8k_pytorch',
    output_dir='./',
-   model_revision='v1.0.1')
+   model_revision='v1.0.2')
 result = separation(audio_in=input)
 for i, signal in enumerate(result):
     save_file = f'output_spk_{i+1}.wav'
diff --git a/funasr/export/models/__init__.py b/funasr/export/models/__init__.py
index cba92a8..94447dc 100644
--- a/funasr/export/models/__init__.py
+++ b/funasr/export/models/__init__.py
@@ -25,8 +25,9 @@
     elif isinstance(model, BiCifParaformer):
         return BiCifParaformer_export(model, **export_config)
     elif isinstance(model, ParaformerOnline):
-        return (ParaformerOnline_encoder_predictor_export(model, model_name="model"),
-                ParaformerOnline_decoder_export(model, model_name="decoder"))
+        encoder = ParaformerOnline_encoder_predictor_export(model, model_name="model")
+        decoder = ParaformerOnline_decoder_export(model, model_name="decoder")
+        return [encoder, decoder]
     elif isinstance(model, Paraformer):
         return Paraformer_export(model, **export_config)
     elif isinstance(model, Conformer_export):
diff --git a/funasr/runtime/html5/h5Server.py b/funasr/runtime/html5/h5Server.py
index d0ecf27..96392f8 100644
--- a/funasr/runtime/html5/h5Server.py
+++ b/funasr/runtime/html5/h5Server.py
@@ -6,7 +6,7 @@
 ### 2022-2023 by zhaoming,mali aihealthx.com 
 
 
-from flask import Flask,render_template,request,send_from_directory,jsonify
+from flask import Flask,render_template,request,send_from_directory,jsonify,redirect,url_for
 #from gevent.pywsgi import WSGIServer
  
 import datetime
@@ -20,7 +20,7 @@
 
 @app.route('/')
 def homePage():
-    return render_template('recorderapp_test.html')
+    return redirect('/static/index.html')
 
  
 parser = argparse.ArgumentParser()
@@ -62,4 +62,4 @@
     #flask 
     print("srv run on ",port)
 
-    app.run(debug=True,host=args.host,port=port, ssl_context=(args.certfile,args.keyfile))
+    app.run(debug=False,threaded=True,host=args.host,port=port, ssl_context=(args.certfile,args.keyfile))
diff --git a/funasr/runtime/onnxruntime/src/funasrruntime.cpp b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
index 4946a22..a10e3ec 100644
--- a/funasr/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
@@ -390,7 +390,7 @@
 			// if (!audio->FfmpegLoad(sz_buf, n_len))
 			// 	return nullptr;
 			LOG(ERROR) <<"Wrong wav_format: " << wav_format ;
-			exit(-1);
+			return nullptr;
 		}
 
 		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
diff --git a/funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs b/funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs
index a0039e7..cfdddeb 100644
--- a/funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs
+++ b/funasr/runtime/wss-client/FunASRWSClient_Offline/Program.cs
@@ -19,11 +19,13 @@
     {
         public static string host = "0.0.0.0";
         public static string port = "10095";
+        public static string hotword = null;
         private static CWebSocketClient m_websocketclient = new CWebSocketClient();
         [STAThread]
         public async void FunASR_Main()
         {
             loadconfig();
+            loadhotword();
             //鍒濆鍖栭�氫俊杩炴帴
             string errorStatus = string.Empty;
             string commstatus = ClientConnTest();
@@ -72,6 +74,34 @@
                     }
                 }
             }
+
+        }
+        // Reads "hotword.txt" (path relative to the working directory) and joins its lines with
+        // single spaces into the static `hotword` field. A read error is printed to the console
+        // and leaves `hotword` with whatever had been appended before the failure.
+        static void loadhotword()
+        {
+            string filePath = "hotword.txt";
+            try
+            {
+                // Read the text file line by line through a StreamReader.
+                using (StreamReader sr = new StreamReader(filePath))
+                {
+                    string line;
+                    while ((line = sr.ReadLine()) != null)
+                    {
+                        hotword += line + " ";
+                    }
+                }
+            }
+            catch (Exception ex)
+            {
+                Console.WriteLine("璇诲彇鏂囦欢鏃跺彂鐢熼敊璇細" + ex.Message);
+            }
+            // `hotword` is still null when the file is missing or empty; the null-safe check
+            // avoids the NullReferenceException an unguarded hotword.Length would raise here.
+            if (!string.IsNullOrEmpty(hotword) && hotword[hotword.Length - 1] == ' ')
+                hotword = hotword.Substring(0, hotword.Length - 1);
+        }
         private static string ClientConnTest()
         {
diff --git a/funasr/runtime/wss-client/FunASRWSClient_Offline/README.md b/funasr/runtime/wss-client/FunASRWSClient_Offline/README.md
index 3563560..8a5742c 100644
--- a/funasr/runtime/wss-client/FunASRWSClient_Offline/README.md
+++ b/funasr/runtime/wss-client/FunASRWSClient_Offline/README.md
@@ -1,9 +1,11 @@
-# cshape-client-offline
-
-这是一个基于FunASR-Websocket服务器的CShape客户端，用于转录本地音频文件。
-
-将配置文件放在与程序相同目录下的config文件夹中，并在config.ini中配置服务器ip地址和端口号。
-
-配置好服务端ip和端口号，在vs中打开需添加Websocket.Client的Nuget程序包后，可直接进行测试，按照控制台提示操作即可。
-
-娉細鏈鎴风鏆傛敮鎸亀av鏂囦欢锛屽湪win11涓嬪畬鎴愭祴璇曪紝缂栬瘧鐜VS2022銆�
\ No newline at end of file
+# cshape-client-offline
+
+这是一个基于FunASR-Websocket服务器的CShape客户端，用于转录本地音频文件。
+
+将配置文件放在与程序相同目录下的config文件夹中，并在config.ini中配置服务器ip地址和端口号。
+
+配置好服务端ip和端口号，在vs中打开需添加Websocket.Client的Nuget程序包后，可直接进行测试，按照控制台提示操作即可。
+
+更新：支持热词和时间戳，热词需将config文件夹下的hotword.txt放置在执行路径下。
+
+娉細杩愯鍚庡彴椤绘敞鎰忕儹璇嶅拰鏃堕棿鎴充负涓嶅悓妯″瀷锛屾湰瀹㈡埛绔湪win11涓嬪畬鎴愭祴璇曪紝缂栬瘧鐜VS2022銆�
diff --git a/funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs b/funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs
index 9208524..350aa20 100644
--- a/funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs
+++ b/funasr/runtime/wss-client/FunASRWSClient_Offline/WebScoketClient.cs
@@ -2,6 +2,7 @@
 using System.Text.Json;
 using System.Reactive.Linq;
 using FunASRWSClient_Offline;
+using System.Text.RegularExpressions;
 
 namespace WebSocketSpace
 {
@@ -45,15 +46,31 @@
 
         public async Task<Task> ClientSendFileFunc(string file_name)//鏂囦欢杞綍
         {
+            string fileExtension = Path.GetExtension(file_name);
+            fileExtension = fileExtension.Replace(".", "");
+            if (!(fileExtension == "mp3" || fileExtension == "mp4" || fileExtension == "wav" || fileExtension == "pcm"))
+                return Task.CompletedTask;
+
             try
             {
                 if (client.IsRunning)
                 {
-                    var exitEvent = new ManualResetEvent(false);
-                    string path = Path.GetFileName(file_name);
-                    string firstbuff = string.Format("{{\"mode\": \"offline\", \"wav_name\": \"{0}\", \"is_speaking\": true}}", Path.GetFileName(file_name));
-                    client.Send(firstbuff);
-                    showWAVForm(client, file_name);
+                    if (fileExtension == "wav")
+                    {
+                        var exitEvent = new ManualResetEvent(false);
+                        string path = Path.GetFileName(file_name);
+                        string firstbuff = string.Format("{{\"mode\": \"offline\", \"wav_name\": \"{0}\", \"is_speaking\": true,\"hotwords\":\"{1}\"}}", Path.GetFileName(file_name), WSClient_Offline.hotword);
+                        client.Send(firstbuff);
+                        showWAVForm(client, file_name);
+                    }
+                    else
+                    {
+                        var exitEvent = new ManualResetEvent(false);
+                        string path = Path.GetFileName(file_name);
+                        string firstbuff = string.Format("{{\"mode\": \"offline\", \"wav_name\": \"{0}\", \"is_speaking\": true,\"hotwords\":\"{1}\", \"wav_format\":\"{2}\"}}", Path.GetFileName(file_name), WSClient_Offline.hotword, fileExtension);
+                        client.Send(firstbuff);
+                        showWAVForm_All(client, file_name);
+                    }
                 }
             }
             catch (Exception ex)
@@ -69,15 +86,42 @@
             {
                 try
                 {
+                    string timestamp = string.Empty;
                     JsonDocument jsonDoc = JsonDocument.Parse(message);
                     JsonElement root = jsonDoc.RootElement;
                     string mode = root.GetProperty("mode").GetString();
-                    string text = root.GetProperty("text").GetString();
+                    string text = root.GetProperty("text").GetString(); 
                     string name = root.GetProperty("wav_name").GetString();
-                    if(name == "asr_stream")
-                        Console.WriteLine($"瀹炴椂璇嗗埆鍐呭: {text}");
+                    if (message.IndexOf("timestamp") != -1)
+                    {
+                        Console.WriteLine($"鏂囦欢鍚嶇О:{name}");
+                        //璇嗗埆鍐呭澶勭悊
+                        text = text.Replace(",", "銆�");
+                        text = text.Replace("?", "銆�");
+                        List<string> sens = text.Split("銆�").ToList();
+                        //鏃堕棿鎴冲鐞�
+                        timestamp = root.GetProperty("timestamp").GetString();
+                        List<List<int>> data = new List<List<int>>();
+                        string pattern = @"\[(\d+),(\d+)\]";
+                        foreach (Match match in Regex.Matches(timestamp, pattern))
+                        {
+                            int start = int.Parse(match.Groups[1].Value);
+                            int end = int.Parse(match.Groups[2].Value);
+                            data.Add(new List<int> { start, end });
+                        }
+                        int count = 0;
+                        for (int i = 0; i< sens.Count;  i++)
+                        {
+                            if (sens[i].Length == 0)
+                                continue;
+                            Console.WriteLine(string.Format($"[{data[count][0]}-{data[count + sens[i].Length - 1][1]}]:{sens[i]}"));
+                            count += sens[i].Length;
+                        }
+                    }
                     else
-                        Console.WriteLine($"鏂囦欢鍚嶇О:{name} 鏂囦欢杞綍鍐呭: {text}");
+                    {
+                        Console.WriteLine($"鏂囦欢鍚嶇О:{name} 鏂囦欢杞綍鍐呭: {text} 鏃堕棿鎴筹細{timestamp}");
+                    }
                 }
                 catch (JsonException ex)
                 {
@@ -100,6 +144,19 @@
             client.Send("{\"is_speaking\": false}");
         }
 
+        private void showWAVForm_All(WebsocketClient client, string file_name) // send the file's bytes in slices of at most 1024000 bytes
+        {
+            byte[] payload = FileToByte(file_name).ToArray();
+            int total = payload.Length;
+            for (int offset = 0; offset < total; offset += 1024000)
+            {
+                client.Send(new ArraySegment<byte>(payload, offset, Math.Min(1024000, total - offset)).ToArray());
+                Thread.Sleep(5); // 5 ms pause after each slice
+            }
+            Thread.Sleep(10); // 10 ms pause before the closing control message
+            client.Send("{\"is_speaking\": false}");
+        }
+
         public byte[] FileToByte(string fileUrl)
         {
             try
diff --git a/funasr/runtime/wss-client/confg/hotword.txt b/funasr/runtime/wss-client/confg/hotword.txt
new file mode 100644
index 0000000..c5468ea
--- /dev/null
+++ b/funasr/runtime/wss-client/confg/hotword.txt
@@ -0,0 +1,3 @@
+阿里巴巴
+达摩院
+FunASR
\ No newline at end of file

--
Gitblit v1.9.1