From 4f224c88068b66bcb6f81570da59d99c9bba8288 Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期五, 26 一月 2024 16:02:14 +0800
Subject: [PATCH] python-websocket funasr1.0 (#1310)

---
 examples/industrial_data_pretraining/ct_transformer_streaming/demo.py |    2 
 funasr/auto/auto_model.py                                             |    3 
 runtime/python/websocket/funasr_wss_client.py                         |   89 ++++++---
 runtime/python/websocket/funasr_wss_server.py                         |  422 +++++++++++++++++++++++++---------------------
 4 files changed, 290 insertions(+), 226 deletions(-)

diff --git a/examples/industrial_data_pretraining/ct_transformer_streaming/demo.py b/examples/industrial_data_pretraining/ct_transformer_streaming/demo.py
index e653c1a..66cc5c5 100644
--- a/examples/industrial_data_pretraining/ct_transformer_streaming/demo.py
+++ b/examples/industrial_data_pretraining/ct_transformer_streaming/demo.py
@@ -5,7 +5,7 @@
 
 from funasr import AutoModel
 
-model = AutoModel(model="damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727", model_revision="v2.0.4")
+model = AutoModel(model="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727", model_revision="v2.0.4")
 
 inputs = "跨境河流是养育沿岸|人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员|在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险|向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切|愿意进一步完善双方联合工作机制|凡是|中方能做的我们|都会去做而且会做得更好我请印度朋友们放心中国在上游的|任何开发利用都会经过科学|规划和论证兼顾上下游的利益"
 vads = inputs.split("|")
diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index 02b3b2e..d072219 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -88,7 +88,8 @@
 class AutoModel:
     
     def __init__(self, **kwargs):
-        tables.print()
+        if not kwargs.get("disable_log", False):  # dump the registry tables unless logging is disabled
+            tables.print()
         
         model, kwargs = self.build_model(**kwargs)
         
diff --git a/runtime/python/websocket/funasr_wss_client.py b/runtime/python/websocket/funasr_wss_client.py
index a2d8889..b30964a 100644
--- a/runtime/python/websocket/funasr_wss_client.py
+++ b/runtime/python/websocket/funasr_wss_client.py
@@ -29,6 +29,14 @@
                     type=str,
                     default="5, 10, 5",
                     help="chunk")
+parser.add_argument("--encoder_chunk_look_back",
+                    type=int,
+                    default=4,
+                    help="chunk")
+parser.add_argument("--decoder_chunk_look_back",
+                    type=int,
+                    default=0,
+                    help="chunk")
 parser.add_argument("--chunk_interval",
                     type=int,
                     default=10,
@@ -113,25 +121,36 @@
     fst_dict = {}
     hotword_msg = ""
     if args.hotword.strip() != "":
-        f_scp = open(args.hotword)
-        hot_lines = f_scp.readlines()
-        for line in hot_lines:
-            words = line.strip().split(" ")
-            if len(words) < 2:
-                print("Please checkout format of hotwords")
-                continue
-            try:
-                fst_dict[" ".join(words[:-1])] = int(words[-1])
-            except ValueError:
-                print("Please checkout format of hotwords")
-        hotword_msg=json.dumps(fst_dict)
+        if os.path.exists(args.hotword):
+            with open(args.hotword) as f_scp:  # close the hotword file as soon as it is read
+                hot_lines = f_scp.readlines()
+            for line in hot_lines:
+                words = line.strip().split(" ")
+                if len(words) < 2:
+                    print("Please checkout format of hotwords")
+                    continue
+                try:
+                    fst_dict[" ".join(words[:-1])] = int(words[-1])
+                except ValueError:
+                    print("Please checkout format of hotwords")
+            hotword_msg = json.dumps(fst_dict)
+        else:
+            hotword_msg = args.hotword
 
-    use_itn=True
+    use_itn = True
     if args.use_itn == 0:
         use_itn=False
     
-    message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval,
-                          "wav_name": "microphone", "is_speaking": True, "hotwords":hotword_msg, "itn": use_itn})
+    message = json.dumps({"mode": args.mode,
+                          "chunk_size": args.chunk_size,
+                          "chunk_interval": args.chunk_interval,
+                          "encoder_chunk_look_back": args.encoder_chunk_look_back,
+                          "decoder_chunk_look_back": args.decoder_chunk_look_back,
+                          "wav_name": "microphone",
+                          "is_speaking": True,
+                          "hotwords": hotword_msg,
+                          "itn": use_itn,
+                          })
     #voices.put(message)
     await websocket.send(message)
     while True:
@@ -154,18 +173,21 @@
     fst_dict = {}
     hotword_msg = ""
     if args.hotword.strip() != "":
-        f_scp = open(args.hotword)
-        hot_lines = f_scp.readlines()
-        for line in hot_lines:
-            words = line.strip().split(" ")
-            if len(words) < 2:
-                print("Please checkout format of hotwords")
-                continue
-            try:
-                fst_dict[" ".join(words[:-1])] = int(words[-1])
-            except ValueError:
-                print("Please checkout format of hotwords")
-        hotword_msg=json.dumps(fst_dict)
+        if os.path.exists(args.hotword):
+            with open(args.hotword) as f_scp:  # close the hotword file as soon as it is read
+                hot_lines = f_scp.readlines()
+            for line in hot_lines:
+                words = line.strip().split(" ")
+                if len(words) < 2:
+                    print("Please checkout format of hotwords")
+                    continue
+                try:
+                    fst_dict[" ".join(words[:-1])] = int(words[-1])
+                except ValueError:
+                    print("Please checkout format of hotwords")
+            hotword_msg = json.dumps(fst_dict)
+        else:
+            hotword_msg = args.hotword
         print (hotword_msg)
 
     sample_rate = args.audio_fs
@@ -203,8 +225,17 @@
         # print(stride)
 
         # send first time
-        message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval, "audio_fs":sample_rate,
-                          "wav_name": wav_name, "wav_format": wav_format, "is_speaking": True, "hotwords":hotword_msg, "itn": use_itn})
+        message = json.dumps({"mode": args.mode,
+                              "chunk_size": args.chunk_size,
+                              "chunk_interval": args.chunk_interval,
+                              "encoder_chunk_look_back": args.encoder_chunk_look_back,
+                              "decoder_chunk_look_back": args.decoder_chunk_look_back,
+                              "audio_fs":sample_rate,
+                              "wav_name": wav_name,
+                              "wav_format": wav_format,
+                              "is_speaking": True,
+                              "hotwords": hotword_msg,
+                              "itn": use_itn})
 
         #voices.put(message)
         await websocket.send(message)
diff --git a/runtime/python/websocket/funasr_wss_server.py b/runtime/python/websocket/funasr_wss_server.py
index 22d2a7f..7db7b0a 100644
--- a/runtime/python/websocket/funasr_wss_server.py
+++ b/runtime/python/websocket/funasr_wss_server.py
@@ -7,14 +7,7 @@
 import numpy as np
 import argparse
 import ssl
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-from modelscope.utils.logger import get_logger
 
-tracemalloc.start()
-
-logger = get_logger(log_level=logging.CRITICAL)
-logger.setLevel(logging.CRITICAL)
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--host",
@@ -29,24 +22,44 @@
                     help="grpc server port")
 parser.add_argument("--asr_model",
                     type=str,
-                    default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
+                    default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
                     help="model from modelscope")
+parser.add_argument("--asr_model_revision",
+                    type=str,
+                    default="v2.0.4",
+                    help="")
 parser.add_argument("--asr_model_online",
                     type=str,
-                    default="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
+                    default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
                     help="model from modelscope")
+parser.add_argument("--asr_model_online_revision",
+                    type=str,
+                    default="v2.0.4",
+                    help="")
 parser.add_argument("--vad_model",
                     type=str,
-                    default="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
+                    default="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
                     help="model from modelscope")
+parser.add_argument("--vad_model_revision",
+                    type=str,
+                    default="v2.0.4",
+                    help="")
 parser.add_argument("--punc_model",
                     type=str,
-                    default="damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727",
+                    default="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727",
                     help="model from modelscope")
+parser.add_argument("--punc_model_revision",
+                    type=str,
+                    default="v2.0.4",
+                    help="")
 parser.add_argument("--ngpu",
                     type=int,
                     default=1,
                     help="0 for cpu, 1 for gpu")
+parser.add_argument("--device",
+                    type=str,
+                    default="cuda",
+                    help="cuda, cpu")
 parser.add_argument("--ncpu",
                     type=int,
                     default=4,
@@ -68,213 +81,232 @@
 websocket_users = set()
 
 print("model loading")
+from funasr import AutoModel
+
 # asr
-inference_pipeline_asr = pipeline(
-    task=Tasks.auto_speech_recognition,
-    model=args.asr_model,
-    ngpu=args.ngpu,
-    ncpu=args.ncpu,
-    model_revision=None)
-
-
+model_asr = AutoModel(model=args.asr_model,
+                      model_revision=args.asr_model_revision,
+                      ngpu=args.ngpu,
+                      ncpu=args.ncpu,
+                      device=args.device,
+                      disable_pbar=True,
+                      disable_log=True,
+                      )
+# asr
+model_asr_streaming = AutoModel(model=args.asr_model_online,
+                                model_revision=args.asr_model_online_revision,
+                                ngpu=args.ngpu,
+                                ncpu=args.ncpu,
+                                device=args.device,
+                                disable_pbar=True,
+                                disable_log=True,
+                                )
 # vad
-inference_pipeline_vad = pipeline(
-    task=Tasks.voice_activity_detection,
-    model=args.vad_model,
-    model_revision=None,
-    mode='online',
-    ngpu=args.ngpu,
-    ncpu=args.ncpu,
-)
+model_vad = AutoModel(model=args.vad_model,
+                      model_revision=args.vad_model_revision,
+                      ngpu=args.ngpu,
+                      ncpu=args.ncpu,
+                      device=args.device,
+                      disable_pbar=True,
+                      disable_log=True,
+                      # chunk_size=60,
+                      )
 
 if args.punc_model != "":
-    inference_pipeline_punc = pipeline(
-        task=Tasks.punctuation,
-        model=args.punc_model,
-        model_revision="v1.0.2",
-        ngpu=args.ngpu,
-        ncpu=args.ncpu,
-    )
+	model_punc = AutoModel(model=args.punc_model,
+	                       model_revision=args.punc_model_revision,
+	                       ngpu=args.ngpu,
+	                       ncpu=args.ncpu,
+	                       device=args.device,
+	                       disable_pbar=True,
+                           disable_log=True,
+	                       )
 else:
-    inference_pipeline_punc = None
+	model_punc = None
 
-inference_pipeline_asr_online = pipeline(
-    task=Tasks.auto_speech_recognition,
-    model=args.asr_model_online,
-    ngpu=args.ngpu,
-    ncpu=args.ncpu,
-    model_revision='v1.0.7',
-    update_model='v1.0.7',
-    mode='paraformer_streaming')
+
 
 print("model loaded! only support one client at the same time now!!!!")
 
 async def ws_reset(websocket):
-    print("ws reset now, total num is ",len(websocket_users))
-    websocket.param_dict_asr_online = {"cache": dict()}
-    websocket.param_dict_vad = {'in_cache': dict(), "is_final": True}
-    websocket.param_dict_asr_online["is_final"]=True
-    # audio_in=b''.join(np.zeros(int(16000),dtype=np.int16))
-    # inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
-    # inference_pipeline_asr_online(audio_in=audio_in, param_dict=websocket.param_dict_asr_online)
-    await websocket.close()
-    
-    
+	print("ws reset now, total num is ",len(websocket_users))
+
+	websocket.status_dict_asr_online["cache"] = {}
+	websocket.status_dict_asr_online["is_final"] = True
+	websocket.status_dict_vad["cache"] = {}
+	websocket.status_dict_vad["is_final"] = True
+	websocket.status_dict_punc["cache"] = {}
+	
+	await websocket.close()
+
+
 async def clear_websocket():
-   for websocket in websocket_users:
-       await ws_reset(websocket)
-   websocket_users.clear()
- 
- 
-       
+	for websocket in websocket_users:
+		await ws_reset(websocket)
+	websocket_users.clear()
+
+
+
 async def ws_serve(websocket, path):
-    frames = []
-    frames_asr = []
-    frames_asr_online = []
-    global websocket_users
-    await clear_websocket()
-    websocket_users.add(websocket)
-    websocket.param_dict_asr = {}
-    websocket.param_dict_asr_online = {"cache": dict()}
-    websocket.param_dict_vad = {'in_cache': dict(), "is_final": False}
-    websocket.param_dict_punc = {'cache': list()}
-    websocket.vad_pre_idx = 0
-    speech_start = False
-    speech_end_i = -1
-    websocket.wav_name = "microphone"
-    websocket.mode = "2pass"
-    print("new user connected", flush=True)
-
-    try:
-        async for message in websocket:
-            if isinstance(message, str):
-                messagejson = json.loads(message)
-        
-                if "is_speaking" in messagejson:
-                    websocket.is_speaking = messagejson["is_speaking"]
-                    websocket.param_dict_asr_online["is_final"] = not websocket.is_speaking
-                if "chunk_interval" in messagejson:
-                    websocket.chunk_interval = messagejson["chunk_interval"]
-                if "wav_name" in messagejson:
-                    websocket.wav_name = messagejson.get("wav_name")
-                if "chunk_size" in messagejson:
-                    websocket.param_dict_asr_online["chunk_size"] = messagejson["chunk_size"]
-                if "encoder_chunk_look_back" in messagejson:
-                    websocket.param_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
-                if "decoder_chunk_look_back" in messagejson:
-                    websocket.param_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
-                if "mode" in messagejson:
-                    websocket.mode = messagejson["mode"]
-            if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):
-                if not isinstance(message, str):
-                    frames.append(message)
-                    duration_ms = len(message)//32
-                    websocket.vad_pre_idx += duration_ms
-        
-                    # asr online
-                    frames_asr_online.append(message)
-                    websocket.param_dict_asr_online["is_final"] = speech_end_i != -1
-                    if len(frames_asr_online) % websocket.chunk_interval == 0 or websocket.param_dict_asr_online["is_final"]:
-                        if websocket.mode == "2pass" or websocket.mode == "online":
-                            audio_in = b"".join(frames_asr_online)
-                            await async_asr_online(websocket, audio_in)
-                        frames_asr_online = []
-                    if speech_start:
-                        frames_asr.append(message)
-                    # vad online
-                    speech_start_i, speech_end_i = await async_vad(websocket, message)
-                    if speech_start_i != -1:
-                        speech_start = True
-                        beg_bias = (websocket.vad_pre_idx-speech_start_i)//duration_ms
-                        frames_pre = frames[-beg_bias:]
-                        frames_asr = []
-                        frames_asr.extend(frames_pre)
-                # asr punc offline
-                if speech_end_i != -1 or not websocket.is_speaking:
-                    # print("vad end point")
-                    if websocket.mode == "2pass" or websocket.mode == "offline":
-                        audio_in = b"".join(frames_asr)
-                        await async_asr(websocket, audio_in)
-                    frames_asr = []
-                    speech_start = False
-                    # frames_asr_online = []
-                    # websocket.param_dict_asr_online = {"cache": dict()}
-                    if not websocket.is_speaking:
-                        websocket.vad_pre_idx = 0
-                        frames = []
-                        websocket.param_dict_vad = {'in_cache': dict()}
-                    else:
-                        frames = frames[-20:]
-
-     
-    except websockets.ConnectionClosed:
-        print("ConnectionClosed...", websocket_users,flush=True)
-        await ws_reset(websocket)
-        websocket_users.remove(websocket)
-    except websockets.InvalidState:
-        print("InvalidState...")
-    except Exception as e:
-        print("Exception:", e)
+	frames = []
+	frames_asr = []
+	frames_asr_online = []
+	global websocket_users
+	await clear_websocket()
+	websocket_users.add(websocket)
+	websocket.status_dict_asr = {}
+	websocket.status_dict_asr_online = {"cache": {}, "is_final": False, "chunk_size": [5, 10, 5]}  # chunk_size default mirrors the client's "5, 10, 5"
+	websocket.status_dict_vad = {'cache': {}, "is_final": False}
+	websocket.status_dict_punc = {'cache': {}}
+	websocket.chunk_interval = 10
+	websocket.vad_pre_idx = 0
+	speech_start = False
+	speech_end_i = -1
+	websocket.wav_name = "microphone"
+	websocket.mode = "2pass"
+	print("new user connected", flush=True)
+	websocket.is_speaking = True  # default until the client sends "is_speaking"; read below on every audio frame
+	try:
+		async for message in websocket:
+			if isinstance(message, str):
+				messagejson = json.loads(message)
+				
+				if "is_speaking" in messagejson:
+					websocket.is_speaking = messagejson["is_speaking"]
+					websocket.status_dict_asr_online["is_final"] = not websocket.is_speaking
+				if "chunk_interval" in messagejson:
+					websocket.chunk_interval = messagejson["chunk_interval"]
+				if "wav_name" in messagejson:
+					websocket.wav_name = messagejson.get("wav_name")
+				if "chunk_size" in messagejson:
+					websocket.status_dict_asr_online["chunk_size"] = messagejson["chunk_size"]
+				if "encoder_chunk_look_back" in messagejson:
+					websocket.status_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
+				if "decoder_chunk_look_back" in messagejson:
+					websocket.status_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
+				if "hotwords" in messagejson:  # the client sends the plural key "hotwords"
+					websocket.status_dict_asr["hotword"] = messagejson["hotwords"]
+				if "mode" in messagejson:
+					websocket.mode = messagejson["mode"]
+			
+			websocket.status_dict_vad["chunk_size"] = int(websocket.status_dict_asr_online["chunk_size"][1]*60/websocket.chunk_interval)
+			if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):
+				if not isinstance(message, str):
+					frames.append(message)
+					duration_ms = len(message)//32
+					websocket.vad_pre_idx += duration_ms
+					
+					# asr online
+					frames_asr_online.append(message)
+					websocket.status_dict_asr_online["is_final"] = speech_end_i != -1
+					if len(frames_asr_online) % websocket.chunk_interval == 0 or websocket.status_dict_asr_online["is_final"]:
+						if websocket.mode == "2pass" or websocket.mode == "online":
+							audio_in = b"".join(frames_asr_online)
+							try:
+								await async_asr_online(websocket, audio_in)
+							except Exception:  # not bare: let CancelledError / KeyboardInterrupt propagate
+								print(f"error in asr streaming, {websocket.status_dict_asr_online}")
+						frames_asr_online = []
+					if speech_start:
+						frames_asr.append(message)
+					# vad online
+					# Default to "no boundary" so a VAD failure cannot leave the indices unbound or stale.
+					speech_start_i, speech_end_i = -1, -1
+					try:
+						speech_start_i, speech_end_i = await async_vad(websocket, message)
+					except Exception:  # not bare: let CancelledError / KeyboardInterrupt propagate
+						print("error in vad")
+					if speech_start_i != -1:
+						speech_start = True
+						beg_bias = (websocket.vad_pre_idx-speech_start_i)//duration_ms
+						frames_asr = frames[-beg_bias:]  # the slice is already a fresh list of the pre-roll frames
+				# asr punc offline
+				if speech_end_i != -1 or not websocket.is_speaking:
+					# print("vad end point")
+					if websocket.mode == "2pass" or websocket.mode == "offline":
+						audio_in = b"".join(frames_asr)
+						try:
+							await async_asr(websocket, audio_in)
+						except Exception:  # not bare: let CancelledError / KeyboardInterrupt propagate
+							print("error in asr offline")
+					frames_asr = []
+					speech_start = False
+					frames_asr_online = []
+					websocket.status_dict_asr_online["cache"] = {}
+					if not websocket.is_speaking:
+						websocket.vad_pre_idx = 0
+						frames = []
+						websocket.status_dict_vad["cache"] = {}
+					else:
+						frames = frames[-20:]
+	
+	
+	except websockets.ConnectionClosed:
+		print("ConnectionClosed...", websocket_users,flush=True)
+		await ws_reset(websocket)
+		websocket_users.discard(websocket)  # may already be gone if a newer client triggered clear_websocket()
+	except websockets.InvalidState:
+		print("InvalidState...")
+	except Exception as e:
+		print("Exception:", e)
 
 
 async def async_vad(websocket, audio_in):
-
-    segments_result = inference_pipeline_vad(audio_in=audio_in, param_dict=websocket.param_dict_vad)
-
-    speech_start = -1
-    speech_end = -1
-    
-    if len(segments_result) == 0 or len(segments_result["text"]) > 1:
-        return speech_start, speech_end
-    if segments_result["text"][0][0] != -1:
-        speech_start = segments_result["text"][0][0]
-    if segments_result["text"][0][1] != -1:
-        speech_end = segments_result["text"][0][1]
-    return speech_start, speech_end
+	
+	segments_result = model_vad.generate(input=audio_in, **websocket.status_dict_vad)[0]["value"]
+	# print(segments_result)
+	
+	speech_start = -1
+	speech_end = -1
+	
+	if len(segments_result) == 0 or len(segments_result) > 1:
+		return speech_start, speech_end
+	if segments_result[0][0] != -1:
+		speech_start = segments_result[0][0]
+	if segments_result[0][1] != -1:
+		speech_end = segments_result[0][1]
+	return speech_start, speech_end
 
 
 async def async_asr(websocket, audio_in):
-            if len(audio_in) > 0:
-                # print(len(audio_in))
-                rec_result = inference_pipeline_asr(audio_in=audio_in,
-                                                    param_dict=websocket.param_dict_asr)
-                # print(rec_result)
-                if inference_pipeline_punc is not None and 'text' in rec_result and len(rec_result["text"])>0:
-                    rec_result = inference_pipeline_punc(text_in=rec_result['text'],
-                                                         param_dict=websocket.param_dict_punc)
-                    # print("offline", rec_result)
-                if 'text' in rec_result:
-                    mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
-                    message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
-                    await websocket.send(message)
+	if len(audio_in) > 0:
+		# print(len(audio_in))
+		rec_result = model_asr.generate(input=audio_in, **websocket.status_dict_asr)[0]
+		# print("offline_asr, ", rec_result)
+		if model_punc is not None and len(rec_result["text"])>0:
+			# print("offline, before punc", rec_result, "cache", websocket.status_dict_punc)
+			rec_result = model_punc.generate(input=rec_result['text'], **websocket.status_dict_punc)[0]
+			# print("offline, after punc", rec_result)
+		if len(rec_result["text"])>0:
+			# print("offline", rec_result)
+			mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
+			message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
+			await websocket.send(message)
 
 
 async def async_asr_online(websocket, audio_in):
-    if len(audio_in) > 0:
-        # print(websocket.param_dict_asr_online.get("is_final", False))
-        rec_result = inference_pipeline_asr_online(audio_in=audio_in,
-                                                   param_dict=websocket.param_dict_asr_online)
-        # print(rec_result)
-        if websocket.mode == "2pass" and websocket.param_dict_asr_online.get("is_final", False):
-            return
-            #     websocket.param_dict_asr_online["cache"] = dict()
-        if "text" in rec_result:
-            if rec_result["text"] != "sil" and rec_result["text"] != "waiting_for_more_voice":
-                # print("online", rec_result)
-                mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode
-                message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
-                await websocket.send(message)
+	if len(audio_in) > 0:
+		# print(websocket.status_dict_asr_online.get("is_final", False))
+		rec_result = model_asr_streaming.generate(input=audio_in, **websocket.status_dict_asr_online)[0]
+		# print("online, ", rec_result)
+		if websocket.mode == "2pass" and websocket.status_dict_asr_online.get("is_final", False):
+			return
+			#     websocket.status_dict_asr_online["cache"] = dict()
+		if len(rec_result["text"]):
+			mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode
+			message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
+			await websocket.send(message)
 
 if len(args.certfile)>0:
-    ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
-    
-    # Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions
-    ssl_cert = args.certfile
-    ssl_key = args.keyfile
-    
-    ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
-    start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None,ssl=ssl_context)
+	ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+	
+	# Generate with Lets Encrypt, copied to this location, chown to current user and 400 permissions
+	ssl_cert = args.certfile
+	ssl_key = args.keyfile
+	
+	ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
+	start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None,ssl=ssl_context)
 else:
-    start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
+	start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
 asyncio.get_event_loop().run_until_complete(start_server)
 asyncio.get_event_loop().run_forever()

--
Gitblit v1.9.1