From ec05a7cb1f530662ca253002ebbe8ce675ce1da6 Mon Sep 17 00:00:00 2001
From: cgisky1980 <new448885@gmail.com>
Date: 星期二, 21 三月 2023 20:09:33 +0800
Subject: [PATCH] Create vad_asr_websocket_client.py
---
funasr/runtime/python/vad_asr_websocket_client/vad_asr_websocket_client.py | 197 +++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 197 insertions(+), 0 deletions(-)
diff --git a/funasr/runtime/python/vad_asr_websocket_client/vad_asr_websocket_client.py b/funasr/runtime/python/vad_asr_websocket_client/vad_asr_websocket_client.py
new file mode 100644
index 0000000..c5096cb
--- /dev/null
+++ b/funasr/runtime/python/vad_asr_websocket_client/vad_asr_websocket_client.py
@@ -0,0 +1,197 @@
+#""" from https://github.com/cgisky1980/550W_AI_Assistant """
+
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+import logging
+logger = get_logger(log_level=logging.CRITICAL)
+logger.setLevel(logging.CRITICAL)
+import websocket
+import pyaudio
+import time
+import json
+import threading
+
+
+# ---------WebsocketClient: mainly handles on_message / on_open; reconnect-on-disconnect is already implemented
+class WebsocketClient(object):
+ def __init__(self, address, message_callback=None):
+ super(WebsocketClient, self).__init__()
+ self.address = address
+ self.message_callback = None
+
+ def on_message(self, ws, message):
+ try:
+ messages = json.loads(
+ (message.encode("raw_unicode_escape")).decode()
+ ) # 鏀跺埌WS娑堟伅鍚庣殑澶勭悊
+ if messages.get("type") == "ping":
+ self.ws.send('{"type":"pong"}')
+ except json.JSONDecodeError as e:
+ print(f"JSONDecodeError: {e}")
+ except KeyError:
+ print("KeyError!")
+
+ def on_error(self, ws, error):
+ print("client error:", error)
+
+ def on_close(self, ws):
+ print("### client closed ###")
+ self.ws.close()
+ self.is_running = False
+
+ def on_open(self, ws): # 杩炰笂ws鍚庡彂甯冪櫥褰曚俊鎭�
+ self.is_running = True
+ self.ws.send(
+ '{"type":"login","uid":"asr","pwd":"tts9102093109"}'
+ ) # WS閾炬帴涓婂悗鐨勭櫥闄嗗鐞�
+
+ def close_connect(self):
+ self.ws.close()
+
+ def send_message(self, message):
+ try:
+ self.ws.send(message)
+ except BaseException as err:
+ pass
+
+ def run(self): # WS鍒濆鍖�
+ websocket.enableTrace(True)
+ self.ws = websocket.WebSocketApp(
+ self.address,
+ on_message=lambda ws, message: self.on_message(ws, message),
+ on_error=lambda ws, error: self.on_error(ws, error),
+ on_close=lambda ws: self.on_close(ws),
+ )
+ websocket.enableTrace(False) # 瑕佺湅ws璋冭瘯淇℃伅锛岃鎶婅繖琛屾敞閲婃帀
+ self.ws.on_open = lambda ws: self.on_open(ws)
+ self.is_running = False
+ # WS鏂嚎閲嶈繛鍒ゆ柇
+ while True:
+ if not self.is_running:
+ self.ws.run_forever()
+ time.sleep(3) # 3绉掓娴嬩竴娆�
+
+
+class WSClient(object):
+ def __init__(self, address, call_back):
+ super(WSClient, self).__init__()
+ self.client = WebsocketClient(address, call_back)
+ self.client_thread = None
+
+ def run(self):
+ self.client_thread = threading.Thread(target=self.run_client)
+ self.client_thread.start()
+
+ def run_client(self):
+ self.client.run()
+
+ def send_message(self, message):
+ self.client.send_message(message)
+
+
+def vad(data): # VAD鎺ㄧ悊
+ segments_result = vad_pipline(audio_in=data)
+ if segments_result["text"] == "[]":
+ return False
+ else:
+ return True
+
+
+# 鍒涘缓涓�涓猇AD瀵硅薄
+vad_pipline = pipeline(
+ task=Tasks.voice_activity_detection,
+ model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
+ model_revision="v1.2.0",
+ output_dir=None,
+ batch_size=1,
+)
+
+param_dict = dict()
+param_dict["hotword"] = "灏忎簲 灏忎簲鏈�" # 璁剧疆鐑瘝锛岀敤绌烘牸闅斿紑
+
+
+# 鍒涘缓涓�涓狝SR瀵硅薄
+inference_pipeline2 = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model="damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404",
+ param_dict=param_dict,
+)
+
+# Create a PyAudio object
+p = pyaudio.PyAudio()
+
+# Define the capture parameters
+FORMAT = pyaudio.paInt16  # sample format
+CHANNELS = 1  # mono
+RATE = 16000  # sample rate
+CHUNK = int(RATE / 1000 * 300)  # frames per chunk (300 ms)
+RECORD_NUM = 0  # recording length, counted in chunks
+
+# Open the input stream
+stream = p.open(
+    format=FORMAT,
+    channels=CHANNELS,
+    rate=RATE,
+    input=True,
+    frames_per_buffer=CHUNK,
+)
+
+print("寮�濮�...")
+
+# Create a WS connection (no message callback is supplied)
+ws_client = WSClient("ws://localhost:7272", None)
+ws_client.run()
+
+frames = [] # 瀛樺偍鎵�鏈夌殑甯ф暟鎹�
+buffer = [] # 瀛樺偍缂撳瓨涓殑甯ф暟鎹紙鏈�澶氫袱涓墖娈碉級
+silence_count = 0 # 缁熻杩炵画闈欓煶鐨勬鏁�
+speech_detected = False # 鏍囪鏄惁妫�娴嬪埌璇煶
+
+# 寰幆璇诲彇杈撳叆娴佷腑鐨勬暟鎹�
+while True:
+ data = stream.read(CHUNK) # 璇诲彇涓�涓墖娈电殑鏁版嵁
+ buffer.append(data) # 灏嗗綋鍓嶆暟鎹坊鍔犲埌缂撳瓨涓�
+
+ if len(buffer) > 2:
+ buffer.pop(0) # 濡傛灉缂撳瓨瓒呰繃涓や釜鐗囨锛屽垯鍒犻櫎鏈�鏃╃殑涓�涓�
+
+ if speech_detected:
+ frames.append(data)
+ RECORD_NUM += 1
+ # print(str(RECORD_NUM)+ "\r")
+
+ if vad(data): # VAD 鍒ゆ柇鏄惁鏈夊0闊�
+ if not speech_detected:
+ print("寮�濮嬪綍闊�...")
+ speech_detected = True # 鏍囪涓烘娴嬪埌璇煶
+ frames = []
+ frames.extend(buffer) # 鎶婁箣鍓�2涓闊虫暟鎹揩鍔犲叆
+ silence_count = 0 # 閲嶇疆闈欓煶娆℃暟
+
+ else:
+ silence_count += 1 # 澧炲姞闈欓煶娆℃暟
+ #妫�娴嬮潤闊虫鏁�4娆� 鎴栬�呭凡缁忓綍浜�50涓暟鎹潡锛屽垯褰曢煶鍋滄
+ if speech_detected and (silence_count > 4 or RECORD_NUM > 50):
+ print("鍋滄褰曢煶...")
+ audio_in = b"".join(frames)
+ rec_result = inference_pipeline2(audio_in=audio_in) # ws鎾姤鏁版嵁
+ rec_result["type"] = "nlp" # 娣诲姞ws鎾姤鏁版嵁
+ ws_client.send_message(
+ json.dumps(rec_result, ensure_ascii=False)
+ ) # ws鍙戦�佸埌鏈嶅姟绔�
+ print(rec_result)
+ frames = [] # 娓呯┖鎵�鏈夌殑甯ф暟鎹�
+ buffer = [] # 娓呯┖缂撳瓨涓殑甯ф暟鎹紙鏈�澶氫袱涓墖娈碉級
+ silence_count = 0 # 缁熻杩炵画闈欓煶鐨勬鏁版竻闆�
+ speech_detected = False # 鏍囪鏄惁妫�娴嬪埌璇煶
+ # RECORD_NUM = 0
+
+print("缁撴潫褰曞埗...")
+
+# 鍋滄骞跺叧闂緭鍏ユ祦
+stream.stop_stream()
+stream.close()
+
+# 鍏抽棴PyAudio瀵硅薄
+p.terminate()
--
Gitblit v1.9.1