From 94de39dde2e616a01683c518023d0fab72b4e103 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期一, 19 二月 2024 22:21:50 +0800
Subject: [PATCH] aishell example

---
 runtime/python/websocket/funasr_wss_client.py |  147 +++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 113 insertions(+), 34 deletions(-)

diff --git a/runtime/python/websocket/funasr_wss_client.py b/runtime/python/websocket/funasr_wss_client.py
index 7c96553..b30964a 100644
--- a/runtime/python/websocket/funasr_wss_client.py
+++ b/runtime/python/websocket/funasr_wss_client.py
@@ -27,24 +27,32 @@
                     help="grpc server port")
 parser.add_argument("--chunk_size",
                     type=str,
-                    default="0, 10, 5",
+                    default="5, 10, 5",
                     help="chunk")
 parser.add_argument("--encoder_chunk_look_back",
                     type=int,
                     default=4,
-                    help="number of chunks to lookback for encoder self-attention")
+                    help="chunk")
 parser.add_argument("--decoder_chunk_look_back",
                     type=int,
-                    default=1,
-                    help="number of encoder chunks to lookback for decoder cross-attention")
+                    default=0,
+                    help="chunk")
 parser.add_argument("--chunk_interval",
                     type=int,
                     default=10,
                     help="chunk")
+parser.add_argument("--hotword",
+                    type=str,
+                    default="",
+                    help="hotword file path, one hotword perline (e.g.:阿里巴巴 20)")
 parser.add_argument("--audio_in",
                     type=str,
                     default=None,
                     help="audio_in")
+parser.add_argument("--audio_fs",
+                    type=int,
+                    default=16000,
+                    help="audio_fs")
 parser.add_argument("--send_without_sleep",
                     action="store_true",
                     default=True,
@@ -61,11 +69,14 @@
                     type=str,
                     default=None,
                     help="output_dir")
-
 parser.add_argument("--ssl",
                     type=int,
                     default=1,
                     help="1 for ssl connect, 0 for no ssl")
+parser.add_argument("--use_itn",
+                    type=int,
+                    default=1,
+                    help="1 for using itn, 0 for not itn")
 parser.add_argument("--mode",
                     type=str,
                     default="2pass",
@@ -106,10 +117,40 @@
                     rate=RATE,
                     input=True,
                     frames_per_buffer=CHUNK)
+    # hotwords
+    fst_dict = {}
+    hotword_msg = ""
+    if args.hotword.strip() != "":
+        if os.path.exists(args.hotword):
+            f_scp = open(args.hotword)
+            hot_lines = f_scp.readlines()
+            for line in hot_lines:
+                words = line.strip().split(" ")
+                if len(words) < 2:
+                    print("Please checkout format of hotwords")
+                    continue
+                try:
+                    fst_dict[" ".join(words[:-1])] = int(words[-1])
+                except ValueError:
+                    print("Please checkout format of hotwords")
+            hotword_msg = json.dumps(fst_dict)
+        else:
+            hotword_msg = args.hotword
 
-    message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "encoder_chunk_look_back": args.encoder_chunk_look_back,
-                          "decoder_chunk_look_back": args.decoder_chunk_look_back, "chunk_interval": args.chunk_interval, 
-                          "wav_name": "microphone", "is_speaking": True})
+    use_itn = True
+    if args.use_itn == 0:
+        use_itn=False
+    
+    message = json.dumps({"mode": args.mode,
+                          "chunk_size": args.chunk_size,
+                          "chunk_interval": args.chunk_interval,
+                          "encoder_chunk_look_back": args.encoder_chunk_look_back,
+                          "decoder_chunk_look_back": args.decoder_chunk_look_back,
+                          "wav_name": "microphone",
+                          "is_speaking": True,
+                          "hotwords": hotword_msg,
+                          "itn": use_itn,
+                          })
     #voices.put(message)
     await websocket.send(message)
     while True:
@@ -127,6 +168,34 @@
         wavs = f_scp.readlines()
     else:
         wavs = [args.audio_in]
+
+    # hotwords
+    fst_dict = {}
+    hotword_msg = ""
+    if args.hotword.strip() != "":
+        if os.path.exists(args.hotword):
+            f_scp = open(args.hotword)
+            hot_lines = f_scp.readlines()
+            for line in hot_lines:
+                words = line.strip().split(" ")
+                if len(words) < 2:
+                    print("Please checkout format of hotwords")
+                    continue
+                try:
+                    fst_dict[" ".join(words[:-1])] = int(words[-1])
+                except ValueError:
+                    print("Please checkout format of hotwords")
+            hotword_msg = json.dumps(fst_dict)
+        else:
+            hotword_msg = args.hotword
+        print (hotword_msg)
+
+    sample_rate = args.audio_fs
+    wav_format = "pcm"
+    use_itn=True
+    if args.use_itn == 0:
+        use_itn=False
+     
     if chunk_size > 0:
         wavs = wavs[chunk_begin:chunk_begin + chunk_size]
     for wav in wavs:
@@ -143,29 +212,31 @@
             import wave
             with wave.open(wav_path, "rb") as wav_file:
                 params = wav_file.getparams()
+                sample_rate = wav_file.getframerate()
                 frames = wav_file.readframes(wav_file.getnframes())
-                audio_bytes = bytes(frames)
+                audio_bytes = bytes(frames)        
         else:
-            import ffmpeg
-            try:
-                # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
-                # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
-                audio_bytes, _ = (
-                    ffmpeg.input(wav_path, threads=0)
-                    .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=16000)
-                    .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
-                )
-            except ffmpeg.Error as e:
-                raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
+            wav_format = "others"
+            with open(wav_path, "rb") as f:
+                audio_bytes = f.read()
 
-        # stride = int(args.chunk_size/1000*16000*2)
-        stride = int(60 * args.chunk_size[1] / args.chunk_interval / 1000 * 16000 * 2)
+        stride = int(60 * args.chunk_size[1] / args.chunk_interval / 1000 * sample_rate * 2)
         chunk_num = (len(audio_bytes) - 1) // stride + 1
         # print(stride)
 
         # send first time
-        message = json.dumps({"mode": args.mode, "chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval,
-                              "wav_name": wav_name, "is_speaking": True})
+        message = json.dumps({"mode": args.mode,
+                              "chunk_size": args.chunk_size,
+                              "chunk_interval": args.chunk_interval,
+                              "encoder_chunk_look_back": args.encoder_chunk_look_back,
+                              "decoder_chunk_look_back": args.decoder_chunk_look_back,
+                              "audio_fs":sample_rate,
+                              "wav_name": wav_name,
+                              "wav_format": wav_format,
+                              "is_speaking": True,
+                              "hotwords": hotword_msg,
+                              "itn": use_itn})
+
         #voices.put(message)
         await websocket.send(message)
         is_speaking = True
@@ -213,29 +284,37 @@
         
             meg = await websocket.recv()
             meg = json.loads(meg)
-            # print(meg)
             wav_name = meg.get("wav_name", "demo")
             text = meg["text"]
+            timestamp=""
+            offline_msg_done = meg.get("is_final", False)
+            if "timestamp" in meg:
+                timestamp = meg["timestamp"]
 
             if ibest_writer is not None:
-                text_write_line = "{}\t{}\n".format(wav_name, text)
+                if timestamp !="":
+                    text_write_line = "{}\t{}\t{}\n".format(wav_name, text, timestamp)
+                else:
+                    text_write_line = "{}\t{}\n".format(wav_name, text)
                 ibest_writer.write(text_write_line)
-                
+
+            if 'mode' not in meg:
+                continue
             if meg["mode"] == "online":
                 text_print += "{}".format(text)
                 text_print = text_print[-args.words_max_print:]
                 os.system('clear')
                 print("\rpid" + str(id) + ": " + text_print)
             elif meg["mode"] == "offline":
-                text_print += "{}".format(text)
+                if timestamp !="":
+                    text_print += "{} timestamp: {}".format(text, timestamp)
+                else:
+                    text_print += "{}".format(text)
+
                 # text_print = text_print[-args.words_max_print:]
                 # os.system('clear')
                 print("\rpid" + str(id) + ": " + wav_name + ": " + text_print)
-                if ("is_final" in meg and meg["is_final"]==False):
-                    offline_msg_done = True
-                
-                if not "is_final" in meg:
-                    offline_msg_done = True
+                offline_msg_done = True
             else:
                 if meg["mode"] == "2pass-online":
                     text_print_2pass_online += "{}".format(text)
@@ -247,7 +326,7 @@
                 text_print = text_print[-args.words_max_print:]
                 os.system('clear')
                 print("\rpid" + str(id) + ": " + text_print)
-                offline_msg_done=True
+                # offline_msg_done=True
 
     except Exception as e:
             print("Exception:", e)

--
Gitblit v1.9.1