From cd6d7d4ef09308dacea89cf7d5b4ce80cf84c456 Mon Sep 17 00:00:00 2001
From: huangmingming <huangmingming@deepscience.cn>
Date: Thu, 16 Mar 2023 15:37:08 +0800
Subject: [PATCH] resume vad in client

---
 funasr/runtime/python/grpc/grpc_main_client_mic.py |   31 +++++++++++++++++++++----------
 1 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/funasr/runtime/python/grpc/grpc_main_client_mic.py b/funasr/runtime/python/grpc/grpc_main_client_mic.py
index 220e8b5..acbe90b 100644
--- a/funasr/runtime/python/grpc/grpc_main_client_mic.py
+++ b/funasr/runtime/python/grpc/grpc_main_client_mic.py
@@ -1,6 +1,7 @@
 import pyaudio
 import grpc
 import json
+import webrtcvad
 import time
 import asyncio
 import argparse
@@ -10,16 +11,24 @@
 
 async def deal_chunk(sig_mic):
     global stub,SPEAKING,asr_user,language,sample_rate
-    
-    SPEAKING = True
-    resp = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
-          
-    if "decoding" == resp.action:     #partial result
-        print(json.loads(resp.sentence))
-    elif "finish" == resp.action:     #final result
-        print (json.loads(resp.sentence))
-
-
+    # NOTE(review): webrtcvad expects 10/20/30 ms frames of 16-bit mono PCM; confirm mic_chunk matches.
+    if vad.is_speech(sig_mic, sample_rate):  # speaking
+        SPEAKING = True
+        # Speech detected: send this audio chunk to the server.
+        response = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking=True, isEnd=False)
+    elif SPEAKING:  # first silent chunk after speech: ask the server to recognize the sentence
+        SPEAKING = False
+        begin_time = int(round(time.time() * 1000))
+        response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking=False, isEnd=False)
+        # Use builtin next() with a default: a StopIteration escaping an async def surfaces as RuntimeError.
+        resp = next(response, None)
+        if resp is not None and "decoding" == resp.action:
+            # TODO: this blocking read may cause audio chunks to be missed; C++ multi-threading is preferred.
+            resp = next(response, None)
+        if resp is not None and "finish" == resp.action:  # final result (also when it arrives first)
+            end_time = int(round(time.time() * 1000))
+            print(json.loads(resp.sentence))
+            print("delay in ms: %d " % (end_time - begin_time))
         
 
 async def record(host,port,sample_rate,mic_chunk,record_seconds,asr_user,language):
@@ -79,6 +88,8 @@
     language = 'zh-CN'  
     
 
+    vad = webrtcvad.Vad()
+    vad.set_mode(1)
 
     FORMAT = pyaudio.paInt16
     CHANNELS = 1

--
Gitblit v1.9.1