From cd6d7d4ef09308dacea89cf7d5b4ce80cf84c456 Mon Sep 17 00:00:00 2001
From: huangmingming <huangmingming@deepscience.cn>
Date: Thu, 16 Mar 2023 15:37:08 +0800
Subject: [PATCH] resume vad in client
---
funasr/runtime/python/grpc/grpc_main_client_mic.py | 31 +++++++++++++++++++++----------
1 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/funasr/runtime/python/grpc/grpc_main_client_mic.py b/funasr/runtime/python/grpc/grpc_main_client_mic.py
index 220e8b5..acbe90b 100644
--- a/funasr/runtime/python/grpc/grpc_main_client_mic.py
+++ b/funasr/runtime/python/grpc/grpc_main_client_mic.py
@@ -1,6 +1,7 @@
import pyaudio
import grpc
import json
+import webrtcvad
import time
import asyncio
import argparse
@@ -10,16 +11,24 @@
async def deal_chunk(sig_mic):
global stub,SPEAKING,asr_user,language,sample_rate
-
- SPEAKING = True
- resp = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
-
- if "decoding" == resp.action: #partial result
- print(json.loads(resp.sentence))
- elif "finish" == resp.action: #final result
- print (json.loads(resp.sentence))
-
-
+ if vad.is_speech(sig_mic, sample_rate): # VAD reports speech in this chunk
+ SPEAKING = True
+ response = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) # speech: send this audio chunk to the server
+ else: # VAD reports silence in this chunk
+ begin_time = 0
+ if SPEAKING: # audio was sent before this silence, so ask the server to recognize it
+ SPEAKING = False
+ begin_time = int(round(time.time() * 1000))
+ response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = False) # end of speech: ask the server to recognize one sentence
+ resp = response.next()
+ if "decoding" == resp.action:
+ resp = response.next() # TODO: this blocking call may cause some audio chunks to be missed; C++ multi-threading is preferred.
+ if "finish" == resp.action:
+ end_time = int(round(time.time() * 1000))
+ print (json.loads(resp.sentence))
+ print ("delay in ms: %d " % (end_time - begin_time))
+ else:
+ pass
async def record(host,port,sample_rate,mic_chunk,record_seconds,asr_user,language):
@@ -79,6 +88,8 @@
language = 'zh-CN'
+ vad = webrtcvad.Vad()
+ vad.set_mode(1)
FORMAT = pyaudio.paInt16
CHANNELS = 1
--
Gitblit v1.9.1