From 49c00a7d6cb9c05d4bd0bb0fc8b59a2eed4b8950 Mon Sep 17 00:00:00 2001
From: huangmingming <huangmingming@deepscience.cn>
Date: Mon, 13 Mar 2023 12:07:11 +0800
Subject: [PATCH] grpc client remove VAD
---
funasr/runtime/python/grpc/grpc_main_client_mic.py | 67 ++++++++++-----------------------
1 file changed, 21 insertions(+), 46 deletions(-)
diff --git a/funasr/runtime/python/grpc/grpc_main_client_mic.py b/funasr/runtime/python/grpc/grpc_main_client_mic.py
index de2cded..220e8b5 100644
--- a/funasr/runtime/python/grpc/grpc_main_client_mic.py
+++ b/funasr/runtime/python/grpc/grpc_main_client_mic.py
@@ -1,50 +1,25 @@
import pyaudio
-import scipy.io.wavfile as wav
-import grpc_client
import grpc
import json
-from grpc_client import transcribe_audio_bytes
-from paraformer_pb2_grpc import ASRStub
-import webrtcvad
-import numpy as np
import time
import asyncio
-import datetime
+import argparse
-SPEAKING = False
-stub = None
-asr_user = None
-language = None
+from grpc_client import transcribe_audio_bytes
+from paraformer_pb2_grpc import ASRStub
async def deal_chunk(sig_mic):
+ global stub,SPEAKING,asr_user,language,sample_rate
- global stub,SPEAKING,asr_user,language
- sig = np.frombuffer(sig_mic, 'int16')
- if vad.is_speech(sig.tobytes(), sample_rate): #speaking
- SPEAKING = True
- response = transcribe_audio_bytes(stub, sig, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
- #print("response")
- #print (response.next())
- else: #silence
- begin_time = 0
- if SPEAKING: #means we have some audio recorded, send recognize order to server.
- SPEAKING = False
- begin_time = int(round(time.time() * 1000))
- response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = False) #speak end, call server for recognize one sentence
- resp = response.next()
- if "decoding" == resp.action:
- print(resp.action)
- print(json.loads(resp.sentence))
- resp = response.next() #TODO, blocking operation may leads to miss some audio clips. C++ multi-threading is preferred.
- if "finish" == resp.action:
- end_time = int(round(time.time() * 1000))
- print (json.loads(resp.sentence))
- #print ("silence, end_time: %d " % end_time)
- print ("delay in ms: %d " % (end_time - begin_time))
- else:
- #debug
- print (resp.action + " " + str(json.loads(resp.sentence)))
- pass
+ SPEAKING = True
+ resp = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
+
+ if "decoding" == resp.action: #partial result
+ print(json.loads(resp.sentence))
+ elif "finish" == resp.action: #final result
+ print (json.loads(resp.sentence))
+
+
async def record(host,port,sample_rate,mic_chunk,record_seconds,asr_user,language):
@@ -58,7 +33,7 @@
#end grpc
response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = True)
- #print (response.next())
+ print (response.next().action)
if __name__ == '__main__':
@@ -97,13 +72,13 @@
args = parser.parse_args()
- global SPEAKING,asr_user,language
- SPEAKING = False
- asr_user = args.asr_user
- language = 'zh-CN'
- vad = webrtcvad.Vad()
- vad.set_mode(1)
+ SPEAKING = False
+ asr_user = args.user_allowed
+ sample_rate = args.sample_rate
+ language = 'zh-CN'
+
+
FORMAT = pyaudio.paInt16
CHANNELS = 1
@@ -116,7 +91,7 @@
frames_per_buffer=args.mic_chunk)
print("* recording")
- asyncio.run(record(args.host,args.port,args.sample_rate,args.mic_chunk,args.record_seconds,args.asr_user,args.language))
+ asyncio.run(record(args.host,args.port,args.sample_rate,args.mic_chunk,args.record_seconds,args.user_allowed,language))
stream.stop_stream()
stream.close()
p.terminate()
--
Gitblit v1.9.1