嘉渊
2023-04-24 6427c834dfd97b1f05c6659cdc7ccf010bf82fe1
funasr/runtime/python/grpc/grpc_main_client_mic.py
@@ -1,30 +1,19 @@
import pyaudio
import scipy.io.wavfile as wav
import grpc_client
import grpc
import json
from grpc_client import transcribe_audio_bytes
from paraformer_pb2_grpc import ASRStub
import webrtcvad
import numpy as np
import time
import asyncio
import datetime
import argparse
# Module-level state shared with deal_chunk() through its `global` statement.
# SPEAKING: set to True by deal_chunk() when the VAD reports speech, and tested
#           on a silent chunk to decide whether to request recognition.
SPEAKING = False
# stub: passed as the first argument to every transcribe_audio_bytes() call;
#       starts as None here (where it is assigned is not visible in this span).
stub = None
# asr_user / language: forwarded as the `user=` and `language=` keyword
# arguments of transcribe_audio_bytes(); assigned in the __main__ block.
asr_user = None
language = None
from grpc_client import transcribe_audio_bytes
from paraformer_pb2_grpc import ASRStub
async def deal_chunk(sig_mic):
    # Process one chunk of microphone audio (`sig_mic`):
    #   * if the VAD reports speech, mark SPEAKING and send the chunk to the server;
    #   * otherwise, if SPEAKING is set, ask the server to recognize what was sent
    #     and print the replies it returns.
    # NOTE(review): this span looks diff-derived (see the "@@" hunk marker below) and
    # appears to interleave pre-change and post-change lines: there are two `global`
    # statements, two `vad.is_speech` checks and two speaking-branch
    # `transcribe_audio_bytes` calls. Confirm which variant is the intended one.
    # NOTE(review): no `await` appears in the visible body, so the `response.next()`
    # calls below run synchronously inside this coroutine.
    global stub,SPEAKING,asr_user,language
    # Reinterpret the raw buffer as int16 samples.
    sig = np.frombuffer(sig_mic, 'int16')
    if vad.is_speech(sig.tobytes(), sample_rate): #speaking
    global stub,SPEAKING,asr_user,language,sample_rate
    if vad.is_speech(sig_mic, sample_rate): #speaking
        SPEAKING = True
        response = transcribe_audio_bytes(stub, sig, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
        #print("response")
        #print (response.next())
        response = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
    else: #silence   
        # NOTE(review): in the lines visible here begin_time stays 0, so the
        # "delay in ms" printed below would equal end_time itself; lines elided
        # by the hunk boundary may reassign it -- confirm.
        begin_time = 0
        if SPEAKING: #means we have some audio recorded, send recognize order to server.
@@ -33,17 +22,12 @@
            # No audio payload (None) with speaking=False / isEnd=False: the speech
            # segment ended, ask the server to recognize one sentence.
            response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = False) #speak end, call server for recognize one sentence
            # First reply from the server.
            resp = response.next()           
            if "decoding" == resp.action:   
                print(resp.action)
                # resp.sentence is parsed as JSON before printing.
                print(json.loads(resp.sentence))
                # Second reply; handled below depending on whether its action is "finish".
                resp = response.next() #TODO: this blocking call may cause some audio clips to be missed; C++ multi-threading is preferred.
                if "finish" == resp.action:        
                    # Wall-clock time in milliseconds when the "finish" reply arrived.
                    end_time = int(round(time.time() * 1000))
                    print (json.loads(resp.sentence))
                    #print ("silence, end_time: %d " % end_time)
                    print ("delay in ms: %d " % (end_time - begin_time))
                else:
                    #debug: print any other action together with its parsed sentence
                    print (resp.action + " " + str(json.loads(resp.sentence)))
                    pass
        
@@ -58,7 +42,7 @@
        # End the gRPC session: send a request with no audio payload (None),
        # speaking=False and isEnd=True, then print the action of the first reply.
        # NOTE(review): the enclosing function header is outside this span
        # (presumably `record`, which the __main__ block calls) -- confirm.
        response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = True)
        #print (response.next())
        print (response.next().action)
# Script entry point: parse arguments, initialise the shared state and the VAD,
# run the recording coroutine, then release the audio stream.
# NOTE(review): this span looks diff-derived -- the "@@" hunk markers are kept
# verbatim and the lines between hunks (argument definitions, the start of the
# call that `frames_per_buffer=` belongs to) are not visible here.
if __name__ == '__main__':
@@ -97,10 +81,12 @@
    args = parser.parse_args()
    
    # NOTE(review): this `global` statement sits at module scope, not inside a
    # function; it may be a pre-change line from the diff -- confirm.
    global SPEAKING,asr_user,language
    SPEAKING = False
    # NOTE(review): asr_user is assigned twice in a row (args.asr_user, then
    # args.user_allowed); these look like the old and new versions of one line.
    # Confirm which attribute the argument parser actually defines.
    asr_user = args.asr_user
    asr_user = args.user_allowed
    sample_rate = args.sample_rate
    language = 'zh-CN'  
    # VAD instance used by deal_chunk() via vad.is_speech(); mode is set to 1.
    vad = webrtcvad.Vad()
    vad.set_mode(1)
@@ -116,7 +102,7 @@
                frames_per_buffer=args.mic_chunk)
                
    print("* recording")
    # NOTE(review): record() is launched twice with different last two arguments
    # (args.asr_user/args.language vs. args.user_allowed/language); these look
    # like the old and new versions of the same call -- confirm only one remains.
    asyncio.run(record(args.host,args.port,args.sample_rate,args.mic_chunk,args.record_seconds,args.asr_user,args.language))
    asyncio.run(record(args.host,args.port,args.sample_rate,args.mic_chunk,args.record_seconds,args.user_allowed,language))
    # Stop and close the stream, then terminate `p`.
    stream.stop_stream()
    stream.close()
    p.terminate()