huangmingming
2023-01-29 64b9366a05acad111c74fa3e3058fac54b145776
add client
2个文件已添加
145 ■■■■■ 已修改文件
funasr/runtime/python/grpc/grpc_client.py 19 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/python/grpc/grpc_main_client_mic.py 126 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/python/grpc/grpc_client.py
New file
@@ -0,0 +1,19 @@
import paraformer_pb2
import paraformer_pb2_grpc
import grpc
import queue
def transcribe_audio_bytes(stub, chunk, user='zksz', language='zh-CN', speaking=True, isEnd=False):
    """Send one Request message to the ASR server and return its reply stream.

    Parameters
    ----------
    stub : ASRStub
        Connected gRPC stub for the paraformer ASR service.
    chunk : numpy.ndarray or None
        Audio samples to send; ``None`` sends a control-only message
        (used for end-of-speech / end-of-session signalling).
    user, language : str
        Client identity and language tag forwarded to the server.
    speaking : bool
        True while audio for the current utterance is still being streamed.
    isEnd : bool
        True on the final message of the whole session.

    Returns
    -------
    gRPC response iterator from ``stub.Recognize``.
    """
    req = paraformer_pb2.Request()
    if chunk is not None:
        # numpy removed ndarray.tostring() (deprecated since 1.19);
        # tobytes() produces the identical raw-byte payload.
        req.audio_data = chunk.tobytes()
    req.user = user
    req.language = language
    req.speaking = speaking
    req.isEnd = isEnd
    # Recognize takes a request iterator; wrap the single request in a
    # queue-backed iterator (iter(get, None) would stop on a queued None).
    my_queue = queue.SimpleQueue()
    my_queue.put(req)
    return stub.Recognize(iter(my_queue.get, None))
funasr/runtime/python/grpc/grpc_main_client_mic.py
New file
@@ -0,0 +1,126 @@
import argparse
import asyncio
import datetime
import json
import time

import grpc
import numpy as np
import pyaudio
import scipy.io.wavfile as wav
import webrtcvad

import grpc_client
from grpc_client import transcribe_audio_bytes
from paraformer_pb2_grpc import ASRStub
# Module-level state shared between deal_chunk() and record();
# (re)initialised by the __main__ block at the bottom of this file.
SPEAKING = False  # True while VAD is currently detecting speech
stub = None  # gRPC ASRStub; created inside record()
asr_user = None  # user id attached to every request
language = None  # language tag attached to every request
async def deal_chunk(sig_mic):
    """Run VAD on one microphone chunk and drive the recognition protocol.

    While speech is detected, forward the audio to the server; on the first
    silent chunk after speech, ask the server to decode the utterance and
    print the decoded sentence plus round-trip latency.

    sig_mic: raw bytes from the mic stream — assumed 16-bit PCM mono to
    match np.frombuffer('int16') below (TODO confirm against stream setup).
    Reads module globals: stub, SPEAKING, asr_user, language, vad, sample_rate.
    NOTE(review): `sample_rate` is never assigned at module scope in this
    file (only `args.sample_rate` and function locals exist), so this
    lookup raises NameError at runtime — confirm and set it in __main__.
    """
    global stub,SPEAKING,asr_user,language
    # Reinterpret the raw byte buffer as 16-bit samples.
    sig = np.frombuffer(sig_mic, 'int16')
    if vad.is_speech(sig.tobytes(), sample_rate): #speaking
        SPEAKING = True
        response = transcribe_audio_bytes(stub, sig, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
        #print("response")
        #print (response.next())
    else: #silence
        begin_time = 0
        if SPEAKING: #means we have some audio recorded, send recognize order to server.
            # Falling edge of speech: request decoding of the buffered utterance.
            SPEAKING = False
            begin_time = int(round(time.time() * 1000))
            response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = False) #speak end, call server for recognize one sentence
            # Blocking read of the server's streamed reply (gRPC response
            # iterators expose a next() method).
            resp = response.next()
            if "decoding" == resp.action:
                print(resp.action)
                print(json.loads(resp.sentence))
                resp = response.next() #TODO, blocking operation may leads to miss some audio clips. C++ multi-threading is preferred.
                if "finish" == resp.action:
                    end_time = int(round(time.time() * 1000))
                    print (json.loads(resp.sentence))
                    #print ("silence, end_time: %d " % end_time)
                    print ("delay in ms: %d " % (end_time - begin_time))
                else:
                    #debug
                    print (resp.action + " " + str(json.loads(resp.sentence)))
                    pass
async def record(host, port, sample_rate, mic_chunk, record_seconds, asr_user, language):
    """Read mic chunks from the global `stream` and feed them to the server.

    Opens a gRPC channel to host:port, publishes the stub via the module
    global, then loops for roughly `record_seconds` seconds reading
    `mic_chunk`-sized frames and handing each to deal_chunk().  Finally
    sends an isEnd message so the server can close the session.
    """
    with grpc.insecure_channel('{}:{}'.format(host, port)) as channel:
        global stub
        stub = ASRStub(channel)
        # Number of mic reads needed to cover record_seconds of audio.
        for _ in range(int(sample_rate / mic_chunk * record_seconds)):
            # NOTE(review): stream.read() blocks the event loop; acceptable
            # here because nothing else runs concurrently.
            sig_mic = stream.read(mic_chunk, exception_on_overflow=False)
            # Await the coroutine directly — wrapping it in create_task()
            # and awaiting immediately added overhead without concurrency.
            await deal_chunk(sig_mic)
        # Tell the server the whole session is over.
        response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking=False, isEnd=True)
        #print (response.next())
if __name__ == '__main__':
    # Command-line interface; requires `import argparse` at the top of the file.
    parser = argparse.ArgumentParser()
    parser.add_argument("--host",
                        type=str,
                        default="127.0.0.1",
                        required=True,
                        help="grpc server host ip")
    parser.add_argument("--port",
                        type=int,
                        default=10095,
                        required=True,
                        help="grpc server port")
    parser.add_argument("--user_allowed",
                        type=str,
                        default="project1_user1",
                        help="allowed user for grpc client")
    parser.add_argument("--sample_rate",
                        type=int,
                        default=16000,
                        help="audio sample_rate from client")
    parser.add_argument("--mic_chunk",
                        type=int,
                        default=160,
                        help="chunk size for mic")
    parser.add_argument("--record_seconds",
                        type=int,
                        default=120,
                        help="run specified seconds then exit ")
    args = parser.parse_args()

    # Initialise the module-level state read by deal_chunk().  Plain
    # assignment suffices here: `global` is a no-op at module scope.
    SPEAKING = False
    asr_user = args.user_allowed   # was args.asr_user — no such option is defined
    language = 'zh-CN'
    sample_rate = args.sample_rate  # deal_chunk() reads this as a module global

    vad = webrtcvad.Vad()
    vad.set_mode(1)  # aggressiveness 1 (0=least .. 3=most aggressive)

    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=args.sample_rate,
                input=True,
                frames_per_buffer=args.mic_chunk)
    print("* recording")
    # was args.asr_user / args.language — neither attribute exists on args;
    # pass the locals assembled above instead.
    asyncio.run(record(args.host, args.port, args.sample_rate, args.mic_chunk,
                       args.record_seconds, asr_user, language))
    stream.stop_stream()
    stream.close()
    p.terminate()
    print("recording stop")