Yabin Li
2023-05-08 8a08405b668e06c4670b4c13f6793e193f21a21d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# -*- encoding: utf-8 -*-
import os
import time
import websockets
import asyncio
# import threading
import argparse
import json
import traceback
from multiprocessing import Process
from funasr.fileio.datadir_writer import DatadirWriter
 
parser = argparse.ArgumentParser()
parser.add_argument("--host",
                    type=str,
                    default="localhost",
                    required=False,
                    help="host ip, localhost, 0.0.0.0")
parser.add_argument("--port",
                    type=int,
                    default=10095,
                    required=False,
                    help="grpc server port")
parser.add_argument("--chunk_size",
                    type=str,
                    default="5, 10, 5",
                    help="chunk")
parser.add_argument("--chunk_interval",
                    type=int,
                    default=10,
                    help="chunk")
parser.add_argument("--audio_in",
                    type=str,
                    default=None,
                    help="audio_in")
parser.add_argument("--send_without_sleep",
                    action="store_true",
                    default=False,
                    help="if audio_in is set, send_without_sleep")
parser.add_argument("--test_thread_num",
                    type=int,
                    default=1,
                    help="test_thread_num")
parser.add_argument("--words_max_print",
                    type=int,
                    default=100,
                    help="chunk")
parser.add_argument("--output_dir",
                    type=str,
                    default=None,
                    help="output_dir")
 
args = parser.parse_args()
args.chunk_size = [int(x) for x in args.chunk_size.split(",")]
print(args)
# voices = asyncio.Queue()
from queue import Queue
voices = Queue()
 
ibest_writer = None
if args.output_dir is not None:
    writer = DatadirWriter(args.output_dir)
    ibest_writer = writer[f"1best_recog"]
 
async def record_microphone():
    is_finished = False
    import pyaudio
    #print("2")
    global voices 
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    chunk_size = 60*args.chunk_size[1]/args.chunk_interval
    CHUNK = int(RATE / 1000 * chunk_size)
 
    p = pyaudio.PyAudio()
 
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    is_speaking = True
    while True:
 
        data = stream.read(CHUNK)
        data = data.decode('ISO-8859-1')
        message = json.dumps({"chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval, "audio": data, "is_speaking": is_speaking, "is_finished": is_finished})
        
        voices.put(message)
 
        await asyncio.sleep(0.005)
 
async def record_from_scp():
    import wave
    global voices
    is_finished = False
    if args.audio_in.endswith(".scp"):
        f_scp = open(args.audio_in)
        wavs = f_scp.readlines()
    else:
        wavs = [args.audio_in]
    for wav in wavs:
        wav_splits = wav.strip().split()
        wav_name = wav_splits[0] if len(wav_splits) > 1 else "demo"
        wav_path = wav_splits[1] if len(wav_splits) > 1 else wav_splits[0]
        
        # bytes_f = open(wav_path, "rb")
        # bytes_data = bytes_f.read()
        with wave.open(wav_path, "rb") as wav_file:
            params = wav_file.getparams()
            # header_length = wav_file.getheaders()[0][1]
            # wav_file.setpos(header_length)
            frames = wav_file.readframes(wav_file.getnframes())
 
        audio_bytes = bytes(frames)
        # stride = int(args.chunk_size/1000*16000*2)
        stride = int(60*args.chunk_size[1]/args.chunk_interval/1000*16000*2)
        chunk_num = (len(audio_bytes)-1)//stride + 1
        # print(stride)
        is_speaking = True
        for i in range(chunk_num):
            if i == chunk_num-1:
                is_speaking = False
            beg = i*stride
            data = audio_bytes[beg:beg+stride]
            data = data.decode('ISO-8859-1')
            message = json.dumps({"chunk_size": args.chunk_size, "chunk_interval": args.chunk_interval, "is_speaking": is_speaking, "audio": data, "is_finished": is_finished, "wav_name": wav_name})
            voices.put(message)
            # print("data_chunk: ", len(data_chunk))
            # print(voices.qsize())
            sleep_duration = 0.001 if args.send_without_sleep else 60*args.chunk_size[1]/args.chunk_interval/1000
            await asyncio.sleep(sleep_duration)
 
    is_finished = True
    message = json.dumps({"is_finished": is_finished})
    voices.put(message)
 
async def ws_send():
    global voices
    global websocket
    print("started to sending data!")
    while True:
        while not voices.empty():
            data = voices.get()
            voices.task_done()
            try:
                await websocket.send(data)
            except Exception as e:
                print('Exception occurred:', e)
                traceback.print_exc()
                exit(0)
            await asyncio.sleep(0.005)
        await asyncio.sleep(0.005)
 
 
 
async def message(id):
    global websocket
    text_print = ""
    while True:
        try:
            meg = await websocket.recv()
            meg = json.loads(meg)
            # print(meg, end = '')
            # print("\r")
            # print(meg)
            wav_name = meg.get("wav_name", "demo")
            print(wav_name)
            text = meg["text"]
            if ibest_writer is not None:
                ibest_writer["text"][wav_name] = text
            if meg["mode"] == "online":
                text_print += " {}".format(text)
            else:
                text_print += "{}".format(text)
            text_print = text_print[-args.words_max_print:]
            os.system('clear')
            print("\rpid"+str(id)+": "+text_print)
        except Exception as e:
            print("Exception:", e)
            traceback.print_exc()
            exit(0)
 
async def print_messge():
    global websocket
    while True:
        try:
            meg = await websocket.recv()
            meg = json.loads(meg)
            print(meg)
        except Exception as e:
            print("Exception:", e)
            traceback.print_exc()
            exit(0)
 
async def ws_client(id):
    global websocket
    uri = "ws://{}:{}".format(args.host, args.port)
    async for websocket in websockets.connect(uri, subprotocols=["binary"], ping_interval=None):
        if args.audio_in is not None:
            task = asyncio.create_task(record_from_scp())
        else:
            task = asyncio.create_task(record_microphone())
        task2 = asyncio.create_task(ws_send())
        task3 = asyncio.create_task(message(id))
        await asyncio.gather(task, task2, task3)
 
def one_thread(id):
   asyncio.get_event_loop().run_until_complete(ws_client(id)) # 启动协程
   asyncio.get_event_loop().run_forever()
 
 
if __name__ == '__main__':
    process_list = []
    for i in range(args.test_thread_num):   
        p = Process(target=one_thread,args=(i,))
        p.start()
        process_list.append(p)
 
    for i in process_list:
        p.join()
 
    print('end')