python/FunASR-XL.git

			@@ -10,71 +10,56 @@


			# Command-line configuration for the websocket ASR server.
			# Each option is registered exactly once; flags, types, defaults and help
			# strings are unchanged from the original definitions.
			parser = argparse.ArgumentParser()

			# Network endpoint the server binds to.
			parser.add_argument(
			    "--host", type=str, default="0.0.0.0", required=False, help="host ip, localhost, 0.0.0.0"
			)
			parser.add_argument("--port", type=int, default=10095, required=False, help="grpc server port")

			# Offline (non-streaming) ASR model and its revision.
			parser.add_argument(
			    "--asr_model",
			    type=str,
			    default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
			    help="model from modelscope",
			)
			parser.add_argument("--asr_model_revision", type=str, default="v2.0.4", help="")

			# Streaming (online) ASR model and its revision.
			parser.add_argument(
			    "--asr_model_online",
			    type=str,
			    default="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
			    help="model from modelscope",
			)
			parser.add_argument("--asr_model_online_revision", type=str, default="v2.0.4", help="")

			# Voice-activity-detection model and its revision.
			parser.add_argument(
			    "--vad_model",
			    type=str,
			    default="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
			    help="model from modelscope",
			)
			parser.add_argument("--vad_model_revision", type=str, default="v2.0.4", help="")

			# Punctuation model; the loading code compares this value against "" before
			# building the model (presumably an empty string disables punctuation).
			parser.add_argument(
			    "--punc_model",
			    type=str,
			    default="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727",
			    help="model from modelscope",
			)
			parser.add_argument("--punc_model_revision", type=str, default="v2.0.4", help="")

			# Compute resources forwarded to the model constructors.
			parser.add_argument("--ngpu", type=int, default=1, help="0 for cpu, 1 for gpu")
			parser.add_argument("--device", type=str, default="cuda", help="cuda, cpu")
			parser.add_argument("--ncpu", type=int, default=4, help="cpu cores")

			# TLS material; the start-up code only builds an SSL context when
			# --certfile is a non-empty string.
			parser.add_argument(
			    "--certfile",
			    type=str,
			    default="../../ssl_key/server.crt",
			    required=False,
			    help="certfile for ssl",
			)

			parser.add_argument(
			    "--keyfile",
			    type=str,
			    default="../../ssl_key/server.key",
			    required=False,
			    help="keyfile for ssl",
			)
			args = parser.parse_args()


			@@ -84,7 +69,8 @@
			from funasr import AutoModel

			# asr
			model_asr = AutoModel(model=args.asr_model,
			model_asr = AutoModel(
			model=args.asr_model,
			model_revision=args.asr_model_revision,
			ngpu=args.ngpu,
			ncpu=args.ncpu,
			@@ -93,7 +79,8 @@
			disable_log=True,
			)
			# asr
			model_asr_streaming = AutoModel(model=args.asr_model_online,
			model_asr_streaming = AutoModel(
			model=args.asr_model_online,
			model_revision=args.asr_model_online_revision,
			ngpu=args.ngpu,
			ncpu=args.ncpu,
			@@ -102,7 +89,8 @@
			disable_log=True,
			)
			# vad
			model_vad = AutoModel(model=args.vad_model,
			model_vad = AutoModel(
			model=args.vad_model,
			model_revision=args.vad_model_revision,
			ngpu=args.ngpu,
			ncpu=args.ncpu,
			@@ -113,7 +101,8 @@
			)

			if args.punc_model != "":
			model_punc = AutoModel(model=args.punc_model,
			model_punc = AutoModel(
			model=args.punc_model,
			model_revision=args.punc_model_revision,
			ngpu=args.ngpu,
			ncpu=args.ncpu,
			@@ -125,8 +114,8 @@
			model_punc = None



			print("model loaded! only support one client at the same time now!!!!")


			async def ws_reset(websocket):
			print("ws reset now, total num is ",len(websocket_users))
			@@ -146,7 +135,6 @@
			websocket_users.clear()



			async def ws_serve(websocket, path):
			frames = []
			frames_asr = []
			@@ -156,8 +144,8 @@
			websocket_users.add(websocket)
			websocket.status_dict_asr = {}
			websocket.status_dict_asr_online = {"cache": {}, "is_final": False}
			websocket.status_dict_vad = {'cache': {}, "is_final": False}
			websocket.status_dict_punc = {'cache': {}}
			websocket.status_dict_vad = {"cache": {}, "is_final": False}
			websocket.status_dict_punc = {"cache": {}}
			websocket.chunk_interval = 10
			websocket.vad_pre_idx = 0
			speech_start = False
			@@ -181,18 +169,24 @@
			if "chunk_size" in messagejson:
			chunk_size = messagejson["chunk_size"]
			if isinstance(chunk_size, str):
			chunk_size = chunk_size.split(',')
			chunk_size = chunk_size.split(",")
			websocket.status_dict_asr_online["chunk_size"] = [int(x) for x in chunk_size]
			if "encoder_chunk_look_back" in messagejson:
			websocket.status_dict_asr_online["encoder_chunk_look_back"] = messagejson["encoder_chunk_look_back"]
			websocket.status_dict_asr_online["encoder_chunk_look_back"] = messagejson[
			"encoder_chunk_look_back"
			]
			if "decoder_chunk_look_back" in messagejson:
			websocket.status_dict_asr_online["decoder_chunk_look_back"] = messagejson["decoder_chunk_look_back"]
			websocket.status_dict_asr_online["decoder_chunk_look_back"] = messagejson[
			"decoder_chunk_look_back"
			]
			if "hotword" in messagejson:
			websocket.status_dict_asr["hotword"] = messagejson["hotword"]
			if "mode" in messagejson:
			websocket.mode = messagejson["mode"]

			websocket.status_dict_vad["chunk_size"] = int(websocket.status_dict_asr_online["chunk_size"][1]*60/websocket.chunk_interval)
			websocket.status_dict_vad["chunk_size"] = int(
			websocket.status_dict_asr_online["chunk_size"][1] * 60 / websocket.chunk_interval
			)
			if len(frames_asr_online) > 0 or len(frames_asr) > 0 or not isinstance(message, str):
			if not isinstance(message, str):
			frames.append(message)
			@@ -202,7 +196,10 @@
			# asr online
			frames_asr_online.append(message)
			websocket.status_dict_asr_online["is_final"] = speech_end_i != -1
			if len(frames_asr_online) % websocket.chunk_interval == 0 or websocket.status_dict_asr_online["is_final"]:
			if (
			len(frames_asr_online) % websocket.chunk_interval == 0
			or websocket.status_dict_asr_online["is_final"]
			):
			if websocket.mode == "2pass" or websocket.mode == "online":
			audio_in = b"".join(frames_asr_online)
			try:
			@@ -243,7 +240,6 @@
			else:
			frames = frames[-20:]


			except websockets.ConnectionClosed:
			print("ConnectionClosed...", websocket_users,flush=True)
			await ws_reset(websocket)
			@@ -278,27 +274,46 @@
			# print("offline_asr, ", rec_result)
			if model_punc is not None and len(rec_result["text"])>0:
			# print("offline, before punc", rec_result, "cache", websocket.status_dict_punc)
			rec_result = model_punc.generate(input=rec_result['text'], **websocket.status_dict_punc)[0]
			rec_result = model_punc.generate(
			input=rec_result["text"], **websocket.status_dict_punc
			)[0]
			# print("offline, after punc", rec_result)
			if len(rec_result["text"])>0:
			# print("offline", rec_result)
			mode = "2pass-offline" if "2pass" in websocket.mode else websocket.mode
			message = json.dumps({"mode": mode, "text": rec_result["text"], "wav_name": websocket.wav_name,"is_final":websocket.is_speaking})
			message = json.dumps(
			{
			"mode": mode,
			"text": rec_result["text"],
			"wav_name": websocket.wav_name,
			"is_final": websocket.is_speaking,
			}
			)
			await websocket.send(message)


			async def async_asr_online(websocket, audio_in):
			    """Run streaming ASR on one audio chunk and send the partial result.

			    Args:
			        websocket: Client connection. Reads ``status_dict_asr_online``
			            (forwarded as keyword arguments to the streaming model),
			            ``mode``, ``wav_name`` and ``is_speaking`` from it, and sends
			            the JSON result through ``websocket.send``.
			        audio_in: Audio payload for this chunk; nothing happens when it
			            is empty.

			    Returns:
			        None. A JSON message is sent only when the recognised text is
			        non-empty.
			    """
			    if len(audio_in) == 0:
			        return

			    rec_result = model_asr_streaming.generate(
			        input=audio_in, **websocket.status_dict_asr_online
			    )[0]

			    # In "2pass" mode nothing is sent for the final chunk of a segment.
			    # NOTE(review): presumably the offline pass reports the final text
			    # instead; confirm against the offline handler.
			    if websocket.mode == "2pass" and websocket.status_dict_asr_online.get("is_final", False):
			        return

			    if len(rec_result["text"]):
			        mode = "2pass-online" if "2pass" in websocket.mode else websocket.mode
			        message = json.dumps(
			            {
			                "mode": mode,
			                "text": rec_result["text"],
			                "wav_name": websocket.wav_name,
			                # NOTE(review): "is_final" mirrors websocket.is_speaking
			                # as-is; confirm this is the intended meaning for clients.
			                "is_final": websocket.is_speaking,
			            }
			        )
			        await websocket.send(message)


			if len(args.certfile)>0:
			ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
			@@ -308,8 +323,12 @@
			ssl_key = args.keyfile

			ssl_context.load_cert_chain(ssl_cert, keyfile=ssl_key)
			start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None,ssl=ssl_context)
			start_server = websockets.serve(
			ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None, ssl=ssl_context
			)
			else:
			start_server = websockets.serve(ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None)
			start_server = websockets.serve(
			ws_serve, args.host, args.port, subprotocols=["binary"], ping_interval=None
			)
			asyncio.get_event_loop().run_until_complete(start_server)
			asyncio.get_event_loop().run_forever()