| funasr/runtime/python/grpc/grpc_main_server.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| funasr/runtime/python/grpc/grpc_server.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| funasr/runtime/python/grpc/paraformer_onnx.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| funasr/runtime/python/grpc/utils | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| funasr/runtime/python/onnxruntime/rapid_paraformer/utils/frontend.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 |
funasr/runtime/python/grpc/grpc_main_server.py
@@ -9,7 +9,8 @@ server = grpc.server(futures.ThreadPoolExecutor(max_workers=10), # interceptors=(AuthInterceptor('Bearer mysecrettoken'),) ) paraformer_pb2_grpc.add_ASRServicer_to_server(ASRServicer(args.user_allowed, args.model, args.sample_rate), server) paraformer_pb2_grpc.add_ASRServicer_to_server( ASRServicer(args.user_allowed, args.model, args.sample_rate, args.backend, args.onnx_dir), server) port = "[::]:" + str(args.port) server.add_insecure_port(port) server.start() @@ -37,7 +38,18 @@ parser.add_argument("--sample_rate", type=int, default=16000, help="audio sample_rate from client") help="audio sample_rate from client") parser.add_argument("--backend", type=str, default="pipeline", choices=("pipeline", "onnxruntime"), help="backend, optional modelscope pipeline or onnxruntime") parser.add_argument("--onnx_dir", type=str, default="/nfs/models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", help="onnx model dir") funasr/runtime/python/grpc/grpc_server.py
@@ -3,20 +3,32 @@ import json import time from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks import paraformer_pb2_grpc from paraformer_pb2 import Response from utils.frontend import load_bytes class ASRServicer(paraformer_pb2_grpc.ASRServicer): def __init__(self, user_allowed, model, sample_rate): def __init__(self, user_allowed, model, sample_rate, backend, onnx_dir): print("ASRServicer init") self.backend = backend self.init_flag = 0 self.client_buffers = {} self.client_transcription = {} self.auth_user = user_allowed.split("|") self.inference_16k_pipeline = pipeline(task=Tasks.auto_speech_recognition, model=model) if self.backend == "pipeline": try: from modelscope.pipelines import pipeline from modelscope.utils.constant import Tasks except ImportError: raise ImportError(f"Please install modelscope") self.inference_16k_pipeline = pipeline(task=Tasks.auto_speech_recognition, model=model) elif self.backend == "onnxruntime": try: from paraformer_onnx import Paraformer except ImportError: raise ImportError(f"Please install onnxruntime requirements, reference https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime/rapid_paraformer") self.inference_16k_pipeline = Paraformer(model_dir=onnx_dir) self.sample_rate = sample_rate def clear_states(self, user): @@ -90,12 +102,16 @@ result["text"] = "" print ("user: %s , delay(ms): %s, info: %s " % (req.user, delay_str, "waiting_for_more_voice")) yield Response(sentence=json.dumps(result), user=req.user, action="waiting", language=req.language) else: asr_result = self.inference_16k_pipeline(audio_in=tmp_data, audio_fs = self.sample_rate) if "text" in asr_result: asr_result = asr_result['text'] else: asr_result = "" else: if self.backend == "pipeline": asr_result = self.inference_16k_pipeline(audio_in=tmp_data, audio_fs = self.sample_rate) if "text" in asr_result: asr_result = asr_result['text'] else: asr_result = "" elif self.backend == "onnxruntime": array 
= load_bytes(tmp_data) asr_result = self.inference_16k_pipeline(array)[0] end_time = int(round(time.time() * 1000)) delay_str = str(end_time - begin_time) print ("user: %s , delay(ms): %s, text: %s " % (req.user, delay_str, asr_result)) funasr/runtime/python/grpc/paraformer_onnx.py
New file @@ -0,0 +1 @@ ../onnxruntime/paraformer/rapid_paraformer/paraformer_onnx.py funasr/runtime/python/grpc/utils
New file @@ -0,0 +1 @@ ../onnxruntime/paraformer/rapid_paraformer/utils funasr/runtime/python/onnxruntime/rapid_paraformer/utils/frontend.py
# @@ -134,3 +134,18 @@  (diff context: tail of the preceding function, kept verbatim)
#     vars = np.array(vars_list).astype(np.float64)
#     cmvn = np.array([means, vars])
#     return cmvn


def load_bytes(input):
    """Decode a buffer of 16-bit signed integers into normalized float32 samples.

    The buffer is interpreted as consecutive ``np.int16`` values in the
    machine's native byte order, and every value is divided by 32768 so the
    result lies in the half-open range ``[-1.0, 1.0)``.

    NOTE(review): the gRPC server passes its accumulated client audio buffer
    here, so the input is presumably raw 16-bit PCM -- confirm the client's
    sample format and byte order.

    Args:
        input: A bytes-like object whose length is a multiple of 2.

    Returns:
        A new, writable 1-D ``np.float32`` array with one element per
        16-bit input sample.

    Raises:
        ValueError: Raised by ``np.frombuffer`` when the buffer length is not
            a multiple of the 2-byte sample size.
    """
    # Zero-copy, read-only view of the caller's buffer as int16 samples.
    samples = np.frombuffer(input, dtype=np.int16)

    # int16 spans [-32768, 32767]; scaling by 2**15 maps it onto [-1.0, 1.0).
    # Every int16 value and the power-of-two divisor are exactly representable
    # in float32, so the conversion is lossless.
    scale = np.float32(2 ** 15)
    return samples.astype(np.float32) / scale