Merge branch 'alibaba-damo-academy:main' into cpp-python-websocket-compatible
| | |
| | | ): |
| | | assert check_argument_types() |
| | | self.set_all_random_seed(0) |
| | | if cache_dir is None: |
| | | cache_dir = Path.home() / ".cache" / "export" |
| | | |
| | | self.cache_dir = Path(cache_dir) |
| | | self.cache_dir = cache_dir |
| | | self.export_config = dict( |
| | | feats_dim=560, |
| | | onnx=False, |
| | | ) |
| | | print("output dir: {}".format(self.cache_dir)) |
| | | |
| | | self.onnx = onnx |
| | | self.device = device |
| | | self.quant = quant |
| | |
| | | verbose: bool = False, |
| | | ): |
| | | |
| | | export_dir = self.cache_dir / tag_name.replace(' ', '-') |
| | | export_dir = self.cache_dir |
| | | os.makedirs(export_dir, exist_ok=True) |
| | | |
| | | # export encoder1 |
| | |
| | | if model_dir.startswith('damo'): |
| | | from modelscope.hub.snapshot_download import snapshot_download |
| | | model_dir = snapshot_download(model_dir, cache_dir=self.cache_dir) |
| | | self.cache_dir = model_dir |
| | | |
| | | if mode is None: |
| | | import json |
| | |
| | | ## For the Server |
| | | |
| | | ### Prepare server environment |
| | | #### Backend is modelscope pipeline (default) |
| | | Install modelscope and funasr: |
| | | |
| | | ```shell |
| | |
| | | pip install -r requirements_server.txt |
| | | ``` |
| | | |
| | | #### Backend is funasr_onnx (optional) |
| | | |
| | | Install [`funasr_onnx`](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime). |
| | | |
| | | ``` |
| | | pip install funasr_onnx -i https://pypi.Python.org/simple |
| | | ``` |
| | | |
| | | Export the model; for more details, refer to the [export docs](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime). |
| | | ```shell |
| | | python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True |
| | | ``` |
| | | |
| | | ### Generate protobuf file |
| | | Run this on the server; the two generated pb files are used by both the server and the client. |
| | |
| | | python grpc_main_server.py --port 10095 --backend pipeline |
| | | ``` |
| | | |
| | | If you want to run the server with onnxruntime, please set `backend` and `onnx_dir`. |
| | | ``` |
| | | # Start server. |
| | | python grpc_main_server.py --port 10095 --backend onnxruntime --onnx_dir /models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch |
| | | ``` |
| | | |
| | | ## For the client |
| | | |
| | |
| | | |
| | | <div align="left"><img src="proto/workflow.png" width="400"/></div> |
| | | |
| | | ## Reference |
| | | We borrowed from or referred to the following code: |
| | | |
| | | 1)https://github.com/wenet-e2e/wenet/tree/main/runtime/core/grpc |
| | | |
| | | 2)https://github.com/Open-Speech-EkStep/inference_service/blob/main/realtime_inference_service.py |
| | | ## Acknowledge |
| | | 1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR). |
| | |
| | | plot_timestamp_to: str = "", |
| | | quantize: bool = False, |
| | | intra_op_num_threads: int = 4, |
| | | cache_dir: str = None |
| | | ): |
| | | |
| | | if not Path(model_dir).exists(): |
| | | raise FileNotFoundError(f'{model_dir} does not exist.') |
| | | |
| | | from modelscope.hub.snapshot_download import snapshot_download |
| | | try: |
| | | model_dir = snapshot_download(model_dir, cache_dir=cache_dir) |
| | | except: |
| | | raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(model_dir) |
| | | |
| | | model_file = os.path.join(model_dir, 'model.onnx') |
| | | if quantize: |
| | | model_file = os.path.join(model_dir, 'model_quant.onnx') |
| | | if not os.path.exists(model_file): |
| | | print(".onnx is not exist, begin to export onnx") |
| | | from funasr.export.export_model import ModelExport |
| | | export_model = ModelExport( |
| | | cache_dir=cache_dir, |
| | | onnx=True, |
| | | device="cpu", |
| | | quant=quantize, |
| | | ) |
| | | export_model.export(model_dir) |
| | | |
| | | config_file = os.path.join(model_dir, 'config.yaml') |
| | | cmvn_file = os.path.join(model_dir, 'am.mvn') |
| | | config = read_yaml(config_file) |
| | |
| | | batch_size: int = 1, |
| | | device_id: Union[str, int] = "-1", |
| | | quantize: bool = False, |
| | | intra_op_num_threads: int = 4 |
| | | intra_op_num_threads: int = 4, |
| | | cache_dir: str = None, |
| | | ): |
| | | |
| | | |
| | | if not Path(model_dir).exists(): |
| | | raise FileNotFoundError(f'{model_dir} does not exist.') |
| | | |
| | | from modelscope.hub.snapshot_download import snapshot_download |
| | | try: |
| | | model_dir = snapshot_download(model_dir, cache_dir=cache_dir) |
| | | except: |
| | | raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format( |
| | | model_dir) |
| | | |
| | | model_file = os.path.join(model_dir, 'model.onnx') |
| | | if quantize: |
| | | model_file = os.path.join(model_dir, 'model_quant.onnx') |
| | | if not os.path.exists(model_file): |
| | | print(".onnx is not exist, begin to export onnx") |
| | | from funasr.export.export_model import ModelExport |
| | | export_model = ModelExport( |
| | | cache_dir=cache_dir, |
| | | onnx=True, |
| | | device="cpu", |
| | | quant=quantize, |
| | | ) |
| | | export_model.export(model_dir) |
| | | |
| | | config_file = os.path.join(model_dir, 'punc.yaml') |
| | | config = read_yaml(config_file) |
| | | |
| | |
| | | batch_size: int = 1, |
| | | device_id: Union[str, int] = "-1", |
| | | quantize: bool = False, |
| | | intra_op_num_threads: int = 4 |
| | | intra_op_num_threads: int = 4, |
| | | cache_dir: str = None |
| | | ): |
| | | super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads) |
| | | super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir) |
| | | |
| | | def __call__(self, text: str, param_dict: map, split_size=20): |
| | | cache_key = "cache" |
| | |
| | | logger.addHandler(sh) |
| | | logger_initialized[name] = True |
| | | logger.propagate = False |
| | | logging.basicConfig(level=logging.ERROR) |
| | | return logger |
| | |
| | | quantize: bool = False, |
| | | intra_op_num_threads: int = 4, |
| | | max_end_sil: int = None, |
| | | cache_dir: str = None |
| | | ): |
| | | |
| | | if not Path(model_dir).exists(): |
| | | raise FileNotFoundError(f'{model_dir} does not exist.') |
| | | from modelscope.hub.snapshot_download import snapshot_download |
| | | try: |
| | | model_dir = snapshot_download(model_dir, cache_dir=cache_dir) |
| | | except: |
| | | raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format( |
| | | model_dir) |
| | | |
| | | model_file = os.path.join(model_dir, 'model.onnx') |
| | | if quantize: |
| | | model_file = os.path.join(model_dir, 'model_quant.onnx') |
| | | if not os.path.exists(model_file): |
| | | print(".onnx is not exist, begin to export onnx") |
| | | from funasr.export.export_model import ModelExport |
| | | export_model = ModelExport( |
| | | cache_dir=cache_dir, |
| | | onnx=True, |
| | | device="cpu", |
| | | quant=quantize, |
| | | ) |
| | | export_model.export(model_dir) |
| | | config_file = os.path.join(model_dir, 'vad.yaml') |
| | | cmvn_file = os.path.join(model_dir, 'vad.mvn') |
| | | config = read_yaml(config_file) |
| | |
| | | quantize: bool = False, |
| | | intra_op_num_threads: int = 4, |
| | | max_end_sil: int = None, |
| | | cache_dir: str = None |
| | | ): |
| | | |
| | | if not Path(model_dir).exists(): |
| | | raise FileNotFoundError(f'{model_dir} does not exist.') |
| | | from modelscope.hub.snapshot_download import snapshot_download |
| | | try: |
| | | model_dir = snapshot_download(model_dir, cache_dir=cache_dir) |
| | | except: |
| | | raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format( |
| | | model_dir) |
| | | |
| | | model_file = os.path.join(model_dir, 'model.onnx') |
| | | if quantize: |
| | | model_file = os.path.join(model_dir, 'model_quant.onnx') |
| | | if not os.path.exists(model_file): |
| | | print(".onnx is not exist, begin to export onnx") |
| | | from funasr.export.export_model import ModelExport |
| | | export_model = ModelExport( |
| | | cache_dir=cache_dir, |
| | | onnx=True, |
| | | device="cpu", |
| | | quant=quantize, |
| | | ) |
| | | export_model.export(model_dir) |
| | | config_file = os.path.join(model_dir, 'vad.yaml') |
| | | cmvn_file = os.path.join(model_dir, 'vad.mvn') |
| | | config = read_yaml(config_file) |
| | |
| | | |
| | | |
| | | MODULE_NAME = 'funasr_onnx' |
| | | VERSION_NUM = '0.0.8' |
| | | VERSION_NUM = '0.0.10' |
| | | |
| | | setuptools.setup( |
| | | name=MODULE_NAME, |
| | |
| | | long_description=get_readme(), |
| | | long_description_content_type='text/markdown', |
| | | include_package_data=True, |
| | | install_requires=["librosa", "onnxruntime>=1.7.0", |
| | | "scipy", "numpy>=1.19.3", |
| | | "typeguard", "kaldi-native-fbank", |
| | | "PyYAML>=5.1.2"], |
| | | install_requires=["librosa", |
| | | "onnxruntime>=1.7.0", |
| | | "scipy", |
| | | "numpy>=1.19.3", |
| | | "typeguard", |
| | | "kaldi-native-fbank", |
| | | "PyYAML>=5.1.2", |
| | | "funasr", |
| | | "modelscope", |
| | | "onnx" |
| | | ], |
| | | packages=[MODULE_NAME, f'{MODULE_NAME}.utils'], |
| | | keywords=[ |
| | | 'funasr,asr' |