python/FunASR-XL.git

parent: a30953f7 | 补丁 | 提交 | ignore whitespace

Merge branch 'alibaba-damo-academy:main' into cpp-python-websocket-compatible

zhaomingwork

2023-05-12 3ad2374353cda728f62957682264736f21da92b7

Merge branch 'alibaba-damo-academy:main' into cpp-python-websocket-compatible

7个文件已修改

	funasr/export/export_model.py	9 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/python/grpc/Readme.md	26 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py	20 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/python/onnxruntime/funasr_onnx/punc_bin.py	30 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py	1 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py	37 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/python/onnxruntime/setup.py	17 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史

 funasr/export/export_model.py

@@ -27,15 +27,13 @@
    ):
        assert check_argument_types()
        self.set_all_random_seed(0)
        if cache_dir is None:
            cache_dir = Path.home() / ".cache" / "export"

        self.cache_dir = Path(cache_dir)
        self.cache_dir = cache_dir
        self.export_config = dict(
            feats_dim=560,
            onnx=False,
        )
        print("output dir: {}".format(self.cache_dir))
        
        self.onnx = onnx
        self.device = device
        self.quant = quant
@@ -52,7 +50,7 @@
        verbose: bool = False,
    ):

        export_dir = self.cache_dir / tag_name.replace(' ', '-')
        export_dir = self.cache_dir
        os.makedirs(export_dir, exist_ok=True)

        # export encoder1
@@ -174,6 +172,7 @@
        if model_dir.startswith('damo'):
            from modelscope.hub.snapshot_download import snapshot_download
            model_dir = snapshot_download(model_dir, cache_dir=self.cache_dir)
        self.cache_dir = model_dir

        if mode is None:
            import json

 funasr/runtime/python/grpc/Readme.md

@@ -5,7 +5,6 @@
## For the Server

### Prepare server environment
#### Backend is modelscope pipeline (default)
Install modelscope and funasr

```shell
@@ -22,18 +21,6 @@
pip install -r requirements_server.txt
```

#### Backend is funasr_onnx (optional)

Install [`funasr_onnx`](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime).

```
pip install funasr_onnx -i https://pypi.Python.org/simple
```

Export the model. For more details, refer to the [export docs](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime).
```shell
python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True
```

### Generate protobuf file
Run this on the server; the two generated pb files are used by both the server and the client.
@@ -51,11 +38,6 @@
python grpc_main_server.py --port 10095 --backend pipeline
```

If you want to run the server with onnxruntime, please set `backend` and `onnx_dir`.
```
# Start server.
python grpc_main_server.py --port 10095 --backend onnxruntime --onnx_dir /models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
```

## For the client

@@ -87,9 +69,5 @@

<div align="left"><img src="proto/workflow.png" width="400"/>

## Reference
We borrow from or refer to the following code:

1)https://github.com/wenet-e2e/wenet/tree/main/runtime/core/grpc

2)https://github.com/Open-Speech-EkStep/inference_service/blob/main/realtime_inference_service.py
## Acknowledge
1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).

 funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py

@@ -32,14 +32,30 @@
                 plot_timestamp_to: str = "",
                 quantize: bool = False,
                 intra_op_num_threads: int = 4,
                 cache_dir: str = None
                 ):

        if not Path(model_dir).exists():
            raise FileNotFoundError(f'{model_dir} does not exist.')

            from modelscope.hub.snapshot_download import snapshot_download
            try:
                model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
            except:
                raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(model_dir)
        
        model_file = os.path.join(model_dir, 'model.onnx')
        if quantize:
            model_file = os.path.join(model_dir, 'model_quant.onnx')
        if not os.path.exists(model_file):
            print(".onnx is not exist, begin to export onnx")
            from funasr.export.export_model import ModelExport
            export_model = ModelExport(
                cache_dir=cache_dir,
                onnx=True,
                device="cpu",
                quant=quantize,
            )
            export_model.export(model_dir)
            
        config_file = os.path.join(model_dir, 'config.yaml')
        cmvn_file = os.path.join(model_dir, 'am.mvn')
        config = read_yaml(config_file)

 funasr/runtime/python/onnxruntime/funasr_onnx/punc_bin.py

@@ -24,15 +24,32 @@
                 batch_size: int = 1,
                 device_id: Union[str, int] = "-1",
                 quantize: bool = False,
                 intra_op_num_threads: int = 4
                 intra_op_num_threads: int = 4,
                 cache_dir: str = None,
                 ):

    
        if not Path(model_dir).exists():
            raise FileNotFoundError(f'{model_dir} does not exist.')

            from modelscope.hub.snapshot_download import snapshot_download
            try:
                model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
            except:
                raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
                    model_dir)
    
        model_file = os.path.join(model_dir, 'model.onnx')
        if quantize:
            model_file = os.path.join(model_dir, 'model_quant.onnx')
        if not os.path.exists(model_file):
            print(".onnx is not exist, begin to export onnx")
            from funasr.export.export_model import ModelExport
            export_model = ModelExport(
                cache_dir=cache_dir,
                onnx=True,
                device="cpu",
                quant=quantize,
            )
            export_model.export(model_dir)
            
        config_file = os.path.join(model_dir, 'punc.yaml')
        config = read_yaml(config_file)

@@ -135,9 +152,10 @@
                 batch_size: int = 1,
                 device_id: Union[str, int] = "-1",
                 quantize: bool = False,
                 intra_op_num_threads: int = 4
                 intra_op_num_threads: int = 4,
                 cache_dir: str = None
                 ):
        super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads)
        super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)

    def __call__(self, text: str, param_dict: map, split_size=20):
        cache_key = "cache"

 funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py

@@ -271,4 +271,5 @@
    logger.addHandler(sh)
    logger_initialized[name] = True
    logger.propagate = False
    logging.basicConfig(level=logging.ERROR)
    return logger

 funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py

@@ -31,14 +31,30 @@
                 quantize: bool = False,
                 intra_op_num_threads: int = 4,
                 max_end_sil: int = None,
                 cache_dir: str = None
                 ):
        
        if not Path(model_dir).exists():
            raise FileNotFoundError(f'{model_dir} does not exist.')
            from modelscope.hub.snapshot_download import snapshot_download
            try:
                model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
            except:
                raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
                    model_dir)
        
        model_file = os.path.join(model_dir, 'model.onnx')
        if quantize:
            model_file = os.path.join(model_dir, 'model_quant.onnx')
        if not os.path.exists(model_file):
            print(".onnx is not exist, begin to export onnx")
            from funasr.export.export_model import ModelExport
            export_model = ModelExport(
                cache_dir=cache_dir,
                onnx=True,
                device="cpu",
                quant=quantize,
            )
            export_model.export(model_dir)
        config_file = os.path.join(model_dir, 'vad.yaml')
        cmvn_file = os.path.join(model_dir, 'vad.mvn')
        config = read_yaml(config_file)
@@ -172,14 +188,29 @@
                 quantize: bool = False,
                 intra_op_num_threads: int = 4,
                 max_end_sil: int = None,
                 cache_dir: str = None
                 ):
		
        if not Path(model_dir).exists():
            raise FileNotFoundError(f'{model_dir} does not exist.')
            from modelscope.hub.snapshot_download import snapshot_download
            try:
                model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
            except:
                raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
                    model_dir)
        
        model_file = os.path.join(model_dir, 'model.onnx')
        if quantize:
            model_file = os.path.join(model_dir, 'model_quant.onnx')
        if not os.path.exists(model_file):
            print(".onnx is not exist, begin to export onnx")
            from funasr.export.export_model import ModelExport
            export_model = ModelExport(
                cache_dir=cache_dir,
                onnx=True,
                device="cpu",
                quant=quantize,
            )
            export_model.export(model_dir)
        config_file = os.path.join(model_dir, 'vad.yaml')
        cmvn_file = os.path.join(model_dir, 'vad.mvn')
        config = read_yaml(config_file)

 funasr/runtime/python/onnxruntime/setup.py

@@ -13,7 +13,7 @@


MODULE_NAME = 'funasr_onnx'
VERSION_NUM = '0.0.8'
VERSION_NUM = '0.0.10'

setuptools.setup(
    name=MODULE_NAME,
@@ -27,10 +27,17 @@
    long_description=get_readme(),
    long_description_content_type='text/markdown',
    include_package_data=True,
    install_requires=["librosa", "onnxruntime>=1.7.0",
                      "scipy", "numpy>=1.19.3",
                      "typeguard", "kaldi-native-fbank",
                      "PyYAML>=5.1.2"],
    install_requires=["librosa",
                      "onnxruntime>=1.7.0",
                      "scipy",
                      "numpy>=1.19.3",
                      "typeguard",
                      "kaldi-native-fbank",
                      "PyYAML>=5.1.2",
                      "funasr",
                      "modelscope",
                      "onnx"
                      ],
    packages=[MODULE_NAME, f'{MODULE_NAME}.utils'],
    keywords=[
        'funasr,asr'

			@@ -27,15 +27,13 @@
			):
			assert check_argument_types()
			self.set_all_random_seed(0)
			if cache_dir is None:
			cache_dir = Path.home() / ".cache" / "export"

			self.cache_dir = Path(cache_dir)
			self.cache_dir = cache_dir
			self.export_config = dict(
			feats_dim=560,
			onnx=False,
			)
			print("output dir: {}".format(self.cache_dir))

			self.onnx = onnx
			self.device = device
			self.quant = quant
			@@ -52,7 +50,7 @@
			verbose: bool = False,
			):

			export_dir = self.cache_dir / tag_name.replace(' ', '-')
			export_dir = self.cache_dir
			os.makedirs(export_dir, exist_ok=True)

			# export encoder1
			@@ -174,6 +172,7 @@
			if model_dir.startswith('damo'):
			from modelscope.hub.snapshot_download import snapshot_download
			model_dir = snapshot_download(model_dir, cache_dir=self.cache_dir)
			self.cache_dir = model_dir

			if mode is None:
			import json

			@@ -5,7 +5,6 @@
			## For the Server

			### Prepare server environment
			#### Backend is modelscope pipeline (default)
			Install modelscope and funasr

			```shell
			@@ -22,18 +21,6 @@
			pip install -r requirements_server.txt
			```

			#### Backend is funasr_onnx (optional)

			Install [`funasr_onnx`](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime).

			```
			pip install funasr_onnx -i https://pypi.Python.org/simple
			```

			Export the model. For more details, refer to the [export docs](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime).
			```shell
			python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True
			```

			### Generate protobuf file
			Run this on the server; the two generated pb files are used by both the server and the client.
			@@ -51,11 +38,6 @@
			python grpc_main_server.py --port 10095 --backend pipeline
			```

			If you want to run the server with onnxruntime, please set `backend` and `onnx_dir`.
			```
			# Start server.
			python grpc_main_server.py --port 10095 --backend onnxruntime --onnx_dir /models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
			```

			## For the client

			@@ -87,9 +69,5 @@

			<div align="left"><img src="proto/workflow.png" width="400"/>

			## Reference
			We borrow from or refer to the following code:

			1)https://github.com/wenet-e2e/wenet/tree/main/runtime/core/grpc

			2)https://github.com/Open-Speech-EkStep/inference_service/blob/main/realtime_inference_service.py
			## Acknowledge
			1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).

			@@ -32,14 +32,30 @@
			plot_timestamp_to: str = "",
			quantize: bool = False,
			intra_op_num_threads: int = 4,
			cache_dir: str = None
			):

			if not Path(model_dir).exists():
			raise FileNotFoundError(f'{model_dir} does not exist.')

			from modelscope.hub.snapshot_download import snapshot_download
			try:
			model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
			except:
			raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(model_dir)

			model_file = os.path.join(model_dir, 'model.onnx')
			if quantize:
			model_file = os.path.join(model_dir, 'model_quant.onnx')
			if not os.path.exists(model_file):
			print(".onnx is not exist, begin to export onnx")
			from funasr.export.export_model import ModelExport
			export_model = ModelExport(
			cache_dir=cache_dir,
			onnx=True,
			device="cpu",
			quant=quantize,
			)
			export_model.export(model_dir)

			config_file = os.path.join(model_dir, 'config.yaml')
			cmvn_file = os.path.join(model_dir, 'am.mvn')
			config = read_yaml(config_file)

			@@ -24,15 +24,32 @@
			batch_size: int = 1,
			device_id: Union[str, int] = "-1",
			quantize: bool = False,
			intra_op_num_threads: int = 4
			intra_op_num_threads: int = 4,
			cache_dir: str = None,
			):


			if not Path(model_dir).exists():
			raise FileNotFoundError(f'{model_dir} does not exist.')

			from modelscope.hub.snapshot_download import snapshot_download
			try:
			model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
			except:
			raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
			model_dir)

			model_file = os.path.join(model_dir, 'model.onnx')
			if quantize:
			model_file = os.path.join(model_dir, 'model_quant.onnx')
			if not os.path.exists(model_file):
			print(".onnx is not exist, begin to export onnx")
			from funasr.export.export_model import ModelExport
			export_model = ModelExport(
			cache_dir=cache_dir,
			onnx=True,
			device="cpu",
			quant=quantize,
			)
			export_model.export(model_dir)

			config_file = os.path.join(model_dir, 'punc.yaml')
			config = read_yaml(config_file)

			@@ -135,9 +152,10 @@
			batch_size: int = 1,
			device_id: Union[str, int] = "-1",
			quantize: bool = False,
			intra_op_num_threads: int = 4
			intra_op_num_threads: int = 4,
			cache_dir: str = None
			):
			super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads)
			super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)

			def __call__(self, text: str, param_dict: map, split_size=20):
			cache_key = "cache"

			@@ -271,4 +271,5 @@
			logger.addHandler(sh)
			logger_initialized[name] = True
			logger.propagate = False
			logging.basicConfig(level=logging.ERROR)
			return logger

			@@ -31,14 +31,30 @@
			quantize: bool = False,
			intra_op_num_threads: int = 4,
			max_end_sil: int = None,
			cache_dir: str = None
			):

			if not Path(model_dir).exists():
			raise FileNotFoundError(f'{model_dir} does not exist.')
			from modelscope.hub.snapshot_download import snapshot_download
			try:
			model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
			except:
			raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
			model_dir)

			model_file = os.path.join(model_dir, 'model.onnx')
			if quantize:
			model_file = os.path.join(model_dir, 'model_quant.onnx')
			if not os.path.exists(model_file):
			print(".onnx is not exist, begin to export onnx")
			from funasr.export.export_model import ModelExport
			export_model = ModelExport(
			cache_dir=cache_dir,
			onnx=True,
			device="cpu",
			quant=quantize,
			)
			export_model.export(model_dir)
			config_file = os.path.join(model_dir, 'vad.yaml')
			cmvn_file = os.path.join(model_dir, 'vad.mvn')
			config = read_yaml(config_file)
			@@ -172,14 +188,29 @@
			quantize: bool = False,
			intra_op_num_threads: int = 4,
			max_end_sil: int = None,
			cache_dir: str = None
			):

			if not Path(model_dir).exists():
			raise FileNotFoundError(f'{model_dir} does not exist.')
			from modelscope.hub.snapshot_download import snapshot_download
			try:
			model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
			except:
			raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
			model_dir)

			model_file = os.path.join(model_dir, 'model.onnx')
			if quantize:
			model_file = os.path.join(model_dir, 'model_quant.onnx')
			if not os.path.exists(model_file):
			print(".onnx is not exist, begin to export onnx")
			from funasr.export.export_model import ModelExport
			export_model = ModelExport(
			cache_dir=cache_dir,
			onnx=True,
			device="cpu",
			quant=quantize,
			)
			export_model.export(model_dir)
			config_file = os.path.join(model_dir, 'vad.yaml')
			cmvn_file = os.path.join(model_dir, 'vad.mvn')
			config = read_yaml(config_file)

			@@ -13,7 +13,7 @@


			MODULE_NAME = 'funasr_onnx'
			VERSION_NUM = '0.0.8'
			VERSION_NUM = '0.0.10'

			setuptools.setup(
			name=MODULE_NAME,
			@@ -27,10 +27,17 @@
			long_description=get_readme(),
			long_description_content_type='text/markdown',
			include_package_data=True,
			install_requires=["librosa", "onnxruntime>=1.7.0",
			"scipy", "numpy>=1.19.3",
			"typeguard", "kaldi-native-fbank",
			"PyYAML>=5.1.2"],
			install_requires=["librosa",
			"onnxruntime>=1.7.0",
			"scipy",
			"numpy>=1.19.3",
			"typeguard",
			"kaldi-native-fbank",
			"PyYAML>=5.1.2",
			"funasr",
			"modelscope",
			"onnx"
			],
			packages=[MODULE_NAME, f'{MODULE_NAME}.utils'],
			keywords=[
			'funasr,asr'