From ec1f0f087fc3581defb6a3505d6e11e1bd81ca1d Mon Sep 17 00:00:00 2001
From: zhaomingwork <zhaomingwork@qq.com>
Date: Fri, 12 May 2023 15:00:06 +0800
Subject: [PATCH] Merge branch 'cpp-python-websocket-compatible' of github.com:zhaomingwork/FunASR into cpp-python-websocket-compatible

---
 funasr/runtime/python/onnxruntime/setup.py                      |   17 ++++-
 funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py        |   37 +++++++++++-
 funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py    |    1 
 funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py |   20 ++++++
 funasr/runtime/python/grpc/Readme.md                            |   26 --------
 funasr/runtime/python/onnxruntime/funasr_onnx/punc_bin.py       |   30 ++++++++--
 funasr/export/export_model.py                                   |    9 +-
 7 files changed, 95 insertions(+), 45 deletions(-)

diff --git a/funasr/export/export_model.py b/funasr/export/export_model.py
index b69eeee..c02c299 100644
--- a/funasr/export/export_model.py
+++ b/funasr/export/export_model.py
@@ -27,15 +27,13 @@
     ):
         assert check_argument_types()
         self.set_all_random_seed(0)
-        if cache_dir is None:
-            cache_dir = Path.home() / ".cache" / "export"
 
-        self.cache_dir = Path(cache_dir)
+        self.cache_dir = cache_dir
         self.export_config = dict(
             feats_dim=560,
             onnx=False,
         )
-        print("output dir: {}".format(self.cache_dir))
+        
         self.onnx = onnx
         self.device = device
         self.quant = quant
@@ -52,7 +50,7 @@
         verbose: bool = False,
     ):
 
-        export_dir = self.cache_dir / tag_name.replace(' ', '-')
+        export_dir = self.cache_dir
         os.makedirs(export_dir, exist_ok=True)
 
         # export encoder1
@@ -174,6 +172,7 @@
         if model_dir.startswith('damo'):
             from modelscope.hub.snapshot_download import snapshot_download
             model_dir = snapshot_download(model_dir, cache_dir=self.cache_dir)
+        self.cache_dir = model_dir
 
         if mode is None:
             import json
diff --git a/funasr/runtime/python/grpc/Readme.md b/funasr/runtime/python/grpc/Readme.md
index 742268b..832b87e 100644
--- a/funasr/runtime/python/grpc/Readme.md
+++ b/funasr/runtime/python/grpc/Readme.md
@@ -5,7 +5,6 @@
 ## For the Server
 
 ### Prepare server environment
-#### Backend is modelscope pipeline (default)
 Install the modelscope and funasr
 
 ```shell
@@ -22,18 +21,6 @@
 pip install -r requirements_server.txt
 ```
 
-#### Backend is funasr_onnx (optional)
-
-Install [`funasr_onnx`](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime).
-
-```
-pip install funasr_onnx -i https://pypi.Python.org/simple
-```
-
-Export the model, more details ref to [export docs](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime).
-```shell
-python -m funasr.export.export_model --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir ./export --type onnx --quantize True
-```
 
 ### Generate protobuf file
 Run on server, the two generated pb files are both used for server and client
@@ -51,11 +38,6 @@
 python grpc_main_server.py --port 10095 --backend pipeline
 ```
 
-If you want run server with onnxruntime, please set `backend` and `onnx_dir`.
-```
-# Start server.
-python grpc_main_server.py --port 10095 --backend onnxruntime --onnx_dir /models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
-```
 
 ## For the client
 
@@ -87,9 +69,5 @@
 
 <div align="left"><img src="proto/workflow.png" width="400"/>
 
-## Reference
-We borrow from or refer to some code as:
-
-1)https://github.com/wenet-e2e/wenet/tree/main/runtime/core/grpc
-
-2)https://github.com/Open-Speech-EkStep/inference_service/blob/main/realtime_inference_service.py
\ No newline at end of file
+## Acknowledgements
+1. This project is maintained by the [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
\ No newline at end of file
diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py b/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
index 8fcb2b5..f3e0f3d 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
@@ -32,14 +32,30 @@
                  plot_timestamp_to: str = "",
                  quantize: bool = False,
                  intra_op_num_threads: int = 4,
+                 cache_dir: str = None
                  ):
 
         if not Path(model_dir).exists():
-            raise FileNotFoundError(f'{model_dir} does not exist.')
-
+            from modelscope.hub.snapshot_download import snapshot_download  # local import: only needed when model_dir is not an existing path
+            try:
+                model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
+            except Exception as err:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
+                raise FileNotFoundError("model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(model_dir)) from err  # raising a plain str is a TypeError in Python 3
+        
         model_file = os.path.join(model_dir, 'model.onnx')
         if quantize:
             model_file = os.path.join(model_dir, 'model_quant.onnx')
+        if not os.path.exists(model_file):
+            print(".onnx is not exist, begin to export onnx")
+            from funasr.export.export_model import ModelExport
+            export_model = ModelExport(
+                cache_dir=cache_dir,
+                onnx=True,
+                device="cpu",
+                quant=quantize,
+            )
+            export_model.export(model_dir)
+            
         config_file = os.path.join(model_dir, 'config.yaml')
         cmvn_file = os.path.join(model_dir, 'am.mvn')
         config = read_yaml(config_file)
diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/punc_bin.py b/funasr/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
index 6fd01e4..8890714 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
@@ -24,15 +24,32 @@
                  batch_size: int = 1,
                  device_id: Union[str, int] = "-1",
                  quantize: bool = False,
-                 intra_op_num_threads: int = 4
+                 intra_op_num_threads: int = 4,
+                 cache_dir: str = None,
                  ):
-
+    
         if not Path(model_dir).exists():
-            raise FileNotFoundError(f'{model_dir} does not exist.')
-
+            from modelscope.hub.snapshot_download import snapshot_download  # local import: only needed when model_dir is not an existing path
+            try:
+                model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
+            except Exception as err:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
+                raise FileNotFoundError("model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
+                    model_dir)) from err  # raising a plain str is a TypeError in Python 3
+    
         model_file = os.path.join(model_dir, 'model.onnx')
         if quantize:
             model_file = os.path.join(model_dir, 'model_quant.onnx')
+        if not os.path.exists(model_file):
+            print(".onnx is not exist, begin to export onnx")
+            from funasr.export.export_model import ModelExport
+            export_model = ModelExport(
+                cache_dir=cache_dir,
+                onnx=True,
+                device="cpu",
+                quant=quantize,
+            )
+            export_model.export(model_dir)
+            
         config_file = os.path.join(model_dir, 'punc.yaml')
         config = read_yaml(config_file)
 
@@ -135,9 +152,10 @@
                  batch_size: int = 1,
                  device_id: Union[str, int] = "-1",
                  quantize: bool = False,
-                 intra_op_num_threads: int = 4
+                 intra_op_num_threads: int = 4,
+                 cache_dir: str = None
                  ):
-        super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads)
+        super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
 
     def __call__(self, text: str, param_dict: map, split_size=20):
         cache_key = "cache"
diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
index 78c3f0d..dcee425 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
@@ -271,4 +271,5 @@
     logger.addHandler(sh)
     logger_initialized[name] = True
     logger.propagate = False
+    logging.basicConfig(level=logging.ERROR)
     return logger
diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py b/funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py
index 022f1e7..244dd75 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/vad_bin.py
@@ -31,14 +31,30 @@
 	             quantize: bool = False,
 	             intra_op_num_threads: int = 4,
 	             max_end_sil: int = None,
+	             cache_dir: str = None
 	             ):
 		
 		if not Path(model_dir).exists():
-			raise FileNotFoundError(f'{model_dir} does not exist.')
+			from modelscope.hub.snapshot_download import snapshot_download  # local import: only needed when model_dir is not an existing path
+			try:
+				model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
+			except Exception as err:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
+				raise FileNotFoundError("model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
+					model_dir)) from err  # raising a plain str is a TypeError in Python 3
 		
 		model_file = os.path.join(model_dir, 'model.onnx')
 		if quantize:
 			model_file = os.path.join(model_dir, 'model_quant.onnx')
+		if not os.path.exists(model_file):
+			print(".onnx is not exist, begin to export onnx")
+			from funasr.export.export_model import ModelExport
+			export_model = ModelExport(
+				cache_dir=cache_dir,
+				onnx=True,
+				device="cpu",
+				quant=quantize,
+			)
+			export_model.export(model_dir)
 		config_file = os.path.join(model_dir, 'vad.yaml')
 		cmvn_file = os.path.join(model_dir, 'vad.mvn')
 		config = read_yaml(config_file)
@@ -172,14 +188,29 @@
 	             quantize: bool = False,
 	             intra_op_num_threads: int = 4,
 	             max_end_sil: int = None,
+	             cache_dir: str = None
 	             ):
-		
 		if not Path(model_dir).exists():
-			raise FileNotFoundError(f'{model_dir} does not exist.')
+			from modelscope.hub.snapshot_download import snapshot_download  # local import: only needed when model_dir is not an existing path
+			try:
+				model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
+			except Exception as err:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
+				raise FileNotFoundError("model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(
+					model_dir)) from err  # raising a plain str is a TypeError in Python 3
 		
 		model_file = os.path.join(model_dir, 'model.onnx')
 		if quantize:
 			model_file = os.path.join(model_dir, 'model_quant.onnx')
+		if not os.path.exists(model_file):
+			print(".onnx is not exist, begin to export onnx")
+			from funasr.export.export_model import ModelExport
+			export_model = ModelExport(
+				cache_dir=cache_dir,
+				onnx=True,
+				device="cpu",
+				quant=quantize,
+			)
+			export_model.export(model_dir)
 		config_file = os.path.join(model_dir, 'vad.yaml')
 		cmvn_file = os.path.join(model_dir, 'vad.mvn')
 		config = read_yaml(config_file)
diff --git a/funasr/runtime/python/onnxruntime/setup.py b/funasr/runtime/python/onnxruntime/setup.py
index 0b249dd..f9c9051 100644
--- a/funasr/runtime/python/onnxruntime/setup.py
+++ b/funasr/runtime/python/onnxruntime/setup.py
@@ -13,7 +13,7 @@
 
 
 MODULE_NAME = 'funasr_onnx'
-VERSION_NUM = '0.0.8'
+VERSION_NUM = '0.0.10'
 
 setuptools.setup(
     name=MODULE_NAME,
@@ -27,10 +27,17 @@
     long_description=get_readme(),
     long_description_content_type='text/markdown',
     include_package_data=True,
-    install_requires=["librosa", "onnxruntime>=1.7.0",
-                      "scipy", "numpy>=1.19.3",
-                      "typeguard", "kaldi-native-fbank",
-                      "PyYAML>=5.1.2"],
+    install_requires=["librosa",
+                      "onnxruntime>=1.7.0",
+                      "scipy",
+                      "numpy>=1.19.3",
+                      "typeguard",
+                      "kaldi-native-fbank",
+                      "PyYAML>=5.1.2",
+                      "funasr",
+                      "modelscope",
+                      "onnx"
+                      ],
     packages=[MODULE_NAME, f'{MODULE_NAME}.utils'],
     keywords=[
         'funasr,asr'

--
Gitblit v1.9.1