zhifu gao
2024-03-11 0d9384c8c0161259192cc3d676ca0d60e0d18e5c
Dev gzf (#1474)

* qwenaudio qwenaudiochat

* qwenaudio qwenaudiochat

* whisper

* whisper

* llm

* llm

* llm

* llm

* llm

* llm

* llm

* llm

* export onnx

* export onnx

* export onnx

* dingding

* dingding

* llm

* doc

* onnx

* onnx

* onnx

* onnx

* onnx

* onnx

* v1.0.15

* qwenaudio
6个文件已修改
48 ■■■■ 已修改文件
README.md 15 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
README_zh.md 16 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/download/download_from_hub.py 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/version.txt 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/python/onnxruntime/funasr_onnx/punc_bin.py 12 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/python/onnxruntime/setup.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
README.md
@@ -227,6 +227,21 @@
res = model.export(quantize=False)
```
### Text ONNX
```python
# pip3 install -U funasr-onnx
from funasr_onnx import Paraformer
model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model = Paraformer(model_dir, batch_size=1, quantize=True)
wav_path = ['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav']
result = model(wav_path)
print(result)
```
More examples ref to [demo](runtime/python/onnxruntime)
## Deployment Service
FunASR supports deploying pre-trained or further fine-tuned models for service. Currently, it supports the following types of service deployment:
- File transcription service, Mandarin, CPU version, done
README_zh.md
@@ -216,7 +216,7 @@
funasr-export ++model=paraformer ++quantize=false
```
### 从python指令导出
### 从Python导出
```python
from funasr import AutoModel
@@ -225,6 +225,20 @@
res = model.export(quantize=False)
```
### 测试ONNX
```python
# pip3 install -U funasr-onnx
from funasr_onnx import Paraformer
model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model = Paraformer(model_dir, batch_size=1, quantize=True)
wav_path = ['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav']
result = model(wav_path)
print(result)
```
更多例子请参考 [样例](runtime/python/onnxruntime)
<a name="服务部署"></a>
## 服务部署
funasr/download/download_from_hub.py
@@ -48,6 +48,7 @@
            if "file_path_metas" in conf_json:
                add_file_root_path(model_or_path, conf_json["file_path_metas"], cfg)
            cfg.update(kwargs)
            if "config" in cfg:
                config = OmegaConf.load(cfg["config"])
                kwargs = OmegaConf.merge(config, cfg)
        kwargs["model"] = config["model"]
funasr/version.txt
@@ -1 +1 @@
1.0.14
1.0.15
runtime/python/onnxruntime/funasr_onnx/punc_bin.py
@@ -56,9 +56,9 @@
                      "\npip3 install -U funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple"
            model = AutoModel(model=model_dir)
            model_dir = model.export(type="onnx", quantize=quantize)
            model_dir = model.export(quantize=quantize)
            
        config_file = os.path.join(model_dir, 'punc.yaml')
        config_file = os.path.join(model_dir, 'config.yaml')
        config = read_yaml(config_file)
        token_list = os.path.join(model_dir, 'tokens.json')
        with open(token_list, 'r', encoding='utf-8') as f:
@@ -67,7 +67,7 @@
        self.converter = TokenIDConverter(token_list)
        self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads)
        self.batch_size = 1
        self.punc_list = config['punc_list']
        self.punc_list = config["model_conf"]['punc_list']
        self.period = 0
        for i in range(len(self.punc_list)):
            if self.punc_list[i] == ",":
@@ -76,9 +76,9 @@
                self.punc_list[i] = "?"
            elif self.punc_list[i] == "。":
                self.period = i
        if "seg_jieba" in config:
            self.seg_jieba = True
            self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict')
        if os.path.exists(self.jieba_usr_dict_path):
            self.seg_jieba = True
            self.code_mix_split_words_jieba = code_mix_split_words_jieba(self.jieba_usr_dict_path)
        else:
            self.seg_jieba = False
@@ -175,7 +175,7 @@
                 intra_op_num_threads: int = 4,
                 cache_dir: str = None
                 ):
        super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
        super().__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
    def __call__(self, text: str, param_dict: map, split_size=20):
        cache_key = "cache"
runtime/python/onnxruntime/setup.py
@@ -13,7 +13,7 @@
MODULE_NAME = 'funasr_onnx'
VERSION_NUM = '0.2.5'
VERSION_NUM = '0.3.0'
setuptools.setup(
    name=MODULE_NAME,