zhifu gao
2024-03-11 0d9384c8c0161259192cc3d676ca0d60e0d18e5c
Dev gzf (#1474)

* qwenaudio qwenaudiochat

* qwenaudio qwenaudiochat

* whisper

* whisper

* llm

* llm

* llm

* llm

* llm

* llm

* llm

* llm

* export onnx

* export onnx

* export onnx

* dingding

* dingding

* llm

* doc

* onnx

* onnx

* onnx

* onnx

* onnx

* onnx

* v1.0.15

* qwenaudio
6个文件已修改
48 ■■■■ 已修改文件
README.md 15 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
README_zh.md 16 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/download/download_from_hub.py 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/version.txt 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/python/onnxruntime/funasr_onnx/punc_bin.py 12 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/python/onnxruntime/setup.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
README.md
@@ -227,6 +227,21 @@
res = model.export(quantize=False)
```
### Text ONNX
```python
# pip3 install -U funasr-onnx
from funasr_onnx import Paraformer
model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model = Paraformer(model_dir, batch_size=1, quantize=True)
wav_path = ['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav']
result = model(wav_path)
print(result)
```
More examples ref to [demo](runtime/python/onnxruntime)
## Deployment Service
FunASR supports deploying pre-trained or further fine-tuned models for service. Currently, it supports the following types of service deployment:
- File transcription service, Mandarin, CPU version, done
README_zh.md
@@ -216,7 +216,7 @@
funasr-export ++model=paraformer ++quantize=false
```
### 从python指令导出
### 从Python导出
```python
from funasr import AutoModel
@@ -225,6 +225,20 @@
res = model.export(quantize=False)
```
### 测试ONNX
```python
# pip3 install -U funasr-onnx
from funasr_onnx import Paraformer
model_dir = "damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model = Paraformer(model_dir, batch_size=1, quantize=True)
wav_path = ['~/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav']
result = model(wav_path)
print(result)
```
更多例子请参考 [样例](runtime/python/onnxruntime)
<a name="服务部署"></a>
## 服务部署
funasr/download/download_from_hub.py
@@ -48,6 +48,7 @@
            if "file_path_metas" in conf_json:
                add_file_root_path(model_or_path, conf_json["file_path_metas"], cfg)
            cfg.update(kwargs)
            if "config" in cfg:
                config = OmegaConf.load(cfg["config"])
                kwargs = OmegaConf.merge(config, cfg)
        kwargs["model"] = config["model"]
funasr/version.txt
@@ -1 +1 @@
1.0.14
1.0.15
runtime/python/onnxruntime/funasr_onnx/punc_bin.py
@@ -56,9 +56,9 @@
                      "\npip3 install -U funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple"
            model = AutoModel(model=model_dir)
            model_dir = model.export(type="onnx", quantize=quantize)
            model_dir = model.export(quantize=quantize)
            
        config_file = os.path.join(model_dir, 'punc.yaml')
        config_file = os.path.join(model_dir, 'config.yaml')
        config = read_yaml(config_file)
        token_list = os.path.join(model_dir, 'tokens.json')
        with open(token_list, 'r', encoding='utf-8') as f:
@@ -67,7 +67,7 @@
        self.converter = TokenIDConverter(token_list)
        self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads)
        self.batch_size = 1
        self.punc_list = config['punc_list']
        self.punc_list = config["model_conf"]['punc_list']
        self.period = 0
        for i in range(len(self.punc_list)):
            if self.punc_list[i] == ",":
@@ -76,9 +76,9 @@
                self.punc_list[i] = "?"
            elif self.punc_list[i] == "。":
                self.period = i
        if "seg_jieba" in config:
            self.seg_jieba = True
            self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict')
        if os.path.exists(self.jieba_usr_dict_path):
            self.seg_jieba = True
            self.code_mix_split_words_jieba = code_mix_split_words_jieba(self.jieba_usr_dict_path)
        else:
            self.seg_jieba = False
@@ -175,7 +175,7 @@
                 intra_op_num_threads: int = 4,
                 cache_dir: str = None
                 ):
        super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
        super().__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
    def __call__(self, text: str, param_dict: map, split_size=20):
        cache_key = "cache"
runtime/python/onnxruntime/setup.py
@@ -13,7 +13,7 @@
MODULE_NAME = 'funasr_onnx'
VERSION_NUM = '0.2.5'
VERSION_NUM = '0.3.0'
setuptools.setup(
    name=MODULE_NAME,