From cbe2ea7e07cbf364827bd89cefc42b3f643ea3be Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期一, 18 三月 2024 23:59:09 +0800
Subject: [PATCH] paraformer streaming bugfix
---
runtime/python/onnxruntime/funasr_onnx/punc_bin.py | 20 ++++++++------------
1 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/runtime/python/onnxruntime/funasr_onnx/punc_bin.py b/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
index b1aca6e..db45baa 100644
--- a/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
+++ b/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
@@ -26,6 +26,7 @@
quantize: bool = False,
intra_op_num_threads: int = 4,
cache_dir: str = None,
+ **kwargs
):
if not Path(model_dir).exists():
@@ -56,9 +57,9 @@
"\npip3 install -U funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple"
model = AutoModel(model=model_dir)
- model_dir = model.export(type="onnx", quantize=quantize)
+ model_dir = model.export(type="onnx", quantize=quantize, **kwargs)
- config_file = os.path.join(model_dir, 'punc.yaml')
+ config_file = os.path.join(model_dir, 'config.yaml')
config = read_yaml(config_file)
token_list = os.path.join(model_dir, 'tokens.json')
with open(token_list, 'r', encoding='utf-8') as f:
@@ -67,7 +68,7 @@
self.converter = TokenIDConverter(token_list)
self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads)
self.batch_size = 1
- self.punc_list = config['punc_list']
+ self.punc_list = config["model_conf"]['punc_list']
self.period = 0
for i in range(len(self.punc_list)):
if self.punc_list[i] == ",":
@@ -76,9 +77,9 @@
self.punc_list[i] = "？"
elif self.punc_list[i] == "。":
self.period = i
- if "seg_jieba" in config:
+ self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict')
+ if os.path.exists(self.jieba_usr_dict_path):
self.seg_jieba = True
- self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict')
self.code_mix_split_words_jieba = code_mix_split_words_jieba(self.jieba_usr_dict_path)
else:
self.seg_jieba = False
@@ -168,14 +169,9 @@
CT-Transformer: Controllable time-delay transformer for real-time punctuation prediction and disfluency detection
https://arxiv.org/pdf/2003.01309.pdf
"""
- def __init__(self, model_dir: Union[str, Path] = None,
- batch_size: int = 1,
- device_id: Union[str, int] = "-1",
- quantize: bool = False,
- intra_op_num_threads: int = 4,
- cache_dir: str = None
+ def __init__(self, *args, **kwargs
):
- super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
+ super().__init__(*args, **kwargs)
def __call__(self, text: str, param_dict: map, split_size=20):
cache_key = "cache"
--
Gitblit v1.9.1