From 0cf5dfec2c8313fc2ed2aab8d10bf3dc4b9c283f Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Thu, 14 Mar 2024 14:41:49 +0800
Subject: [PATCH] update cmakelist
---
runtime/python/onnxruntime/funasr_onnx/punc_bin.py | 35 +++++++++++++++++------------------
1 files changed, 17 insertions(+), 18 deletions(-)
diff --git a/runtime/python/onnxruntime/funasr_onnx/punc_bin.py b/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
index 6e289f6..1b8a1a2 100644
--- a/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
+++ b/runtime/python/onnxruntime/funasr_onnx/punc_bin.py
@@ -6,7 +6,7 @@
from pathlib import Path
from typing import List, Union, Tuple
import numpy as np
-
+import json
from .utils.utils import (ONNXRuntimeError,
OrtInferSession, get_logger,
read_yaml)
@@ -48,27 +48,26 @@
if not os.path.exists(model_file):
print(".onnx is not exist, begin to export onnx")
try:
- from funasr.export.export_model import ModelExport
+ from funasr import AutoModel
except:
raise "You are exporting onnx, please install funasr and try it again. To install funasr, you could:\n" \
"\npip3 install -U funasr\n" \
"For the users in China, you could install with the command:\n" \
"\npip3 install -U funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple"
- export_model = ModelExport(
- cache_dir=cache_dir,
- onnx=True,
- device="cpu",
- quant=quantize,
- )
- export_model.export(model_dir)
-
- config_file = os.path.join(model_dir, 'punc.yaml')
- config = read_yaml(config_file)
- self.converter = TokenIDConverter(config['token_list'])
+ model = AutoModel(model=model_dir)
+ model_dir = model.export(quantize=quantize)
+
+ config_file = os.path.join(model_dir, 'config.yaml')
+ config = read_yaml(config_file)
+ token_list = os.path.join(model_dir, 'tokens.json')
+ with open(token_list, 'r', encoding='utf-8') as f:
+ token_list = json.load(f)
+
+ self.converter = TokenIDConverter(token_list)
self.ort_infer = OrtInferSession(model_file, device_id, intra_op_num_threads=intra_op_num_threads)
self.batch_size = 1
- self.punc_list = config['punc_list']
+ self.punc_list = config["model_conf"]['punc_list']
self.period = 0
for i in range(len(self.punc_list)):
if self.punc_list[i] == ",":
@@ -77,9 +76,9 @@
self.punc_list[i] = "？"
elif self.punc_list[i] == "。":
self.period = i
- if "seg_jieba" in config:
+ self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict')
+ if os.path.exists(self.jieba_usr_dict_path):
self.seg_jieba = True
- self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict')
self.code_mix_split_words_jieba = code_mix_split_words_jieba(self.jieba_usr_dict_path)
else:
self.seg_jieba = False
@@ -176,7 +175,7 @@
intra_op_num_threads: int = 4,
cache_dir: str = None
):
- super(CT_Transformer_VadRealtime, self).__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
+ super().__init__(model_dir, batch_size, device_id, quantize, intra_op_num_threads, cache_dir=cache_dir)
def __call__(self, text: str, param_dict: map, split_size=20):
cache_key = "cache"
@@ -276,7 +275,7 @@
param_dict[cache_key] = cache_out
return sentence_out, sentence_punc_list_out, cache_out
- def vad_mask(self, size, vad_pos, dtype=np.bool):
+ def vad_mask(self, size, vad_pos, dtype=bool):
"""Create mask for decoder self-attention.
:param int size: size of mask
--
Gitblit v1.9.1