from .utils.utils import (
    ONNXRuntimeError,
    OrtInferSession,
    get_logger,
    read_yaml,
)
# Tokenization / sentence-splitting helpers used by the punctuation model.
# (Previously imported twice; the duplicate line was removed — the second
# import subsumed the first and only added code_mix_split_words_jieba.)
from .utils.utils import (
    TokenIDConverter,
    code_mix_split_words,
    code_mix_split_words_jieba,
    split_to_mini_sentence,
)

# NOTE(review): this rebinds the name `logging`, shadowing the stdlib
# `logging` module for the rest of this file. Kept as-is because other
# code in this module presumably calls `logging.info(...)` on this
# logger object — renaming it to `logger` would be the cleaner fix but
# must be done file-wide.
logging = get_logger()
| | | |
| | | |
| | |
| | | ): |
| | | |
| | | if not Path(model_dir).exists(): |
| | | from modelscope.hub.snapshot_download import snapshot_download |
| | | try: |
| | | from modelscope.hub.snapshot_download import snapshot_download |
| | | except: |
| | | raise "You are exporting model from modelscope, please install modelscope and try it again. To install modelscope, you could:\n" \ |
| | | "\npip3 install -U modelscope\n" \ |
| | | "For the users in China, you could install with the command:\n" \ |
| | | "\npip3 install -U modelscope -i https://mirror.sjtu.edu.cn/pypi/web/simple" |
| | | try: |
| | | model_dir = snapshot_download(model_dir, cache_dir=cache_dir) |
| | | except: |
| | |
| | | model_file = os.path.join(model_dir, 'model_quant.onnx') |
| | | if not os.path.exists(model_file): |
| | | print(".onnx is not exist, begin to export onnx") |
| | | from funasr.export.export_model import ModelExport |
| | | try: |
| | | from funasr.export.export_model import ModelExport |
| | | except: |
| | | raise "You are exporting onnx, please install funasr and try it again. To install funasr, you could:\n" \ |
| | | "\npip3 install -U funasr\n" \ |
| | | "For the users in China, you could install with the command:\n" \ |
| | | "\npip3 install -U funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple" |
| | | export_model = ModelExport( |
| | | cache_dir=cache_dir, |
| | | onnx=True, |
| | |
| | | self.punc_list[i] = "?" |
| | | elif self.punc_list[i] == "。": |
| | | self.period = i |
| | | if "seg_jieba" in config: |
| | | self.seg_jieba = True |
| | | self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict') |
| | | self.code_mix_split_words_jieba = code_mix_split_words_jieba(self.jieba_usr_dict_path) |
| | | else: |
| | | self.seg_jieba = False |
| | | |
| | | def __call__(self, text: Union[list, str], split_size=20): |
| | | split_text = code_mix_split_words(text) |
| | | if self.seg_jieba: |
| | | split_text = self.code_mix_split_words_jieba(text) |
| | | else: |
| | | split_text = code_mix_split_words(text) |
| | | split_text_id = self.converter.tokens2ids(split_text) |
| | | mini_sentences = split_to_mini_sentence(split_text, split_size) |
| | | mini_sentences_id = split_to_mini_sentence(split_text_id, split_size) |
| | |
| | | data = { |
| | | "input": mini_sentence_id[None,:], |
| | | "text_lengths": np.array([text_length], dtype='int32'), |
| | | "vad_mask": vad_mask |
| | | "vad_mask": vad_mask, |
| | | "sub_masks": vad_mask |
| | | } |
| | | try: |