| | |
| | | import numpy as np |
| | | import scipy.signal |
| | | import soundfile |
| | | |
| | | import jieba |
| | | |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.cleaner import TextCleaner |
| | |
| | | self.seg_dict = None |
| | | if seg_dict_file is not None: |
| | | self.seg_dict = {} |
| | | with open(seg_dict_file) as f: |
| | | with open(seg_dict_file, "r", encoding="utf8") as f: |
| | | lines = f.readlines() |
| | | for line in lines: |
| | | s = line.strip().split() |
| | |
| | | self.split_text_name = split_text_name |
| | | self.seg_jieba = seg_jieba |
| | | if self.seg_jieba: |
| | | import jieba |
| | | jieba.load_userdict(seg_dict_file) |
| | | |
| | | @classmethod |