| | |
| | | import scipy.signal |
| | | import librosa |
| | | |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.cleaner import TextCleaner |
| | | from funasr.text.token_id_converter import TokenIDConverter |
| | | from funasr.tokenizer.build_tokenizer import build_tokenizer |
| | | from funasr.tokenizer.cleaner import TextCleaner |
| | | from funasr.tokenizer.token_id_converter import TokenIDConverter |
| | | |
| | | |
# NOTE(review): extraction artifact — every line below is wrapped in
# markdown-table pipes ("| | | ... |") that are not valid Python syntax, and
# source lines are missing between the class header and the first statement:
# there is no method `def` line, and the `if` branch that pairs with the
# `else:` below is absent.  This is an incomplete fragment; it cannot be
# compiled or safely rewritten, so only annotations are added here.
| | | class AbsPreprocessor(ABC): |
| | |
# Tail of a text-tokenization method (signature not visible in this chunk):
#   - one branch re-segments already-split tokens against a word-segmentation
#     dictionary via `seg_tokenize(tokens, self.seg_dict)` (the guarding
#     condition is outside this view — presumably "seg_dict is set"; confirm
#     against the full file);
#   - the other branch tokenizes raw `text` with
#     `self.tokenizer.text2tokens(text)`;
#   - tokens are then mapped to integer ids by
#     `self.token_id_converter.tokens2ids`, stored into the `data` dict under
#     the `self.text_name` key as an int64 numpy array, and `data` is
#     returned to the caller.
| | | tokens = seg_tokenize(tokens, self.seg_dict) |
| | | else: |
| | | tokens = self.tokenizer.text2tokens(text) |
| | | |
| | | text_ints = self.token_id_converter.tokens2ids(tokens) |
| | | data[self.text_name] = np.array(text_ints, dtype=np.int64) |
| | | return data |