From 27f31cd42bb4e20dc19de0034fc0d80b449f1db1 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 06 Dec 2023 17:01:12 +0800
Subject: [PATCH] funasr2

---
 funasr/datasets/small_datasets/preprocessor.py |    7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/funasr/datasets/small_datasets/preprocessor.py b/funasr/datasets/small_datasets/preprocessor.py
index f0d3c9a..62beaab 100644
--- a/funasr/datasets/small_datasets/preprocessor.py
+++ b/funasr/datasets/small_datasets/preprocessor.py
@@ -11,9 +11,9 @@
 import scipy.signal
 import librosa
 
-from funasr.text.build_tokenizer import build_tokenizer
-from funasr.text.cleaner import TextCleaner
-from funasr.text.token_id_converter import TokenIDConverter
+from funasr.tokenizer.build_tokenizer import build_tokenizer
+from funasr.tokenizer.cleaner import TextCleaner
+from funasr.tokenizer.token_id_converter import TokenIDConverter
 
 
 class AbsPreprocessor(ABC):
@@ -361,6 +361,7 @@
                     tokens = seg_tokenize(tokens, self.seg_dict)
             else:
                 tokens = self.tokenizer.text2tokens(text)
+                
             text_ints = self.token_id_converter.tokens2ids(tokens)
             data[self.text_name] = np.array(text_ints, dtype=np.int64)
         return data

--
Gitblit v1.9.1