From 0bb5d87d1ee98289bbe241e1f2caf1ab8e64c69c Mon Sep 17 00:00:00 2001
From: lyblsgo <lyblsgo@163.com>
Date: Sat, 22 Apr 2023 20:34:07 +0800
Subject: [PATCH] Merge branch 'dev_knf' of https://github.com/alibaba-damo-academy/FunASR into dev_knf
---
funasr/datasets/preprocessor.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/funasr/datasets/preprocessor.py b/funasr/datasets/preprocessor.py
index 1adca05..20a3831 100644
--- a/funasr/datasets/preprocessor.py
+++ b/funasr/datasets/preprocessor.py
@@ -48,6 +48,7 @@
def seg_tokenize(txt, seg_dict):
out_txt = ""
for word in txt:
+ word = word.lower()
if word in seg_dict:
out_txt += seg_dict[word] + " "
else:
@@ -359,7 +360,6 @@
if self.split_with_space:
tokens = text.strip().split(" ")
if self.seg_dict is not None:
- tokens = forward_segment("".join(tokens), self.seg_dict)
tokens = seg_tokenize(tokens, self.seg_dict)
else:
tokens = self.tokenizer.text2tokens(text)
--
Gitblit v1.9.1