From 4d907718f39e2b0f7a0c714c2e3de289e742fc61 Mon Sep 17 00:00:00 2001
From: Carl <415692979@qq.com>
Date: Thu, 28 Mar 2024 13:42:00 +0800
Subject: [PATCH] 修正commit 87b62d68957a2194b017a43b6c2a15424a05a984 引入的英文整句标点预测导致末尾两个单词中间的空格被删除的问题。 (#1556)
---
funasr/tokenizer/hf_tokenizer.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/funasr/tokenizer/hf_tokenizer.py b/funasr/tokenizer/hf_tokenizer.py
index c856b3d..b6043e9 100644
--- a/funasr/tokenizer/hf_tokenizer.py
+++ b/funasr/tokenizer/hf_tokenizer.py
@@ -1,14 +1,15 @@
-try:
- from transformers import AutoTokenizer
-except:
- print("If you want to use hugging, please `pip install -U transformers`")
+
from funasr.register import tables
@tables.register("tokenizer_classes", "HuggingfaceTokenizer")
def HuggingfaceTokenizer(init_param_path, **kwargs):
-
+    try:
+        from transformers import AutoTokenizer
+    except ImportError as err:
+        # transformers is imported lazily here; if it is missing, fail with an install hint instead of an UnboundLocalError on the next line.
+        raise ImportError("If you want to use hugging, please `pip install -U transformers`") from err
     tokenizer = AutoTokenizer.from_pretrained(init_param_path)
     return tokenizer
--
Gitblit v1.9.1