From 4d907718f39e2b0f7a0c714c2e3de289e742fc61 Mon Sep 17 00:00:00 2001
From: Carl <415692979@qq.com>
Date: Thu, 28 Mar 2024 13:42:00 +0800
Subject: [PATCH] 修正commit 87b62d68957a2194b017a43b6c2a15424a05a984 引入的英文整句标点预测导致末尾两个单词中间的空格被删除的问题。 (#1556)
---
funasr/tokenizer/whisper_tokenizer.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/funasr/tokenizer/whisper_tokenizer.py b/funasr/tokenizer/whisper_tokenizer.py
index f41c823..6684f25 100644
--- a/funasr/tokenizer/whisper_tokenizer.py
+++ b/funasr/tokenizer/whisper_tokenizer.py
@@ -1,14 +1,14 @@
-try:
- from whisper.tokenizer import get_tokenizer
-except:
- print("If you want to use hugging, please `pip install -U transformers`")
from funasr.register import tables
@tables.register("tokenizer_classes", "WhisperTokenizer")
def WhisperTokenizer(**kwargs):
-
+ try:
+ from whisper.tokenizer import get_tokenizer
+ except:
+ print("Notice: If you want to use whisper, please `pip install -U openai-whisper`")
+
language = kwargs.get("language", None)
task = kwargs.get("task", "transcribe")
is_multilingual = kwargs.get("is_multilingual", True)
--
Gitblit v1.9.1