From 26d642bfdf59a50365a9c8158acb223cae1004dc Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Tue, 23 Apr 2024 20:13:44 +0800
Subject: [PATCH] Dev gzf exp (#1651)

---
 funasr/auto/auto_model.py |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index 630c390..ba8881a 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -175,6 +175,8 @@
             kwargs["token_list"] = tokenizer.token_list if hasattr(tokenizer, "token_list") else None
             kwargs["token_list"] = tokenizer.get_vocab() if hasattr(tokenizer, "get_vocab") else kwargs["token_list"]
             vocab_size = len(kwargs["token_list"]) if kwargs["token_list"] is not None else -1
+            if vocab_size == -1 and hasattr(tokenizer, "get_vocab_size"):
+                vocab_size = tokenizer.get_vocab_size()
         else:
             vocab_size = -1
         kwargs["tokenizer"] = tokenizer
@@ -415,7 +417,7 @@
             return_raw_text = kwargs.get('return_raw_text', False)
             # step.3 compute punc model
             if self.punc_model is not None:
-                if not len(result["text"]):
+                if not len(result["text"].strip()):
                     if return_raw_text:
                         result['raw_text'] = ''
                 else:

--
Gitblit v1.9.1