From 01df8f330ccc754223d5e2d688dc0a55d27f2dcc Mon Sep 17 00:00:00 2001
From: querryton <72929808+querryton@users.noreply.github.com>
Date: Sat, 20 Apr 2024 16:07:13 +0800
Subject: [PATCH] [fix] Fix a bug in seaco_paraformer model "inference" function (#1639)

---
 funasr/auto/auto_model.py |    7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index 830d88c..ba8881a 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -175,6 +175,8 @@
             kwargs["token_list"] = tokenizer.token_list if hasattr(tokenizer, "token_list") else None
             kwargs["token_list"] = tokenizer.get_vocab() if hasattr(tokenizer, "get_vocab") else kwargs["token_list"]
             vocab_size = len(kwargs["token_list"]) if kwargs["token_list"] is not None else -1
+            if vocab_size == -1 and hasattr(tokenizer, "get_vocab_size"):
+                vocab_size = tokenizer.get_vocab_size()
         else:
             vocab_size = -1
         kwargs["tokenizer"] = tokenizer
@@ -211,6 +213,9 @@
             else:
                 print(f"error, init_param does not exist!: {init_param}")
         
+        # fp16
+        if kwargs.get("fp16", False):
+            model.to(torch.float16)
         return model, kwargs
     
     def __call__(self, *args, **cfg):
@@ -412,7 +417,7 @@
             return_raw_text = kwargs.get('return_raw_text', False)
             # step.3 compute punc model
             if self.punc_model is not None:
-                if not len(result["text"]):
+                if not len(result["text"].strip()):
                     if return_raw_text:
                         result['raw_text'] = ''
                 else:

--
Gitblit v1.9.1