From a05e753d11d9c36983ec4e58c421dbcf86d1dcd4 Mon Sep 17 00:00:00 2001
From: Xian Shi <40013335+R1ckShi@users.noreply.github.com>
Date: 星期二, 17 十月 2023 16:47:27 +0800
Subject: [PATCH] Merge branch 'main' into dev_onnx

---
 funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py |   17 ++++++++---------
 1 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py b/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
index 884def9..71cf434 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
@@ -242,14 +242,6 @@
 
         if not Path(model_dir).exists():
             try:
-                from modelscope.hub.snapshot_download import snapshot_download
-            except:
-                raise "You are exporting model from modelscope, please install modelscope and try it again. To install modelscope, you could:\n" \
-                      "\npip3 install -U modelscope\n" \
-                      "For the users in China, you could install with the command:\n" \
-                      "\npip3 install -U modelscope -i https://mirror.sjtu.edu.cn/pypi/web/simple"
-
-            try:
                 model_dir = snapshot_download(model_dir, cache_dir=cache_dir)
             except:
                 raise "model_dir must be model_name in modelscope or local path downloaded from modelscope, but is {}".format(model_dir)
@@ -333,7 +325,14 @@
         hotwords_length = torch.Tensor(hotwords_length).to(torch.int32)
         # hotwords.append('<s>')
         def word_map(word):
-            return torch.tensor([self.vocab[i] for i in word])
+            hotwords = []
+            for c in word:
+                if c not in self.vocab.keys():
+                    hotwords.append(8403)
+                    logging.warning("oov character {} found in hotword {}, replaced by <unk>".format(c, word))
+                else:
+                    hotwords.append(self.vocab[c])
+            return torch.tensor(hotwords)
         hotword_int = [word_map(i) for i in hotwords]
         # import pdb; pdb.set_trace()
         hotword_int.append(torch.tensor([1]))

--
Gitblit v1.9.1