From 9c622feb645ee8ab166cd6d5fc9d0b2130a0f5fd Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: Tue, 12 Sep 2023 19:54:10 +0800
Subject: [PATCH] update proc for oov in hotword onnx inference
---
funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py | 9 ++++++++-
funasr/runtime/python/onnxruntime/demo_contextual_paraformer.py | 2 +-
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/funasr/runtime/python/onnxruntime/demo_contextual_paraformer.py b/funasr/runtime/python/onnxruntime/demo_contextual_paraformer.py
index 984c0d6..9da3817 100644
--- a/funasr/runtime/python/onnxruntime/demo_contextual_paraformer.py
+++ b/funasr/runtime/python/onnxruntime/demo_contextual_paraformer.py
@@ -5,7 +5,7 @@
model = ContextualParaformer(model_dir, batch_size=1)
wav_path = ['{}/.cache/modelscope/hub/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav'.format(Path.home())]
-hotwords = '随机热词 各种热词 魔搭 阿里巴巴'
+hotwords = '随机热词 各种热词 魔搭 阿里巴巴 仏'
result = model(wav_path, hotwords)
print(result)
diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py b/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
index c994036..4caa5c1 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
@@ -314,7 +314,14 @@
hotwords_length = torch.Tensor(hotwords_length).to(torch.int32)
# hotwords.append('<s>')
def word_map(word):
- return torch.tensor([self.vocab[i] for i in word])
+ hotwords = []
+ for c in word:
+ if c not in self.vocab.keys():
+ hotwords.append(8403)
+ logging.warning("oov character {} found in hotword {}, replaced by <unk>".format(c, word))
+ else:
+ hotwords.append(self.vocab[c])
+ return torch.tensor(hotwords)
hotword_int = [word_map(i) for i in hotwords]
# import pdb; pdb.set_trace()
hotword_int.append(torch.tensor([1]))
--
Gitblit v1.9.1