From cec84de3b676b5fdcd6c2f5dc30fe4b3571ed574 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 15 Feb 2023 20:09:21 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add

---
 funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer/paraformer_onnx.py |   26 +++++++++-----------------
 1 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer/paraformer_onnx.py b/funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer/paraformer_onnx.py
index ca3d566..621b903 100644
--- a/funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer/paraformer_onnx.py
+++ b/funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer/paraformer_onnx.py
@@ -9,10 +9,11 @@
 import librosa
 import numpy as np
 
-from .utils import (CharTokenizer, Hypothesis, ONNXRuntimeError,
-                    OrtInferSession, TokenIDConverter, WavFrontend, get_logger,
+from utils.utils import (CharTokenizer, Hypothesis, ONNXRuntimeError,
+                    OrtInferSession, TokenIDConverter, get_logger,
                     read_yaml)
-from .postprocess_utils import sentence_postprocess
+from utils.postprocess_utils import sentence_postprocess
+from utils.frontend import WavFrontend
 
 logging = get_logger()
 
@@ -40,8 +41,8 @@
         self.ort_infer = OrtInferSession(model_file, device_id)
         self.batch_size = batch_size
 
-    def __call__(self, wav_content: Union[str, np.ndarray, List[str]]) -> List:
-        waveform_list = self.load_data(wav_content)
+    def __call__(self, wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List:
+        waveform_list = self.load_data(wav_content, fs)
         waveform_nums = len(waveform_list)
 
         asr_res = []
@@ -62,10 +63,10 @@
         return asr_res
 
     def load_data(self,
-                  wav_content: Union[str, np.ndarray, List[str]]) -> List:
+                  wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List:
         def load_wav(path: str) -> np.ndarray:
-            waveform, _ = librosa.load(path, sr=None)
-            return waveform[None, ...]
+            waveform, _ = librosa.load(path, sr=fs)
+            return waveform
 
         if isinstance(wav_content, np.ndarray):
             return [wav_content]
@@ -139,13 +140,4 @@
         # text = self.tokenizer.tokens2text(token)
         return text
 
-
-if __name__ == '__main__':
-    project_dir = Path(__file__).resolve().parent.parent
-    model_dir = "/home/zhifu.gzf/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
-    model = Paraformer(model_dir)
-
-    wav_file = os.path.join(model_dir, 'example/asr_example.wav')
-    result = model(wav_file)
-    print(result)
 

--
Gitblit v1.9.1