zhifu gao
2023-02-20 0856ea2ebdcb976db6e786de5cd79fae3d35cd4c
funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer/paraformer_onnx.py
@@ -9,10 +9,11 @@
import librosa
import numpy as np
from .utils import (CharTokenizer, Hypothesis, ONNXRuntimeError,
                    OrtInferSession, TokenIDConverter, WavFrontend, get_logger,
from utils.utils import (CharTokenizer, Hypothesis, ONNXRuntimeError,
                    OrtInferSession, TokenIDConverter, get_logger,
                    read_yaml)
from .postprocess_utils import sentence_postprocess
from utils.postprocess_utils import sentence_postprocess
from utils.frontend import WavFrontend
logging = get_logger()
@@ -40,8 +41,8 @@
        self.ort_infer = OrtInferSession(model_file, device_id)
        self.batch_size = batch_size
    def __call__(self, wav_content: Union[str, np.ndarray, List[str]]) -> List:
        waveform_list = self.load_data(wav_content)
    def __call__(self, wav_content: Union[str, np.ndarray, List[str]], **kwargs) -> List:
        waveform_list = self.load_data(wav_content, self.frontend.opts.samp_freq)
        waveform_nums = len(waveform_list)
        asr_res = []
@@ -53,8 +54,9 @@
            try:
                am_scores, valid_token_lens = self.infer(feats, feats_len)
            except ONNXRuntimeError:
                logging.error(traceback.format_exc())
                preds = []
                #logging.warning(traceback.format_exc())
                logging.warning("input wav is silence or noise")
                preds = ['']
            else:
                preds = self.decode(am_scores, valid_token_lens)
@@ -62,10 +64,10 @@
        return asr_res
    def load_data(self,
                  wav_content: Union[str, np.ndarray, List[str]]) -> List:
                  wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List:
        def load_wav(path: str) -> np.ndarray:
            waveform, _ = librosa.load(path, sr=None)
            return waveform[None, ...]
            waveform, _ = librosa.load(path, sr=fs)
            return waveform
        if isinstance(wav_content, np.ndarray):
            return [wav_content]
@@ -139,13 +141,4 @@
        # text = self.tokenizer.tokens2text(token)
        return text
if __name__ == '__main__':
    # Smoke-test entry point: load a Paraformer ONNX model and transcribe the
    # bundled example wav, printing the recognition result.
    # NOTE(review): model_dir is a machine-specific absolute path — replace with
    # a CLI argument (e.g. sys.argv) before this is used outside the author's box.
    model_dir = "/home/zhifu.gzf/.cache/modelscope/hub/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
    model = Paraformer(model_dir)
    # Example audio ships inside the model directory.
    wav_file = os.path.join(model_dir, 'example/asr_example.wav')
    result = model(wav_file)
    print(result)