| runtime/python/onnxruntime/demo_contextual_paraformer.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/python/onnxruntime/funasr_onnx/utils/utils.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 |
runtime/python/onnxruntime/demo_contextual_paraformer.py
@@ -1,7 +1,7 @@ from funasr_onnx import ContextualParaformer from pathlib import Path model_dir = "./export/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" model_dir = "../export/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" # your export dir model = ContextualParaformer(model_dir, batch_size=1) wav_path = ['{}/.cache/modelscope/hub/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav'.format(Path.home())] runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
@@ -7,7 +7,6 @@ from typing import List, Union, Tuple import copy import torch import librosa import numpy as np @@ -18,7 +17,7 @@ sentence_postprocess_sentencepiece) from .utils.frontend import WavFrontend from .utils.timestamp_utils import time_stamp_lfr6_onnx from .utils.utils import pad_list, make_pad_mask from .utils.utils import pad_list logging = get_logger() @@ -309,7 +308,7 @@ # index from bias_embed bias_embed = bias_embed.transpose(1, 0, 2) _ind = np.arange(0, len(hotwords)).tolist() bias_embed = bias_embed[_ind, hotwords_length.cpu().numpy().tolist()] bias_embed = bias_embed[_ind, hotwords_length.tolist()] waveform_list = self.load_data(wav_content, self.frontend.opts.frame_opts.samp_freq) waveform_nums = len(waveform_list) asr_res = [] @@ -336,7 +335,7 @@ hotwords = hotwords.split(" ") hotwords_length = [len(i) - 1 for i in hotwords] hotwords_length.append(0) hotwords_length = torch.Tensor(hotwords_length).to(torch.int32) hotwords_length = np.array(hotwords_length) # hotwords.append('<s>') def word_map(word): hotwords = [] @@ -346,11 +345,12 @@ logging.warning("oov character {} found in hotword {}, replaced by <unk>".format(c, word)) else: hotwords.append(self.vocab[c]) return torch.tensor(hotwords) return np.array(hotwords) hotword_int = [word_map(i) for i in hotwords] # import pdb; pdb.set_trace() hotword_int.append(torch.tensor([1])) hotword_int.append(np.array([1])) hotwords = pad_list(hotword_int, pad_value=0, max_len=10) # import pdb; pdb.set_trace() return hotwords, hotwords_length def bb_infer(self, feats: np.ndarray, @@ -359,7 +359,7 @@ return outputs def eb_infer(self, hotwords, hotwords_length): outputs = self.ort_infer_eb([hotwords.to(torch.int32).numpy(), hotwords_length.to(torch.int32).numpy()]) outputs = self.ort_infer_eb([hotwords.astype(np.int32), hotwords_length.astype(np.int32)]) return outputs def decode(self, am_scores: np.ndarray, token_nums: int) -> List[str]: runtime/python/onnxruntime/funasr_onnx/utils/utils.py
@@ -2,12 +2,10 @@ import functools import logging import pickle from pathlib import Path from typing import Any, Dict, Iterable, List, NamedTuple, Set, Tuple, Union import re import torch import numpy as np import yaml try: @@ -27,14 +25,15 @@ n_batch = len(xs) if max_len is None: max_len = max(x.size(0) for x in xs) pad = xs[0].new(n_batch, max_len, *xs[0].size()[1:]).fill_(pad_value) # pad = xs[0].new(n_batch, max_len, *xs[0].size()[1:]).fill_(pad_value) # numpy format pad = (np.zeros((n_batch, max_len)) + pad_value).astype(np.int32) for i in range(n_batch): pad[i, : xs[i].size(0)] = xs[i] pad[i, : xs[i].shape[0]] = xs[i] return pad ''' def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): if length_dim == 0: raise ValueError("length_dim cannot be 0: {}".format(length_dim)) @@ -67,7 +66,7 @@ ) mask = mask[ind].expand_as(xs).to(xs.device) return mask ''' class TokenIDConverter(): def __init__(self, token_list: Union[List, str],