| runtime/python/onnxruntime/demo_contextual_paraformer.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
| runtime/python/onnxruntime/funasr_onnx/utils/utils.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 |
runtime/python/onnxruntime/demo_contextual_paraformer.py
@@ -1,7 +1,7 @@ from funasr_onnx import ContextualParaformer from pathlib import Path model_dir = "./export/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" model_dir = "../export/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" # your export dir model = ContextualParaformer(model_dir, batch_size=1) wav_path = ['{}/.cache/modelscope/hub/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/example/asr_example.wav'.format(Path.home())] runtime/python/onnxruntime/funasr_onnx/paraformer_bin.py
@@ -7,7 +7,6 @@ from typing import List, Union, Tuple import copy import torch import librosa import numpy as np @@ -18,7 +17,7 @@ sentence_postprocess_sentencepiece) from .utils.frontend import WavFrontend from .utils.timestamp_utils import time_stamp_lfr6_onnx from .utils.utils import pad_list, make_pad_mask from .utils.utils import pad_list logging = get_logger() @@ -309,7 +308,7 @@ # index from bias_embed bias_embed = bias_embed.transpose(1, 0, 2) _ind = np.arange(0, len(hotwords)).tolist() bias_embed = bias_embed[_ind, hotwords_length.cpu().numpy().tolist()] bias_embed = bias_embed[_ind, hotwords_length.tolist()] waveform_list = self.load_data(wav_content, self.frontend.opts.frame_opts.samp_freq) waveform_nums = len(waveform_list) asr_res = [] @@ -336,7 +335,7 @@ hotwords = hotwords.split(" ") hotwords_length = [len(i) - 1 for i in hotwords] hotwords_length.append(0) hotwords_length = torch.Tensor(hotwords_length).to(torch.int32) hotwords_length = np.array(hotwords_length) # hotwords.append('<s>') def word_map(word): hotwords = [] @@ -346,11 +345,12 @@ logging.warning("oov character {} found in hotword {}, replaced by <unk>".format(c, word)) else: hotwords.append(self.vocab[c]) return torch.tensor(hotwords) return np.array(hotwords) hotword_int = [word_map(i) for i in hotwords] # import pdb; pdb.set_trace() hotword_int.append(torch.tensor([1])) hotword_int.append(np.array([1])) hotwords = pad_list(hotword_int, pad_value=0, max_len=10) # import pdb; pdb.set_trace() return hotwords, hotwords_length def bb_infer(self, feats: np.ndarray, @@ -359,7 +359,7 @@ return outputs def eb_infer(self, hotwords, hotwords_length): outputs = self.ort_infer_eb([hotwords.to(torch.int32).numpy(), hotwords_length.to(torch.int32).numpy()]) outputs = self.ort_infer_eb([hotwords.astype(np.int32), hotwords_length.astype(np.int32)]) return outputs def decode(self, am_scores: np.ndarray, token_nums: int) -> List[str]: runtime/python/onnxruntime/funasr_onnx/utils/utils.py
@@ -7,7 +7,6 @@ from typing import Any, Dict, Iterable, List, NamedTuple, Set, Tuple, Union import re import torch import numpy as np import yaml try: @@ -27,14 +26,15 @@ n_batch = len(xs) if max_len is None: max_len = max(x.size(0) for x in xs) pad = xs[0].new(n_batch, max_len, *xs[0].size()[1:]).fill_(pad_value) # pad = xs[0].new(n_batch, max_len, *xs[0].size()[1:]).fill_(pad_value) # numpy format pad = np.zeros((n_batch, max_len)).astype(np.int32) for i in range(n_batch): pad[i, : xs[i].size(0)] = xs[i] pad[i, : xs[i].shape[0]] = xs[i] return pad ''' def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None): if length_dim == 0: raise ValueError("length_dim cannot be 0: {}".format(length_dim)) @@ -67,7 +67,7 @@ ) mask = mask[ind].expand_as(xs).to(xs.device) return mask ''' class TokenIDConverter(): def __init__(self, token_list: Union[List, str],