| | |
| | | import librosa |
| | | import numpy as np |
| | | |
| | | from .utils.utils import (CharTokenizer, Hypothesis, ONNXRuntimeError, |
| | | from utils.utils import (CharTokenizer, Hypothesis, ONNXRuntimeError, |
| | | OrtInferSession, TokenIDConverter, get_logger, |
| | | read_yaml) |
| | | from .utils.postprocess_utils import sentence_postprocess |
| | | from .utils.frontend import WavFrontend |
| | | from utils.postprocess_utils import sentence_postprocess |
| | | from utils.frontend import WavFrontend |
| | | |
| | | logging = get_logger() |
| | | |
| | |
| | | self.ort_infer = OrtInferSession(model_file, device_id) |
| | | self.batch_size = batch_size |
| | | |
| | | def __call__(self, wav_content: Union[str, np.ndarray, List[str]]) -> List: |
| | | waveform_list = self.load_data(wav_content) |
| | | def __call__(self, wav_content: Union[str, np.ndarray, List[str]], **kwargs) -> List: |
| | | waveform_list = self.load_data(wav_content, self.frontend.opts.samp_freq) |
| | | waveform_nums = len(waveform_list) |
| | | |
| | | asr_res = [] |
| | |
| | | try: |
| | | am_scores, valid_token_lens = self.infer(feats, feats_len) |
| | | except ONNXRuntimeError: |
| | | logging.error(traceback.format_exc()) |
| | | preds = [] |
| | | #logging.warning(traceback.format_exc()) |
| | | logging.warning("input wav is silence or noise") |
| | | preds = [''] |
| | | else: |
| | | preds = self.decode(am_scores, valid_token_lens) |
| | | |
| | |
| | | return asr_res |
| | | |
| | | def load_data(self, |
| | | wav_content: Union[str, np.ndarray, List[str]]) -> List: |
| | | wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List: |
| | | def load_wav(path: str) -> np.ndarray: |
| | | waveform, _ = librosa.load(path, sr=None) |
| | | waveform, _ = librosa.load(path, sr=fs) |
| | | return waveform |
| | | |
| | | if isinstance(wav_content, np.ndarray): |