| | |
| | | self.ort_infer = OrtInferSession(model_file, device_id) |
| | | self.batch_size = batch_size |
| | | |
| | | def __call__(self, wav_content: Union[str, np.ndarray, List[str]]) -> List: |
| | | waveform_list = self.load_data(wav_content) |
| | | def __call__(self, wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List: |
| | | waveform_list = self.load_data(wav_content, fs) |
| | | waveform_nums = len(waveform_list) |
| | | |
| | | asr_res = [] |
| | |
| | | return asr_res |
| | | |
| | | def load_data(self, |
| | | wav_content: Union[str, np.ndarray, List[str]]) -> List: |
| | | wav_content: Union[str, np.ndarray, List[str]], fs: int = None) -> List: |
| | | def load_wav(path: str) -> np.ndarray: |
| | | waveform, _ = librosa.load(path, sr=None) |
| | | waveform, _ = librosa.load(path, sr=fs) |
| | | return waveform |
| | | |
| | | if isinstance(wav_content, np.ndarray): |
| | |
| | | fs: int = 16000, |
| | | window: str = 'hamming', |
| | | n_mels: int = 80, |
| | | frame_length: int = 25.0, |
| | | frame_length: int = 25, |
| | | frame_shift: int = 10, |
| | | filter_length_min: int = -1, |
| | | filter_length_max: float = -1, |