| | |
| | | import numpy as np |
| | | import torch |
| | | import torchaudio |
| | | import soundfile |
| | | # import librosa |
| | | import librosa |
| | | import yaml |
| | | |
| | | from funasr.bin.asr_infer import Speech2Text |
| | |
# Load the input audio at data_path_and_name_and_type[0] as a mono 1-D
# torch.Tensor in `raw_inputs`. torchaudio is tried first; any loading
# failure falls back to librosa.
try:
    # torchaudio.load -> (waveform [channels, frames], sample_rate);
    # keep only the first channel of the first return value.
    raw_inputs = torchaudio.load(data_path_and_name_and_type[0])[0][0]
except Exception:
    # Bare `except:` narrowed to `except Exception` so Ctrl-C / SystemExit
    # still propagate.
    #
    # NOTE(review): the original also called
    #   soundfile.read(path, dtype='float32')
    # here, but its result was immediately overwritten by librosa.load, so
    # that dead assignment is dropped. It also meant a soundfile failure
    # aborted the fallback before librosa ever ran.
    #
    # NOTE(review): librosa.load resamples to sr=22050 by default — confirm
    # this is intended; pass sr=None to keep the file's native rate. The
    # returned `sr` is currently unused downstream in this view.
    raw_inputs, sr = librosa.load(data_path_and_name_and_type[0], dtype='float32')
# Some loaders return (frames, channels) for multi-channel files; keep the
# first channel so the result is always 1-D.
if raw_inputs.ndim == 2:
    raw_inputs = raw_inputs[:, 0]
# Normalize to a torch.Tensor regardless of which loader produced the array.
raw_inputs = torch.tensor(raw_inputs)