| | |
| | | import numpy as np |
| | | import kaldiio |
| | | import librosa |
| | | |
| | | |
| | | import torchaudio |
| | | import time |
| | | |
def load_audio(audio_path: str, fs: int=16000):
    """Load audio from a Kaldi ark entry or from an audio file.

    Args:
        audio_path: Source to read. Any string containing ".ark:" is passed
            to ``kaldiio.load_mat``; everything else is passed to
            ``torchaudio.load``.
        fs: Kept for backward compatibility with existing callers. It does
            not influence the returned data: neither loader below receives
            it, so no resampling to ``fs`` takes place.
            NOTE(review): confirm whether callers expect audio at ``fs``.

    Returns:
        For ark specifiers, whatever ``kaldiio.load_mat`` returns, unchanged.
        Otherwise the first row of the tensor returned by
        ``torchaudio.load`` (channel 0 under torchaudio's default
        channels-first layout).
    """
    if ".ark:" in audio_path:
        return kaldiio.load_mat(audio_path)

    # The file was previously decoded with librosa first and that result was
    # overwritten on the very next statement; only the torchaudio decode ever
    # reached the caller, so the redundant pass is dropped.
    waveform, _ = torchaudio.load(audio_path)
    return waveform[0, :]
| | | |
| | | def extract_features(data, date_type: str="sound", frontend=None): |
| | |
| | | "humanfriendly", |
| | | "scipy>=1.4.1", |
| | | "librosa", |
| | | # "jamo", # For kss |
| | | "jamo", # For kss |
| | | "PyYAML>=5.1.2", |
| | | # "soundfile>=0.12.1", |
| | | # "h5py>=3.1.0", |