funasr/datasets/audio_datasets/scp2jsonl.py
@@ -68,7 +68,7 @@ if os.path.exists(line): waveform, _ = librosa.load(line, sr=16000) sample_num = len(waveform) context_len = int(sample_num//16000*1000/10) context_len = int(sample_num/16000*1000/10) else: context_len = len(line.split()) if " " in line else len(line) res[key] = {data_type: line, f"{data_type}_len": context_len}