VirtuosoQ
2024-04-26 e9d2cfc3a134b00f4e98271fbee3838d1ccecbcc
funasr/datasets/audio_datasets/scp2jsonl.py
@@ -68,7 +68,7 @@
        if os.path.exists(line):
            waveform, _ = librosa.load(line, sr=16000)
            sample_num = len(waveform)
            context_len = int(sample_num//16000*1000/10)
            context_len = int(sample_num/16000*1000/10)
        else:
            context_len = len(line.split()) if " " in line else len(line)
        res[key] = {data_type: line, f"{data_type}_len": context_len}
@@ -79,6 +79,7 @@
def main_hydra(cfg: DictConfig):
 
    kwargs = OmegaConf.to_container(cfg, resolve=True)
    print(kwargs)
    scp_file_list = kwargs.get("scp_file_list", ("/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"))
    if isinstance(scp_file_list, str):