funasr/datasets/audio_datasets/index_ds.py
@@ -35,7 +35,7 @@ with open(path, encoding="utf-8") as fin: file_list_all = fin.readlines() num_per_slice = (len(file_list_all) - 1) // data_split_num + 1 num_per_slice = (len(file_list_all) - 1) // data_split_num + 1 # 16 file_list = file_list_all[ data_split_i * num_per_slice : (data_split_i + 1) * num_per_slice ] @@ -104,10 +104,10 @@ or target_len > self.max_target_length ): continue if (source_len + target_len) > self.max_token_length: continue contents_i = { "source": source, "prompt": prompt,