"""Build per-utterance ``.lab`` label files and a ``label.scp`` index.

The input is a whitespace-separated segments file whose lines look like::

    <segment_utt_id> <utt_id> <start> <end>

For every distinct ``utt_id`` one ``<utt_id>.lab`` file is written that lists
each segment as ``<start> <end> sp``, and a scp file mapping ``utt_id`` to the
path of its label file is produced.
"""
import argparse
import os


def read_segments_file(segments_file):
    """Group the segments listed in *segments_file* by utterance id.

    Args:
        segments_file: Path to a text file with one segment per line in the
            form ``segment_utt_id utt_id start end``.

    Returns:
        A dict mapping each ``utt_id`` to a list of
        ``(segment_utt_id, start, end)`` tuples, in file order, with
        ``start`` and ``end`` converted to ``float``.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If ``start`` or ``end`` cannot be parsed as a float.
    """
    utt2segments = dict()
    with open(segments_file, "r", encoding="utf-8") as fr:
        # Stream the file instead of loading every line into memory first.
        for line in fr:
            parts = line.strip().split()
            if not parts:
                # Blank lines (e.g. a trailing empty line) carry no segment;
                # skipping them avoids a spurious IndexError.
                continue
            segment_utt_id, utt_id = parts[0], parts[1]
            start, end = float(parts[2]), float(parts[3])
            utt2segments.setdefault(utt_id, []).append(
                (segment_utt_id, start, end)
            )
    return utt2segments


def write_label(label_file, label_list):
    """Write one ``<start> <end> sp`` line per entry of *label_list*.

    Args:
        label_file: Path of the label file to create or overwrite.
        label_list: Iterable of ``(start, end)`` pairs.
    """
    # Leaving the ``with`` block flushes and closes the file, so no explicit
    # flush is needed.
    with open(label_file, "w", encoding="utf-8") as fw:
        for (start, end) in label_list:
            fw.write(f"{start} {end} sp\n")


def write_label_scp_file(label_scp_file, label_scp: dict):
    """Write the ``utt_id -> label path`` mapping as ``<utt_id> <path>`` lines.

    Args:
        label_scp_file: Path of the scp file to create or overwrite.
        label_scp: Dict mapping utterance ids to label file paths; entries
            are written in the dict's iteration order.
    """
    with open(label_scp_file, "w", encoding="utf-8") as fw:
        for (utt_id, label_path) in label_scp.items():
            fw.write(f"{utt_id} {label_path}\n")


def main(args):
    """Create the label files and the scp index described by *args*.

    Args:
        args: Object with the attributes ``input_segments`` (segments file to
            read), ``label_path`` (directory receiving the ``.lab`` files)
            and ``output_label_scp_file`` (scp file to write).
    """
    input_segments = args.input_segments
    label_path = args.label_path
    output_label_scp_file = args.output_label_scp_file

    utt2segments = read_segments_file(input_segments)
    print(f"Collect {len(utt2segments)} utt2segments in file {input_segments}")

    # The label directory may not exist yet; without this the first
    # write_label call would fail with FileNotFoundError.
    os.makedirs(label_path, exist_ok=True)

    result_label_scp = dict()
    for utt_id, segment_list in utt2segments.items():
        cur_label_path = os.path.join(label_path, f"{utt_id}.lab")
        # Only the time boundaries go into the label file; the segment id is
        # dropped.
        write_label(
            cur_label_path,
            label_list=[(start, end) for (_, start, end) in segment_list],
        )
        result_label_scp[utt_id] = cur_label_path

    write_label_scp_file(output_label_scp_file, result_label_scp)
    print(f"Write {len(result_label_scp)} labels")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Make the lab file for segments")
    parser.add_argument("--input_segments", required=True, help="The input segments file")
    parser.add_argument("--label_path", required=True, help="The label_path to save file.lab")
    parser.add_argument("--output_label_scp_file", required=True, help="The output label.scp file")
    args = parser.parse_args()

    main(args)