游雁
2023-11-16 4ace5a95b052d338947fc88809a440ccd55cf6b4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import argparse
import os
 
 
def read_segments_file(segments_file):
    """Parse a segments file and group its entries by utterance id.

    Every non-blank line must hold at least four whitespace-separated
    fields, read as ``segment_utt_id utt_id start end``; any fields after
    the fourth are ignored. Blank or whitespace-only lines are skipped.

    Args:
        segments_file: Path of the segments file to read.

    Returns:
        A dict mapping each ``utt_id`` to a list of
        ``(segment_utt_id, start, end)`` tuples in file order, where
        ``start`` and ``end`` have been converted to ``float``.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If the third or fourth field is not a valid float.
    """
    utt2segments = dict()
    with open(segments_file, "r") as fr:
        # Iterate the file object directly so the whole file is never held
        # in memory at once.
        for line in fr:
            parts = line.split()
            if not parts:
                # A blank line (e.g. a trailing empty line) carries no
                # segment; indexing into it would raise IndexError.
                continue
            segment_utt_id, utt_id = parts[0], parts[1]
            start, end = float(parts[2]), float(parts[3])
            utt2segments.setdefault(utt_id, []).append((segment_utt_id, start, end))
    return utt2segments
 
 
def write_label(label_file, label_list):
    """Write ``(start, end)`` pairs to ``label_file``, one pair per line.

    Each pair is rendered as ``"<start> <end> sp"`` followed by a newline.
    The file is opened in write mode, so existing content is replaced.

    Args:
        label_file: Path of the label file to write.
        label_list: Iterable of ``(start, end)`` pairs.
    """
    with open(label_file, "w") as out:
        # The generator is consumed inside the ``with`` so lines are
        # produced only after the file has been opened.
        out.writelines(f"{start} {end} sp\n" for start, end in label_list)
        out.flush()
 
 
def write_label_scp_file(label_scp_file, label_scp: dict):
    """Write the ``utt_id -> label path`` mapping to ``label_scp_file``.

    Each entry becomes one line of the form ``"<utt_id> <label_path>"``,
    in the dict's iteration order. The file is opened in write mode, so
    existing content is replaced.

    Args:
        label_scp_file: Path of the scp file to write.
        label_scp: Mapping from utterance id to its label file path.
    """
    with open(label_scp_file, "w") as out:
        out.writelines(f"{key} {path}\n" for key, path in label_scp.items())
        out.flush()
 
 
def main(args):
    """Create one ``.lab`` file per utterance plus an scp index of them.

    Reads the segments file, writes ``<label_path>/<utt_id>.lab`` for each
    utterance (one ``start end sp`` line per segment), and finally writes
    the ``utt_id -> .lab path`` mapping to the output scp file.

    Args:
        args: Object with the attributes ``input_segments`` (segments file
            to read), ``label_path`` (directory for the ``.lab`` files) and
            ``output_label_scp_file`` (scp file to write).
    """
    input_segments = args.input_segments
    label_path = args.label_path
    output_label_scp_file = args.output_label_scp_file

    utt2segments = read_segments_file(input_segments)
    print(f"Collect {len(utt2segments)} utt2segments in file {input_segments}")

    # Create the label directory up front; otherwise writing the first
    # .lab file fails with FileNotFoundError when it does not exist yet.
    # An empty label_path means the current directory, which makedirs
    # would reject, so skip it in that case.
    if label_path:
        os.makedirs(label_path, exist_ok=True)

    result_label_scp = dict()
    for utt_id, segment_list in utt2segments.items():
        cur_label_path = os.path.join(label_path, f"{utt_id}.lab")
        # Only the time span is written; the segment id is dropped.
        write_label(cur_label_path, label_list=[(start, end) for (_, start, end) in segment_list])
        result_label_scp[utt_id] = cur_label_path
    write_label_scp_file(output_label_scp_file, result_label_scp)
    print(f"Write {len(result_label_scp)} labels")
 
 
if __name__ == "__main__":
    # Command-line entry point: every option is required.
    parser = argparse.ArgumentParser(description="Make the lab file for segments")
    for option, help_text in (
        ("--input_segments", "The input segments file"),
        ("--label_path", "The label_path to save file.lab"),
        ("--output_label_scp_file", "The output label.scp file"),
    ):
        parser.add_argument(option, required=True, help=help_text)

    args = parser.parse_args()
    main(args)