# -*- coding: utf-8 -*- """ Process the textgrid files """ import argparse import codecs from distutils.util import strtobool from pathlib import Path import textgrid import pdb import numpy as np import sys import math class Segment(object): def __init__(self, uttid, spkr, stime, etime, text): self.uttid = uttid self.spkr = spkr self.stime = round(stime, 2) self.etime = round(etime, 2) self.text = text def change_stime(self, time): self.stime = time def change_etime(self, time): self.etime = time def get_args(): parser = argparse.ArgumentParser(description="process the textgrid files") parser.add_argument("--path", type=str, required=True, help="Data path") args = parser.parse_args() return args def main(args): textgrid_flist = codecs.open(Path(args.path) / "textgrid.flist", "r", "utf-8") segment_file = codecs.open(Path(args.path)/"segments", "w", "utf-8") utt2spk = codecs.open(Path(args.path)/"utt2spk", "w", "utf-8") # get the path of textgrid file for each utterance for line in textgrid_flist: line_array = line.strip().split(" ") path = Path(line_array[1]) uttid = line_array[0] try: tg = textgrid.TextGrid.fromFile(path) except: pdb.set_trace() num_spk = tg.__len__() spk2textgrid = {} spk2weight = {} weight2spk = {} cnt = 2 xmax = 0 for i in range(tg.__len__()): spk_name = tg[i].name if spk_name not in spk2weight: spk2weight[spk_name] = cnt weight2spk[cnt] = spk_name cnt = cnt * 2 segments = [] for j in range(tg[i].__len__()): if tg[i][j].mark: if xmax < tg[i][j].maxTime: xmax = tg[i][j].maxTime segments.append( Segment( uttid, tg[i].name, tg[i][j].minTime, tg[i][j].maxTime, tg[i][j].mark.strip(), ) ) segments = sorted(segments, key=lambda x: x.stime) spk2textgrid[spk_name] = segments olp_label = np.zeros((num_spk, int(xmax/0.01)), dtype=np.int32) for spkid in spk2weight.keys(): weight = spk2weight[spkid] segments = spk2textgrid[spkid] idx = int(math.log2(weight) )- 1 for i in range(len(segments)): stime = segments[i].stime etime = segments[i].etime olp_label[idx, int(stime/0.01): int(etime/0.01)] = weight sum_label = olp_label.sum(axis=0) stime = 0 pre_value = 0 for pos in range(sum_label.shape[0]): if sum_label[pos] in weight2spk: if pre_value in weight2spk: if sum_label[pos] != pre_value: spkids = weight2spk[pre_value] spkid_array = spkids.split("_") spkid = spkid_array[-1] #spkid = uttid+spkid if round(stime*0.01, 2) != round((pos-1)*0.01, 2): segment_file.write("%s_%s_%s_%s %s %s %s\n" % (uttid, spkid, str(int(stime)).zfill(7), str(int(pos-1)).zfill(7), uttid, round(stime*0.01, 2) ,round((pos-1)*0.01, 2))) utt2spk.write("%s_%s_%s_%s %s\n" % (uttid, spkid, str(int(stime)).zfill(7), str(int(pos-1)).zfill(7), uttid+"_"+spkid)) stime = pos pre_value = sum_label[pos] else: stime = pos pre_value = sum_label[pos] else: if pre_value in weight2spk: spkids = weight2spk[pre_value] spkid_array = spkids.split("_") spkid = spkid_array[-1] #spkid = uttid+spkid if round(stime*0.01, 2) != round((pos-1)*0.01, 2): segment_file.write("%s_%s_%s_%s %s %s %s\n" % (uttid, spkid, str(int(stime)).zfill(7), str(int(pos-1)).zfill(7), uttid, round(stime*0.01, 2) ,round((pos-1)*0.01, 2))) utt2spk.write("%s_%s_%s_%s %s\n" % (uttid, spkid, str(int(stime)).zfill(7), str(int(pos-1)).zfill(7), uttid+"_"+spkid)) stime = pos pre_value = sum_label[pos] textgrid_flist.close() segment_file.close() if __name__ == "__main__": args = get_args() main(args)