# -*- coding: utf-8 -*-
"""
Process the textgrid files
"""
import argparse
import codecs
import math
import pdb
import sys
from pathlib import Path

try:
    # distutils was removed from the standard library in Python 3.12; the
    # name is unused in this script, so a missing module must not be fatal.
    from distutils.util import strtobool
except ImportError:
    strtobool = None

import numpy as np

try:
    import textgrid
except ImportError:
    # Deferred: main() raises a proper ImportError with the install hint.
    # (Raising a plain string, as was done before, is itself a TypeError.)
    textgrid = None

# Duration of one analysis frame in seconds (10 ms resolution).
FRAME_SHIFT = 0.01

_TEXTGRID_HINT = "Please install textgrid firstly: pip install textgrid"


class Segment(object):
    """One labelled interval of a speaker tier.

    Start and end times are stored in seconds, rounded to two decimals
    (i.e. to the 10 ms frame grid) at construction time.
    """

    def __init__(self, uttid, spkr, stime, etime, text):
        self.uttid = uttid
        self.spkr = spkr
        self.stime = round(stime, 2)
        self.etime = round(etime, 2)
        self.text = text

    def change_stime(self, time):
        """Overwrite the start time (stored as given, not re-rounded)."""
        self.stime = time

    def change_etime(self, time):
        """Overwrite the end time (stored as given, not re-rounded)."""
        self.etime = time


def _to_frame(seconds):
    """Convert a time in seconds to the nearest frame index.

    Rounding (instead of truncating) avoids off-by-one errors caused by
    binary floating point, e.g. ``0.29 / 0.01 == 28.999999999999996``.
    """
    return int(round(seconds / FRAME_SHIFT))


def single_speaker_runs(spk2segments, num_frames):
    """Find the maximal frame runs in which exactly one speaker is active.

    Args:
        spk2segments: mapping of speaker name to an iterable of ``Segment``
            objects belonging to that speaker.
        num_frames: total number of frames of the recording.

    Returns:
        A list of ``(speaker_name, start_frame, stop_frame)`` tuples in time
        order; ``stop_frame`` is exclusive. Frames with no speaker or with
        overlapping speakers belong to no run. A run that lasts until the
        final frame is included.
    """
    speakers = list(spk2segments)
    if not speakers or num_frames <= 0:
        return []

    # active[s, t] is True when speaker s talks during frame t.
    active = np.zeros((len(speakers), num_frames), dtype=bool)
    for row, name in enumerate(speakers):
        for seg in spk2segments[name]:
            active[row, _to_frame(seg.stime):_to_frame(seg.etime)] = True

    # owner[t] = row of the only active speaker, or -1 (silence / overlap).
    owner = np.where(active.sum(axis=0) == 1, active.argmax(axis=0), -1)

    runs = []
    start = 0
    previous = -1
    # The trailing -1 sentinel closes a run that reaches the last frame.
    for pos, current in enumerate(owner.tolist() + [-1]):
        if current == previous:
            continue
        if previous >= 0:
            runs.append((speakers[previous], start, pos))
        start = pos
        previous = current
    return runs


def format_run(uttid, spk_name, start, stop):
    """Format one single-speaker run as ``segments`` and ``utt2spk`` lines.

    Args:
        uttid: recording id from the file list.
        spk_name: tier name; the text after its last ``_`` is the speaker id.
        start: first frame of the run.
        stop: frame just past the end of the run (exclusive).

    Returns:
        ``(segments_line, utt2spk_line)``, each terminated by a newline.
    """
    spkid = spk_name.split("_")[-1]
    # NOTE: the end is reported as the start time of the run's last frame
    # (stop - 1); kept as-is so downstream consumers see unchanged values.
    last = stop - 1
    seg_id = "%s_%s_%s_%s" % (
        uttid,
        spkid,
        str(int(start)).zfill(7),
        str(int(last)).zfill(7),
    )
    segments_line = "%s %s %s %s\n" % (
        seg_id,
        uttid,
        round(start * FRAME_SHIFT, 2),
        round(last * FRAME_SHIFT, 2),
    )
    utt2spk_line = "%s %s\n" % (seg_id, uttid + "_" + spkid)
    return segments_line, utt2spk_line


def get_args():
    """Parse the command line; ``--path`` is the required data directory."""
    parser = argparse.ArgumentParser(description="process the textgrid files")
    parser.add_argument("--path", type=str, required=True, help="Data path")
    args = parser.parse_args()
    return args


def main(args):
    """Write non-overlapping single-speaker segments for every recording.

    Reads ``<args.path>/textgrid.flist`` (one ``<uttid> <textgrid path>``
    pair per line, separated by a single space) and writes two Kaldi-style
    files into the same directory:

    * ``segments``: ``<segment id> <uttid> <start sec> <end sec>``
    * ``utt2spk``:  ``<segment id> <uttid>_<speaker id>``

    Only stretches where exactly one speaker is talking are emitted; runs
    of a single frame are skipped because they would have zero length.

    Raises:
        ImportError: if the ``textgrid`` package is not installed.
        RuntimeError: if a TextGrid file cannot be read or parsed.
    """
    if textgrid is None:
        raise ImportError(_TEXTGRID_HINT)

    data_dir = Path(args.path)
    # Context managers guarantee all three files are flushed and closed,
    # even when processing fails part-way through.
    with codecs.open(data_dir / "textgrid.flist", "r", "utf-8") as textgrid_flist, \
            codecs.open(data_dir / "segments", "w", "utf-8") as segment_file, \
            codecs.open(data_dir / "utt2spk", "w", "utf-8") as utt2spk:
        # get the path of textgrid file for each utterance
        for line in textgrid_flist:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines in the file list
            line_array = line.split(" ")
            uttid = line_array[0]
            tg_path = Path(line_array[1])

            try:
                tg = textgrid.TextGrid.fromFile(tg_path)
            except Exception as err:
                # Fail loudly instead of dropping into a debugger and then
                # continuing with an undefined or stale TextGrid object.
                raise RuntimeError(
                    "Failed to read TextGrid for %s: %s" % (uttid, tg_path)
                ) from err

            # Collect the non-empty intervals of every tier; tiers sharing
            # a name are treated as the same speaker.
            spk2segments = {}
            xmax = 0
            for tier in tg:
                tier_segments = spk2segments.setdefault(tier.name, [])
                for interval in tier:
                    if not interval.mark:
                        continue
                    xmax = max(xmax, interval.maxTime)
                    tier_segments.append(
                        Segment(
                            uttid,
                            tier.name,
                            interval.minTime,
                            interval.maxTime,
                            interval.mark.strip(),
                        )
                    )

            runs = single_speaker_runs(spk2segments, _to_frame(xmax))
            for spk_name, start, stop in runs:
                if stop - start < 2:
                    continue  # single-frame run: start time == end time
                segments_line, utt2spk_line = format_run(
                    uttid, spk_name, start, stop
                )
                segment_file.write(segments_line)
                utt2spk.write(utt2spk_line)


if __name__ == "__main__":
    args = get_args()
    main(args)