python/FunASR-XL.git

# -*- coding: utf-8 -*-
"""
Process the textgrid files
"""
import argparse
import codecs
from distutils.util import strtobool
from pathlib import Path
import textgrid
import pdb
import numpy as np
import sys
import math
 
 
class Segment(object):
    def __init__(self, uttid, spkr, stime, etime, text):
        self.uttid = uttid
        self.spkr = spkr
        self.stime = round(stime, 2)
        self.etime = round(etime, 2)
        self.text = text
 
    def change_stime(self, time):
        self.stime = time
 
    def change_etime(self, time):
        self.etime = time
 
 
def get_args():
    parser = argparse.ArgumentParser(description="process the textgrid files")
    parser.add_argument("--path", type=str, required=True, help="Data path")
    args = parser.parse_args()
    return args
 
 
 
def main(args):
    textgrid_flist = codecs.open(Path(args.path) / "textgrid.flist", "r", "utf-8")
    segment_file = codecs.open(Path(args.path)/"segments", "w", "utf-8")
    utt2spk = codecs.open(Path(args.path)/"utt2spk", "w", "utf-8")
 
    # get the path of textgrid file for each utterance
    for line in textgrid_flist:
        line_array = line.strip().split(" ")
        path = Path(line_array[1])
        uttid = line_array[0]
 
        try:
            tg = textgrid.TextGrid.fromFile(path)
        except:
            pdb.set_trace()
        num_spk = tg.__len__()
        spk2textgrid = {}
        spk2weight = {}
        weight2spk = {}
        cnt = 2
        xmax = 0
        for i in range(tg.__len__()):
            spk_name = tg[i].name
            if spk_name not in spk2weight:
                spk2weight[spk_name] = cnt
                weight2spk[cnt] = spk_name
                cnt = cnt * 2
            segments = []
            for j in range(tg[i].__len__()):
                if tg[i][j].mark:
                    if xmax < tg[i][j].maxTime:
                        xmax = tg[i][j].maxTime
                    segments.append(
                        Segment(
                            uttid,
                            tg[i].name,
                            tg[i][j].minTime,
                            tg[i][j].maxTime,
                            tg[i][j].mark.strip(),
                        )
                    )
            segments = sorted(segments, key=lambda x: x.stime)
            spk2textgrid[spk_name] = segments
        olp_label = np.zeros((num_spk, int(xmax/0.01)), dtype=np.int32)
        for spkid in spk2weight.keys():
            weight = spk2weight[spkid]
            segments = spk2textgrid[spkid]
            idx = int(math.log2(weight) )- 1
            for i in range(len(segments)):
                stime = segments[i].stime
                etime = segments[i].etime
                olp_label[idx, int(stime/0.01): int(etime/0.01)] = weight
        sum_label = olp_label.sum(axis=0)
        stime = 0
        pre_value = 0
        for pos in range(sum_label.shape[0]):
            if sum_label[pos] in weight2spk:
                if pre_value in weight2spk:
                    if sum_label[pos] != pre_value:    
                        spkids = weight2spk[pre_value]
                        spkid_array = spkids.split("_")
                        spkid = spkid_array[-1]
                        #spkid = uttid+spkid 
                        if round(stime*0.01, 2) != round((pos-1)*0.01, 2):
                            segment_file.write("%s_%s_%s_%s %s %s %s\n" % (uttid, spkid, str(int(stime)).zfill(7), str(int(pos-1)).zfill(7), uttid, round(stime*0.01, 2) ,round((pos-1)*0.01, 2)))
                            utt2spk.write("%s_%s_%s_%s %s\n" % (uttid, spkid, str(int(stime)).zfill(7), str(int(pos-1)).zfill(7), uttid+"_"+spkid))
                        stime = pos
                        pre_value = sum_label[pos]
                else:
                    stime = pos
                    pre_value = sum_label[pos]
            else:
                if pre_value in weight2spk:
                    spkids = weight2spk[pre_value]
                    spkid_array = spkids.split("_")
                    spkid = spkid_array[-1]
                    #spkid = uttid+spkid 
                    if round(stime*0.01, 2) != round((pos-1)*0.01, 2):
                        segment_file.write("%s_%s_%s_%s %s %s %s\n" % (uttid, spkid, str(int(stime)).zfill(7), str(int(pos-1)).zfill(7), uttid, round(stime*0.01, 2) ,round((pos-1)*0.01, 2)))
                        utt2spk.write("%s_%s_%s_%s %s\n" % (uttid, spkid, str(int(stime)).zfill(7), str(int(pos-1)).zfill(7), uttid+"_"+spkid))
                    stime = pos
                    pre_value = sum_label[pos]
    textgrid_flist.close()
    segment_file.close()
 
 
if __name__ == "__main__":
    args = get_args()
    main(args)