#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Reader and trimmer for TextGrid annotation files.

The parser understands the long text layout in which every value sits on
its own ``key = value`` line and tiers / intervals are introduced by
``item [n]:`` / ``intervals [n]:`` header lines.
"""
import codecs


class TextGrid(object):
    """A whole annotation file: header fields plus a list of ``Tier`` objects."""

    def __init__(
        self,
        file_type="",
        object_class="",
        xmin=0.0,
        xmax=0.0,
        tiers_status="",
        tiers=None,
    ):
        """Store the header fields and tiers.

        ``tiers`` defaults to a fresh empty list for every instance (a
        literal ``[]`` default would be shared between instances).

        Raises:
            ValueError: if ``xmax`` is smaller than ``xmin``.
        """
        self.file_type = file_type
        self.object_class = object_class
        self.xmin = xmin
        self.xmax = xmax
        self.tiers_status = tiers_status
        self.tiers = [] if tiers is None else tiers
        if self.xmax < self.xmin:
            raise ValueError("xmax ({}) < xmin ({})".format(self.xmax, self.xmin))

    def cutoff(self, xstart=None, xend=None):
        """Return a new ``TextGrid`` covering only ``[xstart, xend]``.

        The result keeps ``self.xmin`` as its start, so the kept span is
        shifted left by ``xstart - self.xmin``.  Missing bounds default to
        the grid's own ``xmin`` / ``xmax``.  Every tier is cut with the same
        bounds; ``self`` is not modified.

        Raises:
            ValueError: if ``xend`` is smaller than ``xstart``.
        """
        if xstart is None:
            xstart = self.xmin
        if xend is None:
            xend = self.xmax
        if xend < xstart:
            raise ValueError("xend ({}) < xstart ({})".format(xend, xstart))
        return TextGrid(
            file_type=self.file_type,
            object_class=self.object_class,
            xmin=self.xmin,
            xmax=xend - xstart + self.xmin,
            tiers_status=self.tiers_status,
            tiers=[tier.cutoff(xstart=xstart, xend=xend) for tier in self.tiers],
        )


class Tier(object):
    """One named tier: a time span and the ``Interval`` objects inside it."""

    def __init__(self, tier_class="", name="", xmin=0.0, xmax=0.0, intervals=None):
        """Store the tier fields.

        ``intervals`` defaults to a fresh empty list for every instance.

        Raises:
            ValueError: if ``xmax`` is smaller than ``xmin``.
        """
        self.tier_class = tier_class
        self.name = name
        self.xmin = xmin
        self.xmax = xmax
        self.intervals = [] if intervals is None else intervals
        if self.xmax < self.xmin:
            raise ValueError("xmax ({}) < xmin ({})".format(self.xmax, self.xmin))

    def cutoff(self, xstart=None, xend=None):
        """Return a new ``Tier`` covering only ``[xstart, xend]``.

        Intervals entirely outside the window are dropped; intervals that
        cross a window edge are clamped to it.  All kept times are shifted
        left by ``xstart - self.xmin`` so the new tier starts at
        ``self.xmin``.  ``self`` is not modified.

        Raises:
            ValueError: if ``xend`` is smaller than ``xstart``.
        """
        if xstart is None:
            xstart = self.xmin
        if xend is None:
            xend = self.xmax
        if xend < xstart:
            raise ValueError("xend ({}) < xstart ({})".format(xend, xstart))
        bias = xstart - self.xmin
        new_xmin = self.xmin
        new_xmax = xend - bias
        new_intervals = []
        for interval in self.intervals:
            if interval.xmax <= xstart or interval.xmin >= xend:
                continue  # no overlap with the window
            # Clamp each edge independently so that an interval covering the
            # whole window is cut on BOTH sides (clamping only the start
            # would leave its end beyond the new tier's xmax).  The clamped
            # values reuse new_xmin / new_xmax to stay exactly equal to the
            # tier bounds despite floating-point rounding.
            if interval.xmin < xstart:
                cut_xmin = new_xmin
            else:
                cut_xmin = interval.xmin - bias
            if interval.xmax > xend:
                cut_xmax = new_xmax
            else:
                cut_xmax = interval.xmax - bias
            new_intervals.append(
                Interval(xmin=cut_xmin, xmax=cut_xmax, text=interval.text)
            )
        return Tier(
            tier_class=self.tier_class,
            name=self.name,
            xmin=new_xmin,
            xmax=new_xmax,
            intervals=new_intervals,
        )


class Interval(object):
    """A labelled time span ``[xmin, xmax]`` inside a tier."""

    def __init__(self, xmin=0.0, xmax=0.0, text=""):
        """Store the bounds and label.

        Raises:
            ValueError: if ``xmax`` is smaller than ``xmin``.
        """
        self.xmin = xmin
        self.xmax = xmax
        self.text = text
        if self.xmax < self.xmin:
            raise ValueError("xmax ({}) < xmin ({})".format(self.xmax, self.xmin))


def _strip_eol(line):
    """Return ``line`` without trailing CR / LF characters."""
    return line.rstrip("\r\n")


def _is_blank(line):
    """Return True when ``line`` holds nothing but its line terminator."""
    return _strip_eol(line) == ""


def _field_value(line, separator="=", unquote=False):
    """Return the value part of a ``key <separator> value`` line.

    Only the first separator splits the line, so a value that itself
    contains the separator (e.g. text with ``=``) is kept whole.  All spaces
    and line terminators are removed from the value; double quotes are
    removed too when ``unquote`` is true.

    Raises:
        IndexError: if ``separator`` does not occur in ``line``.
    """
    value = line.split(separator, 1)[1]
    value = value.replace(" ", "")
    if unquote:
        value = value.replace('"', "")
    return value.replace("\r", "").replace("\n", "")


def read_textgrid_from_file(filepath):
    """Parse the UTF-8 file at ``filepath`` into a ``TextGrid``.

    Lines may end in either CRLF or LF.  A single trailing blank line is
    ignored.

    Raises:
        AssertionError: if a header line is not where it is expected.
        ValueError: if an ``item [n]:`` header is missing or a number cannot
            be parsed.
    """
    with codecs.open(filepath, "r", encoding="utf-8") as handle:
        lines = handle.readlines()
    if lines and _is_blank(lines[-1]):
        lines = lines[:-1]

    # NOTE: asserts are kept (rather than raised exceptions) so callers that
    # already handle AssertionError keep working.
    assert "File type" in lines[0], "error line 0, {}".format(lines[0])
    file_type = _field_value(lines[0], unquote=True)
    assert "Object class" in lines[1], "error line 1, {}".format(lines[1])
    object_class = _field_value(lines[1], unquote=True)
    assert _is_blank(lines[2]), "error line 2, {}".format(lines[2])
    assert "xmin" in lines[3], "error line 3, {}".format(lines[3])
    xmin = float(_field_value(lines[3]))
    assert "xmax" in lines[4], "error line 4, {}".format(lines[4])
    xmax = float(_field_value(lines[4]))
    assert "tiers?" in lines[5], "error line 5, {}".format(lines[5])
    tiers_status = _field_value(lines[5], separator="?")
    assert "size" in lines[6], "error line 6, {}".format(lines[6])
    size = int(_field_value(lines[6]))
    assert _strip_eol(lines[7]) == "item []:", "error line 7, {}".format(lines[7])

    # Locate each tier by its exact "    item [n]:" header; a tier's lines
    # run up to the next tier's header (or the end of the file).
    bare_lines = [_strip_eol(line) for line in lines]
    tier_start = [
        bare_lines.index(" " * 4 + "item [{}]:".format(item_idx + 1))
        for item_idx in range(size)
    ]
    tier_end = tier_start[1:] + [len(lines)]
    tiers = [
        read_tier_from_lines(tier_lines=lines[start + 1 : end])
        for start, end in zip(tier_start, tier_end)
    ]
    return TextGrid(
        file_type=file_type,
        object_class=object_class,
        xmin=xmin,
        xmax=xmax,
        tiers_status=tiers_status,
        tiers=tiers,
    )


def read_tier_from_lines(tier_lines):
    """Parse the lines that follow an ``item [n]:`` header into a ``Tier``.

    Two layouts are accepted for the interval section: the regular one with
    four lines per interval (header, xmin, xmax, text) and an irregular one
    with five lines per interval in which the ``intervals [n]:`` header
    appears twice.

    Raises:
        AssertionError: if a field or interval header is not where expected.
        ValueError: if the number of interval lines matches neither layout,
            or a number cannot be parsed.
    """
    assert "class" in tier_lines[0], "error line 0, {}".format(tier_lines[0])
    tier_class = _field_value(tier_lines[0], unquote=True)
    assert "name" in tier_lines[1], "error line 1, {}".format(tier_lines[1])
    name = _field_value(tier_lines[1], unquote=True)
    assert "xmin" in tier_lines[2], "error line 2, {}".format(tier_lines[2])
    xmin = float(_field_value(tier_lines[2]))
    assert "xmax" in tier_lines[3], "error line 3, {}".format(tier_lines[3])
    xmax = float(_field_value(tier_lines[3]))
    assert "intervals: size" in tier_lines[4], "error line 4, {}".format(tier_lines[4])
    intervals_num = int(_field_value(tier_lines[4]))

    # handle unformatted case (stray blank line at the end of the tier)
    # R12_S203204205_C09_I1_Near_203.TextGrid
    # R12_S203204205_C09_I1_Near_205.TextGrid
    if _is_blank(tier_lines[-1]):
        tier_lines = tier_lines[:-1]

    body = tier_lines[5:]
    if len(body) == intervals_num * 5:
        # handle unformatted case: the "intervals [n]:" header is duplicated
        stride = 5
    elif len(body) == intervals_num * 4:
        # regular layout; also seen in
        # R12_S203204205_C09_I1_Near_203.TextGrid
        # R12_S203204205_C09_I1_Near_204.TextGrid
        # R12_S203204205_C09_I1_Near_205.TextGrid
        stride = 4
    else:
        raise ValueError(
            "error lines {} % {} != 0".format(len(body), intervals_num)
        )

    header_count = stride - 3  # every interval ends with xmin, xmax, text
    intervals = []
    for intervals_idx in range(intervals_num):
        first = stride * intervals_idx
        expected = " " * 8 + "intervals [{}]:".format(intervals_idx + 1)
        for header in body[first : first + header_count]:
            assert _strip_eol(header) == expected, "error interval header, {}".format(
                header
            )
        intervals.append(
            read_interval_from_lines(
                interval_lines=body[first + header_count : first + stride]
            )
        )
    return Tier(
        tier_class=tier_class, name=name, xmin=xmin, xmax=xmax, intervals=intervals
    )


def read_interval_from_lines(interval_lines):
    """Parse the three ``xmin`` / ``xmax`` / ``text`` lines of one interval.

    Spaces and double quotes are removed from the text; an ``=`` inside the
    text is preserved.

    Raises:
        AssertionError: if there are not exactly three lines or a field is
            not where expected.
        ValueError: if a bound cannot be parsed or ``xmax < xmin``.
    """
    assert len(interval_lines) == 3, "error lines"
    assert "xmin" in interval_lines[0], "error line 0, {}".format(interval_lines[0])
    xmin = float(_field_value(interval_lines[0]))
    assert "xmax" in interval_lines[1], "error line 1, {}".format(interval_lines[1])
    xmax = float(_field_value(interval_lines[1]))
    assert "text" in interval_lines[2], "error line 2, {}".format(interval_lines[2])
    text = _field_value(interval_lines[2], unquote=True)
    return Interval(xmin=xmin, xmax=xmax, text=text)