import numpy as np
|
from funasr.utils.job_runner import MultiProcessRunnerV3
|
import os
|
|
|
class MyRunner(MultiProcessRunnerV3):
    """Runner that groups RTTM lines by meeting and writes a segment file.

    ``prepare`` builds one task per meeting id from the file given by
    ``--rttm_scp``; ``post`` writes the lines produced by the workers to the
    file given by ``--seg_file``.
    """

    def prepare(self, parser):
        """Parse the command line and group the RTTM lines by meeting id.

        Args:
            parser: Object exposing ``add_argument`` and ``parse_args``
                (presumably an ``argparse.ArgumentParser`` supplied by the
                base class -- confirm against ``MultiProcessRunnerV3``).

        Returns:
            tuple: ``(task_list, None, args)`` where ``task_list`` is a list
            of ``(meeting_id, [raw_rttm_line, ...])`` pairs in first-seen
            order and ``args`` is the parsed namespace. The meaning of the
            second element is defined by the base class.

        Raises:
            IndexError: If a non-blank line has fewer than two fields.
        """
        parser.add_argument("--rttm_scp", type=str)
        parser.add_argument("--seg_file", type=str)
        args = parser.parse_args()

        # dirname is "" when seg_file is a bare file name; makedirs("") would
        # raise, so only create a directory when there is one to create.
        out_dir = os.path.dirname(args.seg_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        meeting2rttms = {}
        with open(args.rttm_scp, "rt") as fd:
            for one_line in fd:
                # split() drops empty tokens from repeated whitespace.
                parts = one_line.split()
                if not parts:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                # The second field is the meeting id; the raw line is kept
                # so that the worker can parse the timing fields itself.
                meeting2rttms.setdefault(parts[1], []).append(one_line)

        task_list = list(meeting2rttms.items())
        return task_list, None, args

    def post(self, results_list, args):
        """Write all worker outputs to ``args.seg_file``.

        Args:
            results_list: Iterable of per-worker results, each an iterable of
                already newline-terminated strings.
            args: Parsed namespace returned by ``prepare``.
        """
        with open(args.seg_file, "wt") as fd:
            for results in results_list:
                fd.writelines(results)
|
|
|
def process(task_args):
    """Merge each meeting's RTTM speaker turns into speech segments.

    Every RTTM line contributes the interval ``[start, start + duration)``,
    converted to integer frames as ``int(seconds * 100)``. Within a meeting,
    overlapping or touching intervals are merged regardless of speaker, and
    each merged run yields one output line.

    Args:
        task_args: 4-tuple of which only the second item is used: an
            iterable of ``(meeting_id, rttm_lines)`` pairs.

    Returns:
        list of str: Newline-terminated lines of the form
        ``<mid>-<start:07d>-<end:07d> <mid> <start_sec:.2f> <end_sec:.2f>``,
        where the key holds frame indices and the last two columns hold
        seconds.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If the start or duration field is not a number.
    """
    _, task_list, _, _ = task_args
    outputs = []
    for mid, rttms in task_list:
        intervals = []
        length = 0
        for one_line in rttms:
            # split() (no argument) ignores repeated spaces and tabs, so the
            # field indices stay aligned; split(" ") would yield empty tokens.
            parts = one_line.split()
            if not parts:
                continue
            st_sec, dur_sec = float(parts[3]), float(parts[4])
            # NOTE: int() truncates, so binary floating-point error can place
            # a boundary one frame early (e.g. 0.29 * 100 -> 28).
            st_frame = int(st_sec * 100)
            ed_frame = int((st_sec + dur_sec) * 100)
            length = max(length, ed_frame)
            intervals.append((st_frame, ed_frame))

        # One extra trailing frame that is never set guarantees that every
        # speech run is closed by a falling edge.
        is_sph = np.zeros((length + 1,), dtype=bool)
        for st_frame, ed_frame in intervals:
            is_sph[st_frame:ed_frame] = True

        # +1 marks the first frame of a speech run, -1 the first frame after
        # it; prepend=0 lets a run starting at frame 0 produce a rising edge.
        edges = np.diff(is_sph.astype(np.int8), prepend=0)
        seg_starts = np.flatnonzero(edges == 1).tolist()
        seg_ends = np.flatnonzero(edges == -1).tolist()
        for seg_st, seg_ed in zip(seg_starts, seg_ends):
            outputs.append("{}-{:07d}-{:07d} {} {:.2f} {:.2f}\n".format(
                mid, seg_st, seg_ed, mid, float(seg_st) / 100, float(seg_ed) / 100
            ))
    return outputs
|
|
|
if __name__ == '__main__':
    # Script entry point: hand the worker function to the runner and start it.
    MyRunner(process).run()
|