From 7d23704b9aebe8b6113b0708545be36a2298ff69 Mon Sep 17 00:00:00 2001
From: 志浩 <neo.dzh@alibaba-inc.com>
Date: 星期三, 15 二月 2023 20:04:08 +0800
Subject: [PATCH] add scripts for simu data
---
egs/mars/sd/scripts/dump_rttm_to_labels.py | 20 ++++++++++++++++++--
1 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/egs/mars/sd/scripts/dump_rttm_to_labels.py b/egs/mars/sd/scripts/dump_rttm_to_labels.py
index b4508ec..d582eb6 100644
--- a/egs/mars/sd/scripts/dump_rttm_to_labels.py
+++ b/egs/mars/sd/scripts/dump_rttm_to_labels.py
@@ -31,9 +31,25 @@
for scp_path in wav_scp_list:
meeting_scp.update(load_scp_as_dict(scp_path))
- assert len(meeting_scp) == len(meeting2rttm), \
- "Number of wav and rttm mismatch {} != {}".format(len(meeting_scp), len(meeting2rttm))
+    if len(meeting_scp) != len(meeting2rttm):  # tolerate a mismatch: keep only shared ids
+        logging.warning("Number of wav and rttm mismatch {} != {}".format(
+            len(meeting_scp), len(meeting2rttm)))
+        common_keys = set(meeting_scp.keys()) & set(meeting2rttm.keys())  # ids present in both inputs
+        logging.warning("Keep {} records.".format(len(common_keys)))
+        new_meeting_scp = OrderedDict()  # rebuilt in the original wav scp order
+        for key in meeting_scp:
+            if key not in common_keys:  # wav entry without a matching rttm
+                logging.warning("Pop {} from wav scp".format(key))
+            else:
+                new_meeting_scp[key] = meeting_scp[key]
+        new_meeting2rttm = OrderedDict()  # rebuilt in the original rttm order
+        for key in meeting2rttm:
+            if key not in common_keys:  # rttm entry without a matching wav
+                logging.warning("Pop {} from rttm scp".format(key))
+            else:
+                new_meeting2rttm[key] = meeting2rttm[key]
+        meeting_scp, meeting2rttm = new_meeting_scp, new_meeting2rttm
if not os.path.exists(args.out_dir):
os.makedirs(args.out_dir)
--
Gitblit v1.9.1