From fc08b62d05723cdc1ce021bb8ba044ca014fb1f7 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Mon, 13 Mar 2023 18:38:41 +0800
Subject: [PATCH] readme
---
egs/mars/sd/scripts/real_meeting_process/dump_real_meeting_chunks.py | 11 ++++++-----
1 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/egs/mars/sd/scripts/real_meeting_process/dump_real_meeting_chunks.py b/egs/mars/sd/scripts/real_meeting_process/dump_real_meeting_chunks.py
index 762d4c1..a2bcd39 100644
--- a/egs/mars/sd/scripts/real_meeting_process/dump_real_meeting_chunks.py
+++ b/egs/mars/sd/scripts/real_meeting_process/dump_real_meeting_chunks.py
@@ -25,7 +25,8 @@
print("Speaker {} has only {} frames, but expect {} frames at least, use them all.".format(spk, count, 300))
short_spk_list.append(spk)
- ivc_list = [kaldiio.load_mat(utt2ivc[utt])[np.newaxis, :] for utt in utt_list]
+ ivc_list = [kaldiio.load_mat(utt2ivc[utt]) for utt in utt_list]
+ ivc_list = [x/np.linalg.norm(x, axis=-1) for x in ivc_list]
ivc = np.concatenate(ivc_list, axis=0)
ivc = np.mean(ivc, axis=0, keepdims=False)
return ivc
@@ -34,7 +35,7 @@
def process(meeting_scp, labels_scp, spk2utt, utt2xvec, utt2frames, meeting2spk_list, args):
out_prefix = args.out
- ivc_dim = 512
+ ivc_dim = 192
win_len, win_shift = 400, 160
label_weights = 2 ** np.array(list(range(args.n_spk)))
wav_writer = kaldiio.WriteHelper("ark,scp:{}_wav.ark,{}_wav.scp".format(out_prefix, out_prefix))
@@ -56,7 +57,7 @@
xvec_list = []
for spk in meeting2spk_list[mid]:
- spk_xvec = calc_rand_ivc(spk, spk2utt, utt2xvec, utt2frames, 1000)[np.newaxis, :]
+ spk_xvec = calc_rand_ivc(spk, spk2utt, utt2xvec, utt2frames, 1000)
xvec_list.append(spk_xvec)
for _ in range(args.n_spk - len(xvec_list)):
xvec_list.append(np.zeros((ivc_dim,), dtype=np.float32))
@@ -66,10 +67,10 @@
wav_label = meeting_labels[st:ed, :]
frame_num = (ed-st) // win_shift
# wav_label = np.pad(wav_label, ((win_len/2, win_len/2), (0, 0)), "constant")
- feat_label = np.zeros((frame_num, wav_label.shape[1]), dtype=int)
+ feat_label = np.zeros((frame_num, wav_label.shape[1]), dtype=np.float32)
for i in range(frame_num):
frame_label = wav_label[i*win_shift: (i+1)*win_shift, :]
- feat_label[i, :] = (np.sum(frame_label, axis=0) > 0).astype(int)
+ feat_label[i, :] = (np.sum(frame_label, axis=0) > 0).astype(np.float32)
label_writer(seg_id, feat_label)
frames_list.append((mid, feat_label.shape[0]))
--
Gitblit v1.9.1