import random
|
import numpy as np
|
import os
|
import sys
|
|
|
if __name__=="__main__":
|
path = sys.argv[1] # dump2/raw/Eval_Ali_far
|
wav_scp_file = open(path+"/wav.scp", 'r')
|
wav_scp = wav_scp_file.readlines()
|
wav_scp_file.close()
|
spk2id_file = open(path + "/spk2id", 'r')
|
spk2id = spk2id_file.readlines()
|
spk2id_file.close()
|
embedding_scp_file = open(path + "/oracle_embedding.scp", 'r')
|
embedding_scp = embedding_scp_file.readlines()
|
embedding_scp_file.close()
|
|
embedding_map = {}
|
for line in embedding_scp:
|
spk = line.strip().split(' ')[0]
|
if spk not in embedding_map.keys():
|
emb=np.load(line.strip().split(' ')[1])
|
embedding_map[spk] = emb
|
|
meeting_map_tmp = {}
|
global_spk_list = []
|
for line in spk2id:
|
line_list = line.strip().split(' ')
|
meeting = line_list[0].split('-')[0]
|
spk_id = line_list[0].split('-')[-1].split('_')[-1]
|
spk = meeting + '_' + spk_id
|
global_spk_list.append(spk)
|
if meeting in meeting_map_tmp.keys():
|
meeting_map_tmp[meeting].append(spk)
|
else:
|
meeting_map_tmp[meeting] = [spk]
|
|
meeting_map = {}
|
os.system('mkdir -p ' + path + '/oracle_profile_nopadding')
|
for meeting in meeting_map_tmp.keys():
|
emb_list = []
|
for i in range(len(meeting_map_tmp[meeting])):
|
spk = meeting_map_tmp[meeting][i]
|
emb_list.append(embedding_map[spk])
|
profile = np.vstack(emb_list)
|
np.save(path + '/oracle_profile_nopadding/' + meeting + '.npy', profile)
|
meeting_map[meeting] = path + '/oracle_profile_nopadding/' + meeting + '.npy'
|
|
profile_scp = open(path + '/oracle_profile_nopadding.scp', 'w')
|
profile_map_scp = open(path + '/oracle_profile_nopadding_spk_list', 'w')
|
|
for line in wav_scp:
|
uttid = line.strip().split(' ')[0]
|
meeting = uttid.split('-')[0]
|
profile_scp.write(uttid + ' ' + meeting_map[meeting] + '\n')
|
profile_map_scp.write(uttid + ' ' + '$'.join(meeting_map_tmp[meeting]) + '\n')
|
profile_scp.close()
|
profile_map_scp.close()
|