import soundfile
|
import os
|
import sys
|
import codecs
|
import numpy as np
|
import pdb
|
|
|
segment_file_path = sys.argv[1]
|
wav_scp_file_path = sys.argv[2]
|
data_path = sys.argv[3]
|
|
wav_save_path = data_path + "/wav/"
|
os.system("mkdir -p " + wav_save_path)
|
pos_path = data_path + "/pos_map/"
|
os.system("mkdir -p " + pos_path)
|
|
wav_dict = {}
|
seg2time = {}
|
seg2time_new = {}
|
session2profile = {}
|
|
with codecs.open(wav_scp_file_path, "r", "utf-8") as f:
|
for line in f.readlines():
|
sessionid, wav_path = line.strip().split()
|
wav_dict[sessionid] = wav_path
|
|
with codecs.open(segment_file_path, "r", "utf-8") as f:
|
for line in f.readlines():
|
_, sessionid, stime, etime = line.strip().split()
|
if sessionid not in seg2time.keys():
|
seg2time[sessionid] = [(int(16000 * float(stime)), int(16000 * float(etime)))]
|
else:
|
seg2time[sessionid].append((int(16000 * float(stime)), int(16000 * float(etime))))
|
with codecs.open(data_path + "/map.scp", "w", "utf-8") as f1:
|
for sessionid, seg_times in seg2time.items():
|
seg2time_new[sessionid] = []
|
last_time = 0
|
with codecs.open(pos_path + sessionid + ".pos", "w", "utf-8") as f2:
|
for seg_time in seg_times:
|
tmp = seg_time[0] - last_time
|
cur_seg = (seg_time[0] - tmp, seg_time[1] - tmp)
|
seg2time_new[sessionid].append((seg_time[0] - last_time, seg_time[1] - last_time))
|
last_time = cur_seg[1]
|
f2.write("%s-%07d-%07d %d %d %d %d\n" % (sessionid, seg_time[0]/160, seg_time[1]/160, seg_time[0], seg_time[1], cur_seg[0], cur_seg[1]))
|
f1.write("%s %s\n" % (sessionid, pos_path + sessionid + ".pos"))
|
|
with codecs.open(data_path + "/cluster_profile_zeropadding16.scp", "r", "utf-8") as f:
|
for line in f.readlines():
|
session, path = line.strip().split()
|
session2profile[session] = path
|
|
with codecs.open(data_path + "/wav.scp", "w", "utf-8") as f1:
|
with codecs.open(data_path + "/profile.scp", "w", "utf-8") as f2:
|
for sessionid, wav_path in wav_dict.items():
|
wav = soundfile.read(wav_path)[0]
|
if wav.ndim == 2:
|
wav = wav[:, 0]
|
seg_list = [wav[seg[0]: seg[1]] for seg in seg2time[sessionid]]
|
wav_new = np.concatenate(seg_list, axis=0)
|
cur_time = 0
|
flag = True
|
while flag:
|
start = cur_time
|
end = cur_time + 256000
|
if end < wav_new.shape[0]:
|
cur_wav = wav_new[start: end]
|
else:
|
cur_wav = wav_new[start: ]
|
flag = False
|
cur_time = cur_time + 64000
|
wav_name = "%s-%07d_%07d.wav" % (sessionid, start/160, end/160)
|
soundfile.write(wav_save_path + wav_name, cur_wav, 16000)
|
f1.write("%s %s\n" % (wav_name, wav_save_path + wav_name))
|
f2.write("%s %s\n" % (wav_name, session2profile[sessionid]))
|
|