"""Split per-recording metadata files to match the list files in ``<root_path>/.work``.

The script reads four whitespace-separated text files from ``root_path``
(``reco2dur``, ``spk2utt``, ``segments`` and ``utt2spk``), groups their
entries by recording id, and then, for every list file found in
``<root_path>/.work``, writes ``reco2dur.<idx>``, ``spk2utt.<idx>``,
``segments.<idx>`` and ``utt2spk.<idx>`` next to it, restricted to the
recording ids named in the first column of that list file.
"""
import argparse
import os

# Base names of the files this script writes into the work directory.
_OUTPUT_PREFIXES = ('reco2dur', 'spk2utt', 'segments', 'utt2spk')


def _read_fields(path):
    """Return the whitespace-split fields of every non-blank line of *path*."""
    with open(path) as f:
        return [parts for parts in (line.split() for line in f) if parts]


def _recording_of(utt):
    """Derive the recording id embedded in an utterance id.

    Takes the text after the first ``'data'`` in *utt*, drops one separator
    character, removes the last two ``'_'``-separated fields and prefixes the
    remainder with ``'data_'``.  Raises ``IndexError`` if *utt* does not
    contain ``'data'``.
    """
    tail = utt.split('data')[1][1:]
    return 'data_' + '_'.join(tail.split('_')[:-2])


def _is_own_output(name):
    """Return True if *name* looks like ``<prefix>.<idx>`` written by this script."""
    parts = name.split('.')
    return len(parts) == 2 and parts[0] in _OUTPUT_PREFIXES


def _write_lines(path, lines):
    """Write *lines* to *path* separated by newlines, with no trailing newline.

    An empty *lines* produces an empty file instead of failing.
    """
    with open(path, 'w') as f:
        f.write('\n'.join(lines))


def main(argv=None):
    """Run the split.

    Args:
        argv: Command-line arguments (without the program name); ``None``
            means ``sys.argv[1:]``.

    Raises:
        KeyError: If a list file names a recording id that is missing from
            one of the four metadata files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('root_path', help='raw data path')
    args = parser.parse_args(argv)

    root_path = args.root_path
    work_path = os.path.join(root_path, ".work")
    # Files produced by an earlier run live in the same directory; skip them
    # so the script can be re-run without mistaking them for list files.
    scp_files = [name for name in os.listdir(work_path)
                 if not _is_own_output(name)]

    # recording id -> duration string
    reco2dur_dict = {}
    for parts in _read_fields(os.path.join(root_path, 'reco2dur')):
        reco2dur_dict[parts[0]] = parts[1]

    # recording id -> [(speaker, utterance), ...]
    spk2utt_dict = {}
    for parts in _read_fields(os.path.join(root_path, 'spk2utt')):
        spk = parts[0]
        for utt in parts[1:]:
            spk2utt_dict.setdefault(_recording_of(utt), []).append((spk, utt))

    # recording id -> [(utterance, start, end), ...]
    segment_dict = {}
    for parts in _read_fields(os.path.join(root_path, 'segments')):
        segment_dict.setdefault(parts[1], []).append(
            (parts[0], parts[2], parts[3]))

    # recording id -> [(utterance, speaker), ...]
    utt2spk_dict = {}
    for parts in _read_fields(os.path.join(root_path, 'utt2spk')):
        utt2spk_dict.setdefault(_recording_of(parts[0]), []).append(
            (parts[0], parts[1]))

    for file in scp_files:
        scp_file = os.path.join(work_path, file)
        # The index is the second-to-last dot-separated field of the path,
        # e.g. "1" for ".../.work/wav.1.scp".
        idx = scp_file.split('.')[-2]

        # First column of each non-blank line is a recording id.
        keys = [parts[0] for parts in _read_fields(scp_file)]

        _write_lines(
            os.path.join(work_path, 'reco2dur.{}'.format(idx)),
            [key + ' ' + reco2dur_dict[key] for key in keys])

        # NOTE(review): this emits one "<speaker> <utterance>" line per
        # utterance rather than one line per speaker; kept as-is, confirm
        # that downstream consumers expect this layout.
        _write_lines(
            os.path.join(work_path, 'spk2utt.{}'.format(idx)),
            [' '.join(item) for key in keys for item in spk2utt_dict[key]])

        _write_lines(
            os.path.join(work_path, 'segments.{}'.format(idx)),
            [item[0] + ' ' + key + ' ' + item[1] + ' ' + item[2]
             for key in keys for item in segment_dict[key]])

        _write_lines(
            os.path.join(work_path, 'utt2spk.{}'.format(idx)),
            [item[0] + ' ' + item[1]
             for key in keys for item in utt2spk_dict[key]])


if __name__ == '__main__':
    main()