| New file |
| | |
| | | import argparse |
| | | import os |
| | | |
def _read_fields(path):
    """Yield the whitespace-separated fields of every non-blank line in *path*."""
    with open(path) as f:
        for line in f:
            fields = line.split()
            if fields:  # tolerate blank lines instead of failing on fields[0]
                yield fields


def _recording_of(utt):
    """Return the recording id encoded in utterance id *utt*.

    Takes the text that follows the first ``'data'`` in *utt*, drops the one
    separator character after it, removes the last two ``'_'``-separated
    fields and prefixes the remainder with ``'data_'``.

    Raises:
        IndexError: if *utt* does not contain ``'data'``.
    """
    tail = utt.split('data')[1][1:]
    return 'data_' + '_'.join(tail.split('_')[:-2])


def _write_lines(path, lines):
    """Write *lines* to *path* separated by newlines, with no trailing newline.

    An empty *lines* produces an empty file.
    """
    with open(path, 'w') as f:
        f.write('\n'.join(lines))


def main(argv=None):
    """Split the data-directory tables under ``root_path`` per split file.

    Reads ``reco2dur``, ``spk2utt``, ``segments`` and ``utt2spk`` from
    ``root_path``. For every file found in ``root_path/.work`` the first field
    of each line is taken as a recording key, and the matching entries are
    written to ``reco2dur.<idx>``, ``spk2utt.<idx>``, ``segments.<idx>`` and
    ``utt2spk.<idx>`` in ``.work``, where ``<idx>`` is the text after the last
    ``'.'`` of the split file's name.

    Args:
        argv: command-line arguments; ``None`` means ``sys.argv[1:]``.

    Raises:
        KeyError: if a key from a split file is missing from one of the tables.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('root_path', help='raw data path')
    args = parser.parse_args(argv)

    root_path = args.root_path
    work_path = os.path.join(root_path, '.work')
    # NOTE(review): this lists everything in .work, including outputs written
    # by a previous run of this script -- confirm the directory is clean.
    scp_files = os.listdir(work_path)

    # recording id -> duration (kept as the original string)
    reco2dur = {}
    for fields in _read_fields(os.path.join(root_path, 'reco2dur')):
        reco2dur[fields[0]] = fields[1]

    # recording id -> [(speaker, utterance), ...]
    spk2utt = {}
    for fields in _read_fields(os.path.join(root_path, 'spk2utt')):
        spk = fields[0]
        for utt in fields[1:]:
            spk2utt.setdefault(_recording_of(utt), []).append((spk, utt))

    # recording id -> [(utterance, start, end), ...]
    segments = {}
    for fields in _read_fields(os.path.join(root_path, 'segments')):
        segments.setdefault(fields[1], []).append(
            (fields[0], fields[2], fields[3]))

    # recording id -> [(utterance, speaker), ...]
    utt2spk = {}
    for fields in _read_fields(os.path.join(root_path, 'utt2spk')):
        utt2spk.setdefault(_recording_of(fields[0]), []).append(
            (fields[0], fields[1]))

    for name in scp_files:
        scp_file = os.path.join(work_path, name)
        if not os.path.isfile(scp_file):
            continue  # sub-directories cannot be read as split lists
        # Take the index from the file name only: the '.work' directory in the
        # full path contains a dot and would corrupt the index of a dot-less
        # file name.
        idx = name.split('.')[-1]

        # The split list is fully read and closed before any output is written.
        keys = [fields[0] for fields in _read_fields(scp_file)]

        _write_lines(
            os.path.join(work_path, 'reco2dur.{}'.format(idx)),
            [key + ' ' + reco2dur[key] for key in keys])

        # One "speaker utterance" pair per line, in table order.
        _write_lines(
            os.path.join(work_path, 'spk2utt.{}'.format(idx)),
            [' '.join(item) for key in keys for item in spk2utt[key]])

        _write_lines(
            os.path.join(work_path, 'segments.{}'.format(idx)),
            [' '.join((utt, key, start, end))
             for key in keys
             for utt, start, end in segments[key]])

        _write_lines(
            os.path.join(work_path, 'utt2spk.{}'.format(idx)),
            [utt + ' ' + spk for key in keys for utt, spk in utt2spk[key]])


if __name__ == '__main__':
    main()