#!/usr/bin/env python3
# Copyright 2015   David Snyder
#           2018   Ewald Enzinger
# Apache 2.0.
#
# Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8).
# This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz.
#
# This file is meant to be invoked by make_musan.sh.

import os
import sys

# Extension that marks an audio file; it is stripped to form the utterance id.
_WAV_SUFFIX = ".wav"


def _utt_id(file_name):
    """Return the utterance id for a file name that ends in ``.wav``.

    Only the trailing extension is removed, so a name such as
    ``a.wavy.wav`` yields ``a.wavy`` rather than losing every ``.wav``
    substring.
    """
    return file_name[:-len(_WAV_SUFFIX)]


def _format_lines(pairs):
    """Render (key, value) pairs as ``"key value\\n"`` lines in one string."""
    return "".join("{} {}\n".format(key, value) for key, value in pairs)


def process_music_annotations(path):
    """Parse one music ANNOTATIONS file.

    Each non-blank line must have at least four whitespace-separated
    fields: utterance id, genres, vocals flag and musician.

    Args:
        path: Path of the ANNOTATIONS file.

    Returns:
        A tuple ``(utt2spk, utt2vocals)``. ``utt2spk`` maps every utterance
        id to itself; ``utt2vocals`` maps it to ``True`` when the vocals
        field is exactly ``"Y"``.

    Raises:
        ValueError: If a non-blank line has fewer than four fields.
    """
    utt2spk = {}
    utt2vocals = {}
    with open(path, 'r') as annotations:
        for line_number, line in enumerate(annotations, start=1):
            fields = line.split()
            if not fields:
                # Tolerate blank lines instead of failing on the unpack.
                continue
            if len(fields) < 4:
                raise ValueError(
                    "{}:{}: expected at least 4 fields, got {}: {!r}".format(
                        path, line_number, len(fields), line.rstrip()))
            utt, _genres, vocals, _musician = fields[:4]
            # For this application, the musician ID isn't important
            utt2spk[utt] = utt
            utt2vocals[utt] = vocals == "Y"
    return utt2spk, utt2vocals


def prepare_music(root_dir, use_vocals):
    """Build the utt2spk and wav.scp contents for the music portion.

    Walks ``<root_dir>/music``, collecting ``.wav`` files and every file
    named ``ANNOTATIONS``. Only annotated utterances are considered; an
    annotated utterance without a wav file is reported as missing.

    Args:
        root_dir: Directory that contains the ``music`` subdirectory.
        use_vocals: If false, utterances annotated as having vocals are
            left out of the output.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated
        ``"<utt> <spk>"`` and ``"<utt> <wav path>"`` lines.
    """
    utt2vocals = {}
    utt2spk = {}
    utt2wav = {}
    music_dir = os.path.join(root_dir, "music")
    for root, _dirs, files in os.walk(music_dir):
        for name in files:
            file_path = os.path.join(root, name)
            if name.endswith(_WAV_SUFFIX):
                utt2wav[_utt_id(name)] = file_path
            elif name == "ANNOTATIONS":
                utt2spk_part, utt2vocals_part = process_music_annotations(file_path)
                utt2spk.update(utt2spk_part)
                utt2vocals.update(utt2vocals_part)

    spk_pairs = []
    wav_pairs = []
    num_good_files = 0
    num_bad_files = 0
    for utt, has_vocals in utt2vocals.items():
        if utt not in utt2wav:
            print("Missing file {}".format(utt))
            num_bad_files += 1
            continue
        # Counted as processed even when the vocals filter drops it below.
        num_good_files += 1
        if use_vocals or not has_vocals:
            spk_pairs.append((utt, utt2spk[utt]))
            wav_pairs.append((utt, utt2wav[utt]))
    print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
    return _format_lines(spk_pairs), _format_lines(wav_pairs)


def _prepare_unannotated(root_dir, subdir):
    """Build utt2spk and wav.scp contents for a portion without annotations.

    Every ``.wav`` file found under ``<root_dir>/<subdir>`` becomes one
    utterance whose speaker id equals its utterance id. Since each entry
    comes from a file that was just found, none can be missing and the
    summary always reports 0 missing files.

    Args:
        root_dir: Directory that contains ``subdir``.
        subdir: Name of the subdirectory to walk; also used in the summary
            message.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)``.
    """
    utt2wav = {}
    for root, _dirs, files in os.walk(os.path.join(root_dir, subdir)):
        for name in files:
            if name.endswith(_WAV_SUFFIX):
                utt2wav[_utt_id(name)] = os.path.join(root, name)
    utt2spk_str = _format_lines((utt, utt) for utt in utt2wav)
    utt2wav_str = _format_lines(utt2wav.items())
    print("In {} directory, processed {} files: {} had missing wav data".format(subdir, len(utt2wav), 0))
    return utt2spk_str, utt2wav_str


def prepare_speech(root_dir):
    """Return ``(utt2spk_str, utt2wav_str)`` for ``<root_dir>/speech``."""
    return _prepare_unannotated(root_dir, "speech")


def prepare_noise(root_dir):
    """Return ``(utt2spk_str, utt2wav_str)`` for ``<root_dir>/noise``."""
    return _prepare_unannotated(root_dir, "noise")


def main():
    """Write ``wav.scp`` and ``utt2spk`` for the speech, music and noise portions.

    Command line: ``<in_dir> <out_dir> <use_vocals>``, where ``use_vocals``
    is ``Y`` to keep music with vocals and anything else to drop it. The
    output directory must already exist.
    """
    if len(sys.argv) < 4:
        sys.exit("Usage: {} <in-dir> <out-dir> <use-vocals: Y|N>".format(sys.argv[0]))
    in_dir = sys.argv[1]
    out_dir = sys.argv[2]
    use_vocals = sys.argv[3] == "Y"
    utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
    utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
    utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
    utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
    utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
    # Context managers guarantee both files are flushed and closed.
    with open(os.path.join(out_dir, "wav.scp"), 'w') as wav_fi:
        wav_fi.write(utt2wav)
    with open(os.path.join(out_dir, "utt2spk"), 'w') as utt2spk_fi:
        utt2spk_fi.write(utt2spk)


if __name__ == "__main__":
    main()