2个文件已修改
14个文件已添加
3个文件已重命名
| New file |
| | |
#!/bin/bash
# Copyright 2017 David Snyder
# Apache 2.0.
#
# This script prepares the Callhome portion of the NIST SRE 2000
# corpus (LDC2001S97). It is the evaluation dataset used in the
# callhome_diarization recipe.
#
# Arguments: $1 - directory searched (recursively) for the .sph files
#            $2 - output data directory; <out>/callhome, <out>/callhome1 and
#                 <out>/callhome2 are written beneath it
# The utils/ scripts are referenced by relative path, so the script has to be
# started from a directory that contains utils/.
#
# NOTE: `set -e` is deliberately not enabled: validate_data_dir.sh is run
# before fix_data_dir.sh, and presumably may report problems that the fix
# step then repairs. Only the download/unpack steps are checked explicitly.

if [ $# -ne 2 ]; then
  echo "Usage: $0 <callhome-speech> <out-data-dir>"
  echo "e.g.: $0 /mnt/data/LDC2001S97 data/"
  exit 1
fi

src_dir=$1
data_dir=$2

out_dir=$data_dir/callhome
tmp_dir=$out_dir/.tmp
mkdir -p "$tmp_dir" || exit 1

# Download some metadata that wasn't provided in the LDC release
if [ ! -d "$tmp_dir/sre2000-key" ]; then
  wget --no-check-certificate -P "$tmp_dir/" \
    http://www.openslr.org/resources/10/sre2000-key.tar.gz || exit 1
  tar -xvf "$tmp_dir/sre2000-key.tar.gz" -C "$tmp_dir/" || exit 1
fi

# The list of 500 recordings
awk '{print $1}' "$tmp_dir/sre2000-key/reco2num" > "$tmp_dir/reco.list"

# Create wav.scp file: one "<reco> sph2pipe ... |" line per recording found.
count=0
missing=0
while read -r reco; do
  # Keep only the first hit so a duplicated file name cannot spill a second
  # path into the wav.scp entry.
  path=$(find "$src_dir" -name "$reco.sph" | head -n 1)
  if [ -z "${path// }" ]; then
    >&2 echo "$0: Missing Sphere file for $reco"
    missing=$((missing+1))
  else
    echo "$reco sph2pipe -f wav -p $path |"
  fi
  count=$((count+1))
done < "$tmp_dir/reco.list" > "$out_dir/wav.scp"

if [ "$missing" -gt 0 ]; then
  echo "$0: Missing $missing out of $count recordings"
fi

cp "$tmp_dir/sre2000-key/segments" "$out_dir/"
awk '{print $1, $2}' "$out_dir/segments" > "$out_dir/utt2spk"
utils/utt2spk_to_spk2utt.pl "$out_dir/utt2spk" > "$out_dir/spk2utt"
cp "$tmp_dir/sre2000-key/reco2num" "$out_dir/reco2num_spk"
cp "$tmp_dir/sre2000-key/fullref.rttm" "$out_dir/"

utils/validate_data_dir.sh --no-text --no-feats "$out_dir"
utils/fix_data_dir.sh "$out_dir"

# Split the recordings into two halves: 250 randomly chosen ones go to
# callhome1, everything else to callhome2.
utils/copy_data_dir.sh "$out_dir" "${out_dir}1"
utils/copy_data_dir.sh "$out_dir" "${out_dir}2"

utils/shuffle_list.pl "$out_dir/wav.scp" | head -n 250 \
  | utils/filter_scp.pl - "$out_dir/wav.scp" \
  > "${out_dir}1/wav.scp"
utils/fix_data_dir.sh "${out_dir}1"
utils/filter_scp.pl --exclude "${out_dir}1/wav.scp" \
  "$out_dir/wav.scp" > "${out_dir}2/wav.scp"
utils/fix_data_dir.sh "${out_dir}2"
utils/filter_scp.pl "${out_dir}1/wav.scp" "$out_dir/reco2num_spk" \
  > "${out_dir}1/reco2num_spk"
utils/filter_scp.pl "${out_dir}2/wav.scp" "$out_dir/reco2num_spk" \
  > "${out_dir}2/reco2num_spk"

rm -rf "$tmp_dir" 2> /dev/null
| New file |
| | |
| | | #!/usr/bin/env python3 |
| | | |
| | | # Copyright 2019 Hitachi, Ltd. (author: Yusuke Fujita) |
| | | # Licensed under the MIT license. |
| | | # |
| | | # This script generates simulated multi-talker mixtures for diarization |
| | | # |
| | | # common/make_mixture.py \ |
| | | # mixture.scp \ |
| | | # data/mixture \ |
| | | # wav/mixture |
| | | |
| | | |
| | | import argparse |
| | | import os |
| | | from eend import kaldi_data |
| | | import numpy as np |
| | | import math |
| | | import soundfile as sf |
| | | import json |
| | | |
parser = argparse.ArgumentParser()
parser.add_argument('script',
                    help='list of json')
parser.add_argument('out_data_dir',
                    help='output data dir of mixture')
parser.add_argument('out_wav_dir',
                    help='output mixture wav files are stored here')
parser.add_argument('--rate', type=int, default=16000,
                    help='sampling rate')
args = parser.parse_args()

# "-R" forces the default random seed for reproducibility
resample_cmd = "sox -R -t wav - -t wav - rate {}".format(args.rate)

# The output data files and the mixture script are opened in one `with`
# statement so that every handle is flushed and closed even when a
# recording fails part-way through.
with open(args.out_data_dir + '/segments', 'w') as segments_f, \
        open(args.out_data_dir + '/utt2spk', 'w') as utt2spk_f, \
        open(args.out_data_dir + '/wav.scp', 'w') as wav_scp_f, \
        open(args.script) as script_f:
    # Each script line is "<recid> <json>"; the json describes one mixture.
    for line in script_f:
        recid, jsonstr = line.strip().split(None, 1)
        indata = json.loads(jsonstr)
        wavfn = indata['recid']
        # recid now include out_wav_dir
        recid = os.path.join(args.out_wav_dir, wavfn).replace('/', '_')
        noise = indata['noise']
        noise_snr = indata['snr']
        mixture = []
        for speaker in indata['speakers']:
            spkid = speaker['spkid']
            utts = speaker['utts']
            intervals = speaker['intervals']
            rir = speaker['rir']
            # utterances are reverberated using the room impulse response;
            # the command depends only on the speaker's rir, so build it once
            preprocess = "wav-reverberate --print-args=false " \
                         " --impulse-response={} - -".format(rir)
            data = []
            pos = 0
            for interval, utt in zip(intervals, utts):
                # append silence interval data
                silence = np.zeros(int(interval * args.rate))
                data.append(silence)
                if isinstance(utt, list):
                    # [recording, start, end]: times are multiplied by the
                    # sampling rate and rounded to sample indices
                    rec, st, et = utt
                    st = np.rint(st * args.rate).astype(int)
                    et = np.rint(et * args.rate).astype(int)
                else:
                    rec = utt
                    st = 0
                    et = None
                if rir is not None:
                    wav_rxfilename = kaldi_data.process_wav(rec, preprocess)
                else:
                    wav_rxfilename = rec
                wav_rxfilename = kaldi_data.process_wav(
                    wav_rxfilename, resample_cmd)
                speech, _ = kaldi_data.load_wav(wav_rxfilename, st, et)
                data.append(speech)
                # calculate start/end position in samples
                startpos = pos + len(silence)
                endpos = startpos + len(speech)
                # write segments and utt2spk
                uttid = '{}_{}_{:07d}_{:07d}'.format(
                    spkid, recid, int(startpos / args.rate * 100),
                    int(endpos / args.rate * 100))
                print(uttid, recid,
                      startpos / args.rate, endpos / args.rate,
                      file=segments_f)
                print(uttid, spkid, file=utt2spk_f)
                # update position for next utterance
                pos = endpos
            data = np.concatenate(data)
            mixture.append(data)

        # fitting to the maximum-length speaker data, then mix all speakers
        maxlen = max(len(x) for x in mixture)
        mixture = [np.pad(x, (0, maxlen - len(x)), 'constant')
                   for x in mixture]
        mixture = np.sum(mixture, axis=0)
        # noise is repeated or cut to fit the mixture data length
        noise_resampled = kaldi_data.process_wav(noise, resample_cmd)
        noise_data, _ = kaldi_data.load_wav(noise_resampled)
        if maxlen > len(noise_data):
            noise_data = np.pad(
                noise_data, (0, maxlen - len(noise_data)), 'wrap')
        else:
            noise_data = noise_data[:maxlen]
        # noise power is scaled according to selected SNR, then mixed
        signal_power = np.sum(mixture**2) / len(mixture)
        noise_power = np.sum(noise_data**2) / len(noise_data)
        scale = math.sqrt(
            math.pow(10, - noise_snr / 10) * signal_power / noise_power)
        mixture += noise_data * scale
        # output the wav file and write wav.scp
        outfname = '{}.wav'.format(wavfn)
        outpath = os.path.join(args.out_wav_dir, outfname)
        sf.write(outpath, mixture, args.rate)
        print(recid, os.path.abspath(outpath), file=wav_scp_f)
| New file |
| | |
| | | #!/usr/bin/env python3 |
| | | # Copyright 2015 David Snyder |
| | | # 2018 Ewald Enzinger |
| | | # Apache 2.0. |
| | | # |
| | | # Modified version of egs/sre16/v1/local/make_musan.py (commit e3fb7c4a0da4167f8c94b80f4d3cc5ab4d0e22e8). |
| | | # This version uses the raw MUSAN audio files (16 kHz) and does not use sox to resample at 8 kHz. |
| | | # |
| | | # This file is meant to be invoked by make_musan.sh. |
| | | |
| | | import os, sys |
| | | |
def process_music_annotations(path):
    """Parse one music ANNOTATIONS file.

    Each non-blank line must hold at least four whitespace-separated
    fields: utterance id, genres, vocals flag and musician.

    Args:
        path: path of the ANNOTATIONS file to read.

    Returns:
        A tuple ``(utt2spk, utt2vocals)``: ``utt2spk`` maps every utterance
        id to itself, ``utt2vocals`` maps it to True when the vocals field
        is exactly "Y".

    Raises:
        ValueError: if a non-blank line has fewer than four fields.
    """
    utt2spk = {}
    utt2vocals = {}
    # The context manager closes the file even if a malformed line raises.
    with open(path, 'r') as annotations:
        for line in annotations:
            fields = line.rstrip().split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            utt, _genres, vocals, _musician = fields[:4]
            # For this application, the musician ID isn't important
            utt2spk[utt] = utt
            utt2vocals[utt] = vocals == "Y"
    return utt2spk, utt2vocals
| | | |
def prepare_music(root_dir, use_vocals):
    """Collect utt2spk / wav.scp entries for the ``music`` subdirectory.

    Walks ``<root_dir>/music``: every ``*.wav`` file is recorded under its
    file name without the extension, and every file named ``ANNOTATIONS``
    is parsed with ``process_music_annotations``.

    Args:
        root_dir: corpus root that contains the ``music`` directory.
        use_vocals: when false, utterances annotated as having vocals are
            left out of the returned strings.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated
        "<utt> <spk>" and "<utt> <wav-path>" lines.
    """
    utt2vocals = {}
    utt2spk = {}
    utt2wav = {}
    num_good_files = 0
    num_bad_files = 0
    music_dir = os.path.join(root_dir, "music")
    for root, dirs, files in os.walk(music_dir):
        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith(".wav"):
                # Strip only the trailing extension; str.replace would also
                # remove ".wav" occurring in the middle of a name.
                utt = file[:-len(".wav")]
                utt2wav[utt] = file_path
            elif file == "ANNOTATIONS":
                utt2spk_part, utt2vocals_part = \
                    process_music_annotations(file_path)
                utt2spk.update(utt2spk_part)
                utt2vocals.update(utt2vocals_part)
    # Lines are gathered in lists and joined once at the end.
    utt2spk_lines = []
    utt2wav_lines = []
    for utt in utt2vocals:
        if utt in utt2wav:
            if use_vocals or not utt2vocals[utt]:
                utt2spk_lines.append(utt + " " + utt2spk[utt] + "\n")
                utt2wav_lines.append(utt + " " + utt2wav[utt] + "\n")
            # Counted for every annotated utterance that has audio, whether
            # or not the vocals filter kept it.
            num_good_files += 1
        else:
            print("Missing file {}".format(utt))
            num_bad_files += 1
    print("In music directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
    return "".join(utt2spk_lines), "".join(utt2wav_lines)
| | | |
def prepare_speech(root_dir):
    """Collect utt2spk / wav.scp entries for the ``speech`` subdirectory.

    Walks ``<root_dir>/speech`` and records every ``*.wav`` file under its
    file name without the extension; each utterance is its own speaker.

    Args:
        root_dir: corpus root that contains the ``speech`` directory.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated
        "<utt> <utt>" and "<utt> <wav-path>" lines.
    """
    utt2wav = {}
    speech_dir = os.path.join(root_dir, "speech")
    for root, dirs, files in os.walk(speech_dir):
        for file in files:
            if file.endswith(".wav"):
                # Strip only the trailing extension; str.replace would also
                # remove ".wav" occurring in the middle of a name.
                utt = file[:-len(".wav")]
                utt2wav[utt] = os.path.join(root, file)
    # Every utterance comes from a wav file that was just seen, so none can
    # be missing; the count stays in the message to keep the log format.
    num_good_files = len(utt2wav)
    num_bad_files = 0
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(
        utt + " " + wav + "\n" for utt, wav in utt2wav.items())
    print("In speech directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
    return utt2spk_str, utt2wav_str
| | | |
def prepare_noise(root_dir):
    """Collect utt2spk / wav.scp entries for the ``noise`` subdirectory.

    Walks ``<root_dir>/noise`` and records every ``*.wav`` file under its
    file name without the extension; each utterance is its own speaker.

    Args:
        root_dir: corpus root that contains the ``noise`` directory.

    Returns:
        A tuple ``(utt2spk_str, utt2wav_str)`` of newline-terminated
        "<utt> <utt>" and "<utt> <wav-path>" lines.
    """
    utt2wav = {}
    noise_dir = os.path.join(root_dir, "noise")
    for root, dirs, files in os.walk(noise_dir):
        for file in files:
            if file.endswith(".wav"):
                # Strip only the trailing extension; str.replace would also
                # remove ".wav" occurring in the middle of a name.
                utt = file[:-len(".wav")]
                utt2wav[utt] = os.path.join(root, file)
    # Every utterance comes from a wav file that was just seen, so none can
    # be missing; the count stays in the message to keep the log format.
    num_good_files = len(utt2wav)
    num_bad_files = 0
    utt2spk_str = "".join(utt + " " + utt + "\n" for utt in utt2wav)
    utt2wav_str = "".join(
        utt + " " + wav + "\n" for utt, wav in utt2wav.items())
    print("In noise directory, processed {} files: {} had missing wav data".format(num_good_files, num_bad_files))
    return utt2spk_str, utt2wav_str
| | | |
def main():
    """Write ``wav.scp`` and ``utt2spk`` for the speech, music and noise data.

    Command-line arguments:
        1: input corpus directory (holds ``music``, ``speech``, ``noise``).
        2: output directory; ``wav.scp`` and ``utt2spk`` are written there.
        3: "Y" to keep music with vocals, anything else to drop it.
    """
    if len(sys.argv) < 4:
        # Give a usage hint instead of a bare IndexError.
        sys.stderr.write(
            "Usage: {} <in-dir> <out-dir> <use-vocals:Y|N>\n".format(
                sys.argv[0]))
        sys.exit(1)
    in_dir = sys.argv[1]
    out_dir = sys.argv[2]
    use_vocals = sys.argv[3] == "Y"
    utt2spk_music, utt2wav_music = prepare_music(in_dir, use_vocals)
    utt2spk_speech, utt2wav_speech = prepare_speech(in_dir)
    utt2spk_noise, utt2wav_noise = prepare_noise(in_dir)
    utt2spk = utt2spk_speech + utt2spk_music + utt2spk_noise
    utt2wav = utt2wav_speech + utt2wav_music + utt2wav_noise
    # Context managers make sure both files are flushed and closed.
    with open(os.path.join(out_dir, "wav.scp"), 'w') as wav_fi:
        wav_fi.write(utt2wav)
    with open(os.path.join(out_dir, "utt2spk"), 'w') as utt2spk_fi:
        utt2spk_fi.write(utt2spk)


if __name__ == "__main__":
    main()
| New file |
| | |
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
# This script, called by ../run.sh, creates the MUSAN
# data directory. The required dataset is freely available at
# http://www.openslr.org/17/
#
# Arguments: $1 - input corpus directory, passed to local/make_musan.py
#            $2 - data directory; <data-dir>/musan, musan_music,
#                 musan_speech and musan_noise are created beneath it
# local/ and utils/ are referenced by relative path, so the script has to
# be started from a directory that contains both.

set -e

if [ $# -ne 2 ]; then
  echo "Usage: $0 <musan-dir> <out-data-dir>" >&2
  exit 1
fi

in_dir=$1
data_dir=$2
use_vocals='Y'

tmp_dir=local/musan.tmp
mkdir -p "$tmp_dir"
# Remove the scratch directory on every exit path; with `set -e` a failing
# step would otherwise leave it behind.
trap 'rm -rf "$tmp_dir"' EXIT

echo "Preparing ${data_dir}/musan..."
mkdir -p "${data_dir}/musan"
local/make_musan.py "${in_dir}" "${data_dir}/musan" "${use_vocals}"

utils/fix_data_dir.sh "${data_dir}/musan"

# Split the combined directory by the word that appears in each utt2spk line.
# NOTE: under `set -e` a grep that matches nothing aborts the script.
grep "music" "${data_dir}/musan/utt2spk" > "$tmp_dir/utt2spk_music"
grep "speech" "${data_dir}/musan/utt2spk" > "$tmp_dir/utt2spk_speech"
grep "noise" "${data_dir}/musan/utt2spk" > "$tmp_dir/utt2spk_noise"
utils/subset_data_dir.sh --utt-list "$tmp_dir/utt2spk_music" \
  "${data_dir}/musan" "${data_dir}/musan_music"
utils/subset_data_dir.sh --utt-list "$tmp_dir/utt2spk_speech" \
  "${data_dir}/musan" "${data_dir}/musan_speech"
utils/subset_data_dir.sh --utt-list "$tmp_dir/utt2spk_noise" \
  "${data_dir}/musan" "${data_dir}/musan_noise"

utils/fix_data_dir.sh "${data_dir}/musan_music"
utils/fix_data_dir.sh "${data_dir}/musan_speech"
utils/fix_data_dir.sh "${data_dir}/musan_noise"
| | | |
| New file |
| | |
#!/usr/bin/perl
#
# Copyright 2015 David Snyder
# Apache 2.0.
# Usage: make_sre.pl <path-to-data> <name-of-source> <sre-ref> <output-dir>
#
# Writes wav.scp, utt2spk and spk2gender into <output-dir> for every line
# of <sre-ref> whose source name equals <name-of-source> and whose
# utterance id has a matching .sph file somewhere under <path-to-data>.

if (@ARGV != 4) {
  print STDERR "Usage: $0 <path-to-data> <name-of-source> <sre-ref> <output-dir>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC2006S44 sre2004 sre_ref data/sre2004\n";
  exit(1);
}

($db_base, $sre_name, $sre_ref_filename, $out_dir) = @ARGV;
%utt2sph = ();
%spk2gender = ();

$tmp_dir = "$out_dir/tmp";
# List form: the directory name is passed to mkdir without a shell.
if (system("mkdir", "-p", $tmp_dir) != 0) {
  die "Error making directory $tmp_dir";
}

# The redirection needs a shell, so this one stays a single command string.
if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}
open(WAVLIST, "<", "$tmp_dir/sph.list") or die "cannot open wav list";

# Map each utterance id (file name up to the first '.') to its full path.
while(<WAVLIST>) {
  chomp;
  $sph = $_;
  @A1 = split("/",$sph);
  @A2 = split("[./]",$A1[$#A1]);
  $uttId=$A2[0];
  $utt2sph{$uttId} = $sph;
}
close(WAVLIST) || die;

open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
open(SRE_REF, "<", $sre_ref_filename) or die "Cannot open SRE reference.";
while (<SRE_REF>) {
  chomp;
  ($speaker, $gender, $other_sre_name, $utt_id, $channel) = split(" ", $_);
  # Channel "A" selects sph2pipe channel 1, anything else channel 2.
  $channel_num = ($channel eq "A") ? "1" : "2";
  if (($other_sre_name eq $sre_name) and (exists $utt2sph{$utt_id})) {
    $full_utt_id = "$speaker-$gender-$sre_name-$utt_id-$channel";
    $spk2gender{"$speaker-$gender"} = $gender;
    print WAV "$full_utt_id"," sph2pipe -f wav -p -c $channel_num $utt2sph{$utt_id} |\n";
    print SPKR "$full_utt_id $speaker-$gender","\n";
  }
}
foreach $speaker (keys %spk2gender) {
  print GNDR "$speaker $spk2gender{$speaker}\n";
}

close(GNDR) || die;
close(SPKR) || die;
close(WAV) || die;
close(SRE_REF) || die;
| New file |
| | |
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
#
# See README.txt for more info on data required.
#
# Arguments: $1 - directory that holds the LDC corpus directories named
#                 in the calls below
#            $2 - data directory; one sub-directory per corpus plus the
#                 combined <data-dir>/sre are written beneath it
# local/ and utils/ are referenced by relative path, so the script has to
# be started from a directory that contains both.

set -e

if [ $# -ne 2 ]; then
  echo "Usage: $0 <path-to-ldc-corpora> <out-data-dir>" >&2
  exit 1
fi

data_root=$1
data_dir=$2

# Speaker list that local/make_sre.pl takes as its <sre-ref> argument.
wget -P data/local/ http://www.openslr.org/resources/15/speaker_list.tgz
tar -C data/local/ -xvf data/local/speaker_list.tgz
sre_ref=data/local/speaker_list

local/make_sre.pl "$data_root/LDC2006S44/" \
  sre2004 "$sre_ref" "$data_dir/sre2004"

local/make_sre.pl "$data_root/LDC2011S01" \
  sre2005 "$sre_ref" "$data_dir/sre2005_train"

local/make_sre.pl "$data_root/LDC2011S04" \
  sre2005 "$sre_ref" "$data_dir/sre2005_test"

local/make_sre.pl "$data_root/LDC2011S09" \
  sre2006 "$sre_ref" "$data_dir/sre2006_train"

local/make_sre.pl "$data_root/LDC2011S10" \
  sre2006 "$sre_ref" "$data_dir/sre2006_test_1"

local/make_sre.pl "$data_root/LDC2012S01" \
  sre2006 "$sre_ref" "$data_dir/sre2006_test_2"

local/make_sre.pl "$data_root/LDC2011S05" \
  sre2008 "$sre_ref" "$data_dir/sre2008_train"

local/make_sre.pl "$data_root/LDC2011S08" \
  sre2008 "$sre_ref" "$data_dir/sre2008_test"

utils/combine_data.sh "$data_dir/sre" \
  "$data_dir/sre2004" "$data_dir/sre2005_train" \
  "$data_dir/sre2005_test" "$data_dir/sre2006_train" \
  "$data_dir/sre2006_test_1" "$data_dir/sre2006_test_2" \
  "$data_dir/sre2008_train" "$data_dir/sre2008_test"

utils/validate_data_dir.sh --no-text --no-feats "$data_dir/sre"
utils/fix_data_dir.sh "$data_dir/sre"
# The glob is intentionally unquoted: it removes the archive and the list.
rm data/local/speaker_list.*
| New file |
| | |
#!/usr/bin/perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2017 David Snyder
# Apache 2.0
#
# Reads <path-to-LDC98S75>/doc/callstat.tbl and, for every call whose .sph
# file is found under the corpus directory, writes two entries (channel 1
# and channel 2) to wav.scp and utt2spk and records each speaker's gender
# in spk2gender. Finally builds spk2utt and runs the fix/validate utilities.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC98S75> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora3/LDC/LDC98S75 data/swbd2_phase1_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

# List form: the directory name is passed to mkdir without a shell.
if (system("mkdir", "-p", $out_dir)) {
  die "Error making directory $out_dir";
}

open(CS, "<", "$db_base/doc/callstat.tbl") || die "Could not open $db_base/doc/callstat.tbl";
open(GNDR, ">", "$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">", "$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">", "$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("3", "4");

$tmp_dir = "$out_dir/tmp";
if (system("mkdir", "-p", $tmp_dir) != 0) {
  die "Error making directory $tmp_dir";
}

# The redirection needs a shell, so this one stays a single command string.
if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}

open(WAVLIST, "<", "$tmp_dir/sph.list") or die "cannot open wav list";

# Map each recording name (file name up to the first '.') to its full path.
%wavs = ();
while(<WAVLIST>) {
  chomp;
  $sph = $_;
  @t = split("/",$sph);
  @t1 = split("[./]",$t[$#t]);
  $uttId = $t1[0];
  $wavs{$uttId} = $sph;
}
close(WAVLIST) || die;

%spk2gender = ();
while (<CS>) {
  $line = $_ ;
  @A = split(",", $line);
  @A1 = split("[./]",$A[0]);
  $wav = $A1[0];
  # Same test the experimental smartmatch `/$wav/i ~~ @badAudio` performed:
  # true when any @badAudio entry matches the pattern built from $wav.
  $wav_re = qr/$wav/i;
  if (grep { $_ =~ $wav_re } @badAudio) {
    # do nothing
    print "Bad Audio = $wav";
  } else {
    $spkr1= "sw_" . $A[2];
    $spkr2= "sw_" . $A[3];
    $gender1 = $A[5];
    $gender2 = $A[6];
    # Convert the table's M/F codes to the lowercase form written out
    # (foreach aliases the two variables, so they are updated in place).
    foreach $g ($gender1, $gender2) {
      if ($g eq "M") {
        $g = "m";
      } elsif ($g eq "F") {
        $g = "f";
      } else {
        die "Unknown Gender in $line";
      }
    }
    # An unknown recording yields an empty path instead of an undefined
    # value, so neither the file test nor the message below warns.
    $sph_path = defined($wavs{$wav}) ? $wavs{$wav} : "";
    if ($sph_path ne "" && -e $sph_path) {
      # One utterance per channel: speaker 1 on channel 1, speaker 2 on 2.
      foreach $side ([$spkr1, $gender1, 1], [$spkr2, $gender2, 2]) {
        ($spkr, $gender, $chan) = @$side;
        $uttId = $spkr . "_" . $wav . "_" . $chan;
        if (!$spk2gender{$spkr}) {
          $spk2gender{$spkr} = $gender;
          print GNDR "$spkr"," $gender\n";
        }
        print WAV "$uttId"," sph2pipe -f wav -p -c $chan $sph_path |\n";
        print SPKR "$uttId"," $spkr","\n";
      }
    } else {
      print STDERR "Missing $sph_path for $wav\n";
    }
  }
}

close(CS) || die;
close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
| New file |
| | |
#!/usr/bin/perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2013 Daniel Povey
# Apache 2.0
#
# Reads DISC1/doc/callstat.tbl and DISC1/doc/callinfo.tbl under
# <path-to-LDC99S79> and, for every call whose .sph file is found under the
# corpus directory, writes two entries (channel 1 and channel 2) to wav.scp
# and utt2spk and records each speaker's gender in spk2gender. Finally
# builds spk2utt and runs the fix/validate utilities.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC99S79> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC99S79 data/swbd2_phase2_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

# List form: the directory name is passed to mkdir without a shell.
if (system("mkdir", "-p", $out_dir)) {
  die "Error making directory $out_dir";
}

open(CS, "<", "$db_base/DISC1/doc/callstat.tbl") || die "Could not open $db_base/DISC1/doc/callstat.tbl";
open(CI, "<", "$db_base/DISC1/doc/callinfo.tbl") || die "Could not open $db_base/DISC1/doc/callinfo.tbl";
open(GNDR, ">", "$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">", "$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">", "$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("3", "4");

$tmp_dir = "$out_dir/tmp";
if (system("mkdir", "-p", $tmp_dir) != 0) {
  die "Error making directory $tmp_dir";
}

# The redirection needs a shell, so this one stays a single command string.
if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}

open(WAVLIST, "<", "$tmp_dir/sph.list") or die "cannot open wav list";

# Map each recording name (file name up to the first '.') to its full path.
%wav = ();
while(<WAVLIST>) {
  chomp;
  $sph = $_;
  @t = split("/",$sph);
  @t1 = split("[./]",$t[$#t]);
  $uttId=$t1[0];
  $wav{$uttId} = $sph;
}
close(WAVLIST) || die;

%spk2gender = ();
while (<CS>) {
  $line = $_ ;
  # callinfo.tbl is advanced two lines for every callstat.tbl line; the
  # recording name is the first field of the second line read.
  $ci = <CI>;
  $ci = <CI>;
  @ci = split(",",$ci);
  $wav = $ci[0];
  @A = split(",", $line);
  # Same test the experimental smartmatch `/$wav/i ~~ @badAudio` performed:
  # true when any @badAudio entry matches the pattern built from $wav.
  $wav_re = qr/$wav/i;
  if (grep { $_ =~ $wav_re } @badAudio) {
    # do nothing
  } else {
    $spkr1= "sw_" . $A[2];
    $spkr2= "sw_" . $A[3];
    $gender1 = $A[4];
    $gender2 = $A[5];
    # Convert the table's M/F codes to the lowercase form written out
    # (foreach aliases the two variables, so they are updated in place).
    foreach $g ($gender1, $gender2) {
      if ($g eq "M") {
        $g = "m";
      } elsif ($g eq "F") {
        $g = "f";
      } else {
        die "Unknown Gender in $line";
      }
    }
    # An unknown recording yields an empty path instead of an undefined
    # value, so neither the file test nor the message below warns.
    $sph_path = defined($wav{$wav}) ? $wav{$wav} : "";
    if ($sph_path ne "" && -e $sph_path) {
      # One utterance per channel: speaker 1 on channel 1, speaker 2 on 2.
      foreach $side ([$spkr1, $gender1, 1], [$spkr2, $gender2, 2]) {
        ($spkr, $gender, $chan) = @$side;
        $uttId = $spkr . "_" . $wav . "_" . $chan;
        if (!$spk2gender{$spkr}) {
          $spk2gender{$spkr} = $gender;
          print GNDR "$spkr"," $gender\n";
        }
        print WAV "$uttId"," sph2pipe -f wav -p -c $chan $sph_path |\n";
        print SPKR "$uttId"," $spkr","\n";
      }
    } else {
      print STDERR "Missing $sph_path for $wav\n";
    }
  }
}

close(CS) || die;
close(CI) || die;
close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
| New file |
| | |
#!/usr/bin/perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2013 Daniel Povey
# Apache 2.0
#
# Reads DISC1/docs/callstat.tbl under <path-to-LDC2002S06> and, for every
# call whose .sph file is found under the corpus directory, writes two
# entries (channel 1 and channel 2) to wav.scp and utt2spk and records each
# speaker's gender in spk2gender. Finally builds spk2utt and runs the
# fix/validate utilities.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC2002S06> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC2002S06 data/swbd2_phase3_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

# List form: the directory name is passed to mkdir without a shell.
if (system("mkdir", "-p", $out_dir)) {
  die "Error making directory $out_dir";
}

open(CS, "<", "$db_base/DISC1/docs/callstat.tbl") || die "Could not open $db_base/DISC1/docs/callstat.tbl";
open(GNDR, ">", "$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">", "$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">", "$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("3", "4");

$tmp_dir = "$out_dir/tmp";
if (system("mkdir", "-p", $tmp_dir) != 0) {
  die "Error making directory $tmp_dir";
}

# The redirection needs a shell, so this one stays a single command string.
if (system("find $db_base -name '*.sph' > $tmp_dir/sph.list") != 0) {
  die "Error getting list of sph files";
}

open(WAVLIST, "<", "$tmp_dir/sph.list") or die "cannot open wav list";
# Map each recording name (file name up to the first '.') to its full path.
%wav = ();
while(<WAVLIST>) {
  chomp;
  $sph = $_;
  @t = split("/",$sph);
  @t1 = split("[./]",$t[$#t]);
  $uttId=$t1[0];
  $wav{$uttId} = $sph;
}
close(WAVLIST) || die;

%spk2gender = ();
while (<CS>) {
  $line = $_ ;
  @A = split(",", $line);
  $wav = "sw_" . $A[0] ;
  # Same test the experimental smartmatch `/$wav/i ~~ @badAudio` performed:
  # true when any @badAudio entry matches the pattern built from $wav.
  $wav_re = qr/$wav/i;
  if (grep { $_ =~ $wav_re } @badAudio) {
    # do nothing
  } else {
    $spkr1= "sw_" . $A[3];
    $spkr2= "sw_" . $A[4];
    $gender1 = $A[5];
    $gender2 = $A[6];
    # Convert the table's M/F codes to the lowercase form written out
    # (foreach aliases the two variables, so they are updated in place).
    foreach $g ($gender1, $gender2) {
      if ($g eq "M") {
        $g = "m";
      } elsif ($g eq "F") {
        $g = "f";
      } else {
        die "Unknown Gender in $line";
      }
    }
    # An unknown recording yields an empty path instead of an undefined
    # value, so neither the file test nor the message below warns.
    $sph_path = defined($wav{$wav}) ? $wav{$wav} : "";
    if ($sph_path ne "" && -e $sph_path) {
      # One utterance per channel: speaker 1 on channel 1, speaker 2 on 2.
      foreach $side ([$spkr1, $gender1, 1], [$spkr2, $gender2, 2]) {
        ($spkr, $gender, $chan) = @$side;
        $uttId = $spkr . "_" . $wav . "_" . $chan;
        if (!$spk2gender{$spkr}) {
          $spk2gender{$spkr} = $gender;
          print GNDR "$spkr"," $gender\n";
        }
        print WAV "$uttId"," sph2pipe -f wav -p -c $chan $sph_path |\n";
        print SPKR "$uttId"," $spkr","\n";
      }
    } else {
      print STDERR "Missing $sph_path for $wav\n";
    }
  }
}

close(CS) || die;
close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
| New file |
| | |
#!/usr/bin/perl
use warnings; #sed replacement for -w perl parameter
#
# Copyright 2013 Daniel Povey
# Apache 2.0
#
# Reads doc/swb_callstats.tbl under <path-to-LDC2001S13> and, for every
# call whose sw_<id>.sph file exists directly under the corpus directory,
# writes two entries (channel 1 and channel 2) to wav.scp and utt2spk and
# records each speaker's gender in spk2gender. Finally builds spk2utt and
# runs the fix/validate utilities.

if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-LDC2001S13> <path-to-output>\n";
  print STDERR "e.g. $0 /export/corpora5/LDC/LDC2001S13 data/swbd_cellular1_train\n";
  exit(1);
}
($db_base, $out_dir) = @ARGV;

# List form: the directory name is passed to mkdir without a shell.
if (system("mkdir", "-p", $out_dir)) {
  die "Error making directory $out_dir";
}

open(CS, "<", "$db_base/doc/swb_callstats.tbl") || die "Could not open $db_base/doc/swb_callstats.tbl";
open(GNDR, ">", "$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender";
open(SPKR, ">", "$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk";
open(WAV, ">", "$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp";

@badAudio = ("40019", "45024", "40022");

%spk2gender = ();
while (<CS>) {
  $line = $_ ;
  @A = split(",", $line);
  # Same test the experimental smartmatch `/$A[0]/i ~~ @badAudio` performed:
  # true when any @badAudio entry matches the pattern built from the first
  # field of the line.
  $call_id = $A[0];
  $call_re = qr/$call_id/i;
  if (grep { $_ =~ $call_re } @badAudio) {
    # do nothing
  } else {
    $wav = "sw_" . $A[0];
    $spkr1= "sw_" . $A[1];
    $spkr2= "sw_" . $A[2];
    $gender1 = $A[3];
    $gender2 = $A[4];
    # Convert the table's M/F codes to the lowercase form written out
    # (foreach aliases the two variables, so they are updated in place).
    foreach $g ($gender1, $gender2) {
      if ($g eq "M") {
        $g = "m";
      } elsif ($g eq "F") {
        $g = "f";
      } else {
        die "Unknown Gender in $line";
      }
    }
    $sph_path = "$db_base/$wav.sph";
    if (-e $sph_path) {
      # One utterance per channel: speaker 1 on channel 1, speaker 2 on 2.
      foreach $side ([$spkr1, $gender1, 1], [$spkr2, $gender2, 2]) {
        ($spkr, $gender, $chan) = @$side;
        $uttId = $spkr . "-swbdc_" . $wav . "_" . $chan;
        if (!$spk2gender{$spkr}) {
          $spk2gender{$spkr} = $gender;
          print GNDR "$spkr"," $gender\n";
        }
        print WAV "$uttId"," sph2pipe -f wav -p -c $chan $sph_path |\n";
        print SPKR "$uttId"," $spkr","\n";
      }
    } else {
      print STDERR "Missing $sph_path\n";
    }
  }
}

close(CS) || die;
close(WAV) || die;
close(SPKR) || die;
close(GNDR) || die;
if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
  die "Error creating spk2utt file in directory $out_dir";
}
if (system("utils/fix_data_dir.sh $out_dir") != 0) {
  die "Error fixing data dir $out_dir";
}
if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
  die "Error validating directory $out_dir";
}
| New file |
| | |
| | | #!/usr/bin/perl |
| | | use warnings; #sed replacement for -w perl parameter |
| | | # |
| | | # Copyright 2013 Daniel Povey |
| | | # Apache 2.0 |
| | | |
| | | # Builds a Kaldi-style data directory (wav.scp, utt2spk, spk2gender, spk2utt) |
| | | # from the call table found under <path-to-LDC2004S07>/docs. |
| | | # Expects exactly two arguments: the corpus root and the output directory. |
| | | if (@ARGV != 2) { |
| | | print STDERR "Usage: $0 <path-to-LDC2004S07> <path-to-output>\n"; |
| | | print STDERR "e.g. $0 /export/corpora5/LDC/LDC2004S07 data/swbd_cellular2_train\n"; |
| | | exit(1); |
| | | } |
| | | ($db_base, $out_dir) = @ARGV; |
| | | |
| | | if (system("mkdir -p $out_dir")) { |
| | | die "Error making directory $out_dir"; |
| | | } |
| | | |
| | | # Input: the call statistics table. Outputs: spk2gender, utt2spk and wav.scp. |
| | | open(CS, "<", "$db_base/docs/swb_callstats.tbl") || die "Could not open $db_base/docs/swb_callstats.tbl"; |
| | | open(GNDR, ">", "$out_dir/spk2gender") || die "Could not open the output file $out_dir/spk2gender"; |
| | | open(SPKR, ">", "$out_dir/utt2spk") || die "Could not open the output file $out_dir/utt2spk"; |
| | | open(WAV, ">", "$out_dir/wav.scp") || die "Could not open the output file $out_dir/wav.scp"; |
| | | |
| | | # IDs (first column of the call table) that must be excluded; the variable |
| | | # name suggests their audio is unusable. |
| | | @badAudio=("45024", "40022"); |
| | | |
| | | while (<CS>) { |
| | | $line = $_ ; |
| | | # Columns used below: 0 = call id, 1/2 = speaker ids, 3/4 = speaker genders. |
| | | @A = split(",", $line); |
| | | # Exact string comparison against the exclusion list. A smartmatch of the |
| | | # form "/$A[0]/i ~~ @badAudio" compares the boolean result of the regex |
| | | # match with the list entries, which is never true, so it excludes nothing. |
| | | if (grep { $_ eq $A[0] } @badAudio) { |
| | | # do nothing |
| | | } else { |
| | | $wav = "sw_" . $A[0]; |
| | | $spkr1= "sw_" . $A[1]; |
| | | $spkr2= "sw_" . $A[2]; |
| | | # Map the table's upper-case gender codes to the lower-case codes written |
| | | # to spk2gender; anything else aborts the run. |
| | | if ($A[3] eq "M") { |
| | | $gender1 = "m"; |
| | | } elsif ($A[3] eq "F") { |
| | | $gender1 = "f"; |
| | | } else { |
| | | die "Unknown Gender in $line"; |
| | | } |
| | | if ($A[4] eq "M") { |
| | | $gender2 = "m"; |
| | | } elsif ($A[4] eq "F") { |
| | | $gender2 = "f"; |
| | | } else { |
| | | die "Unknown Gender in $line"; |
| | | } |
| | | if (-e "$db_base/data/$wav.sph") { |
| | | # Channel 1 of the recording is attributed to the first speaker. |
| | | $uttId = $spkr1 . "-swbdc_" . $wav ."_1"; |
| | | # Write each speaker's gender only once. |
| | | if (!$spk2gender{$spkr1}) { |
| | | $spk2gender{$spkr1} = $gender1; |
| | | print GNDR "$spkr1"," $gender1\n"; |
| | | } |
| | | print WAV "$uttId"," sph2pipe -f wav -p -c 1 $db_base/data/$wav.sph |\n"; |
| | | print SPKR "$uttId"," $spkr1","\n"; |
| | | |
| | | # Channel 2 of the recording is attributed to the second speaker. |
| | | $uttId = $spkr2 . "-swbdc_" . $wav ."_2"; |
| | | if (!$spk2gender{$spkr2}) { |
| | | $spk2gender{$spkr2} = $gender2; |
| | | print GNDR "$spkr2"," $gender2\n"; |
| | | } |
| | | print WAV "$uttId"," sph2pipe -f wav -p -c 2 $db_base/data/$wav.sph |\n"; |
| | | print SPKR "$uttId"," $spkr2","\n"; |
| | | } else { |
| | | # A missing Sphere file is reported but does not stop the run. |
| | | print STDERR "Missing $db_base/data/$wav.sph\n"; |
| | | } |
| | | } |
| | | } |
| | | |
| | | close(WAV) || die; |
| | | close(SPKR) || die; |
| | | close(GNDR) || die; |
| | | # Derive spk2utt, then let the Kaldi utilities fix and validate the directory. |
| | | if (system("utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { |
| | | die "Error creating spk2utt file in directory $out_dir"; |
| | | } |
| | | if (system("utils/fix_data_dir.sh $out_dir") != 0) { |
| | | die "Error fixing data dir $out_dir"; |
| | | } |
| | | if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { |
| | | die "Error validating directory $out_dir"; |
| | | } |
| New file |
| | |
| | | #!/usr/bin/env python3 |
| | | |
| | | # Copyright 2019 Hitachi, Ltd. (author: Yusuke Fujita) |
| | | # Licensed under the MIT license. |
| | | |
| | | """ |
| | | This script generates random multi-talker mixtures for diarization. |
| | | It generates scp-like output: lines of "[recid] [json]". |
| | | recid: recording id of mixture |
| | | serial numbers like mix_0000001, mix_0000002, ... |
| | | json: mixture configuration formatted in "one-line" |
| | | The json format is as follows: |
| | | { |
| | | 'speakers':[ # list of speakers |
| | | { |
| | | 'spkid': 'Name', # speaker id |
| | | 'rir': '/rirdir/rir.wav', # wav_rxfilename of room impulse response |
| | | 'utts': [ # list of wav_rxfilenames of utterances |
| | | '/wavdir/utt1.wav', |
| | | '/wavdir/utt2.wav',...], |
| | | 'intervals': [1.2, 3.4, ...] # list of silence durations before utterances |
| | | }, ... ], |
| | | 'noise': '/noisedir/noise.wav' # wav_rxfilename of background noise |
| | | 'snr': 15.0, # SNR for mixing background noise |
| | | 'recid': 'mix_000001' # recording id of the mixture |
| | | } |
| | | |
| | | Usage: |
| | | common/random_mixture.py \ |
| | | --n_mixtures=10000 \ # number of mixtures |
| | | data/voxceleb1_train \ # kaldi-style data dir of utterances |
| | | data/musan_noise_bg \ # background noises |
| | | data/simu_rirs \ # room impulse responses |
| | | > mixture.scp # output scp-like file |
| | | |
| | | The actual data dir and wav files are generated using make_mixture.py: |
| | | common/make_mixture.py \ |
| | | mixture.scp \ # scp-like file for mixture |
| | | data/mixture \ # output data dir |
| | | wav/mixture # output wav dir |
| | | """ |
| | | |
| | | import argparse |
| | | import os |
| | | from eend import kaldi_data |
| | | import random |
| | | import numpy as np |
| | | import json |
| | | import itertools |
| | | |
| | | parser = argparse.ArgumentParser() |
| | | parser.add_argument('data_dir', |
| | | help='data dir of single-speaker recordings') |
| | | parser.add_argument('noise_dir', |
| | | help='data dir of background noise recordings') |
| | | parser.add_argument('rir_dir', |
| | | help='data dir of room impulse responses') |
| | | parser.add_argument('--n_mixtures', type=int, default=10, |
| | | help='number of mixture recordings') |
| | | parser.add_argument('--n_speakers', type=int, default=4, |
| | | help='number of speakers in a mixture') |
| | | parser.add_argument('--min_utts', type=int, default=10, |
| | | help='minimum number of uttenraces per speaker') |
| | | parser.add_argument('--max_utts', type=int, default=20, |
| | | help='maximum number of utterances per speaker') |
| | | parser.add_argument('--sil_scale', type=float, default=10.0, |
| | | help='average silence time') |
| | | parser.add_argument('--noise_snrs', default="10:15:20", |
| | | help='colon-delimited SNRs for background noises') |
| | | parser.add_argument('--random_seed', type=int, default=777, |
| | | help='random seed') |
| | | parser.add_argument('--speech_rvb_probability', type=float, default=1, |
| | | help='reverb probability') |
| | | args = parser.parse_args() |
| | | |
| | | random.seed(args.random_seed) |
| | | np.random.seed(args.random_seed) |
| | | |
| | | # load list of wav files from kaldi-style data dirs |
| | | wavs = kaldi_data.load_wav_scp( |
| | | os.path.join(args.data_dir, 'wav.scp')) |
| | | noises = kaldi_data.load_wav_scp( |
| | | os.path.join(args.noise_dir, 'wav.scp')) |
| | | rirs = kaldi_data.load_wav_scp( |
| | | os.path.join(args.rir_dir, 'wav.scp')) |
| | | |
| | | # spk2utt is used for counting number of utterances per speaker |
| | | spk2utt = kaldi_data.load_spk2utt( |
| | | os.path.join(args.data_dir, 'spk2utt')) |
| | | |
| | | segments = kaldi_data.load_segments_hash( |
| | | os.path.join(args.data_dir, 'segments')) |
| | | |
| | | # choice lists for random sampling |
| | | all_speakers = list(spk2utt.keys()) |
| | | all_noises = list(noises.keys()) |
| | | all_rirs = list(rirs.keys()) |
| | | noise_snrs = [float(x) for x in args.noise_snrs.split(':')] |
| | | |
| | | mixtures = [] |
| | | for it in range(args.n_mixtures): |
| | | # recording ids are mix_0000001, mix_0000002, ... |
| | | recid = 'mix_{:07d}'.format(it + 1) |
| | | # randomly select speakers, a background noise and a SNR |
| | | speakers = random.sample(all_speakers, args.n_speakers) |
| | | noise = random.choice(all_noises) |
| | | noise_snr = random.choice(noise_snrs) |
| | | mixture = {'speakers': []} |
| | | for speaker in speakers: |
| | | # randomly select the number of utterances |
| | | n_utts = np.random.randint(args.min_utts, args.max_utts + 1) |
| | | # utts = spk2utt[speaker][:n_utts] |
| | | cycle_utts = itertools.cycle(spk2utt[speaker]) |
| | | # random start utterance |
| | | roll = np.random.randint(0, len(spk2utt[speaker])) |
| | | for i in range(roll): |
| | | next(cycle_utts) |
| | | utts = [next(cycle_utts) for i in range(n_utts)] |
| | | # randomly select wait time before appending utterance |
| | | intervals = np.random.exponential(args.sil_scale, size=n_utts) |
| | | # randomly select a room impulse response |
| | | if random.random() < args.speech_rvb_probability: |
| | | rir = rirs[random.choice(all_rirs)] |
| | | else: |
| | | rir = None |
| | | if segments is not None: |
| | | utts = [segments[utt] for utt in utts] |
| | | utts = [(wavs[rec], st, et) for (rec, st, et) in utts] |
| | | mixture['speakers'].append({ |
| | | 'spkid': speaker, |
| | | 'rir': rir, |
| | | 'utts': utts, |
| | | 'intervals': intervals.tolist() |
| | | }) |
| | | else: |
| | | mixture['speakers'].append({ |
| | | 'spkid': speaker, |
| | | 'rir': rir, |
| | | 'utts': [wavs[utt] for utt in utts], |
| | | 'intervals': intervals.tolist() |
| | | }) |
| | | mixture['noise'] = noises[noise] |
| | | mixture['snr'] = noise_snr |
| | | mixture['recid'] = recid |
| | | print(recid, json.dumps(mixture)) |
| New file |
| | |
| | | #!/bin/bash |
| | | |
| | | # Copyright 2019 Hitachi, Ltd. (author: Yusuke Fujita) |
| | | # Licensed under the MIT license. |
| | | # |
| | | # BLSTM-based model experiment |
| | | # Run the shared recipe with the BLSTM configs. Extra command-line arguments |
| | | # are forwarded with "$@" so that arguments containing spaces stay intact |
| | | # (an unquoted $* would re-split them). |
| | | ./run.sh --train-config conf/blstm/train.yaml --average-start 20 --average-end 20 \ |
| | | --adapt-config conf/blstm/adapt.yaml --adapt-average-start 10 --adapt-average-end 10 \ |
| | | --infer-config conf/blstm/infer.yaml "$@" |
| New file |
| | |
| | | #!/bin/bash |
| | | |
| | | # Copyright 2019 Hitachi, Ltd. (author: Yusuke Fujita, Shota Horiguchi) |
| | | # Licensed under the MIT license. |
| | | # |
| | | # This script prepares kaldi-style data sets shared with different experiments |
| | | # - data/xxxx |
| | | # callhome, sre, swb2, and swb_cellular datasets |
| | | # - data/simu_${simu_outputs} |
| | | # simulation mixtures generated with various options |
| | | |
| | | stage=0 |
| | | |
| | | # Modify corpus directories |
| | | # - callhome_dir |
| | | # CALLHOME (LDC2001S97) |
| | | # - swb2_phase1_train |
| | | # Switchboard-2 Phase 1 (LDC98S75) |
| | | # - data_root |
| | | # LDC99S79, LDC2002S06, LDC2001S13, LDC2004S07, |
| | | # LDC2006S44, LDC2011S01, LDC2011S04, LDC2011S09, |
| | | # LDC2011S10, LDC2012S01, LDC2011S05, LDC2011S08 |
| | | # - musan_root |
| | | # MUSAN corpus (https://www.openslr.org/17/) |
| | | callhome_dir=/export/corpora/NIST/LDC2001S97 |
| | | swb2_phase1_train=/export/corpora/LDC/LDC98S75 |
| | | data_root=/export/corpora5/LDC |
| | | musan_root=/export/corpora/JHU/musan |
| | | # Modify simulated data storage area. |
| | | # This script distributes simulated data under these directories |
| | | simu_actual_dirs=( |
| | | /export/c05/$USER/diarization-data |
| | | /export/c08/$USER/diarization-data |
| | | /export/c09/$USER/diarization-data |
| | | ) |
| | | |
| | | # data preparation options |
| | | max_jobs_run=4 |
| | | sad_num_jobs=30 |
| | | sad_opts="--extra-left-context 79 --extra-right-context 21 --frames-per-chunk 150 --extra-left-context-initial 0 --extra-right-context-final 0 --acwt 0.3" |
| | | sad_graph_opts="--min-silence-duration=0.03 --min-speech-duration=0.3 --max-speech-duration=10.0" |
| | | sad_priors_opts="--sil-scale=0.1" |
| | | |
| | | # simulation options |
| | | simu_opts_overlap=yes |
| | | simu_opts_num_speaker_array=(1 2 3 4) |
| | | simu_opts_sil_scale_array=(2 2 5 9) |
| | | simu_opts_rvb_prob=0.5 |
| | | simu_opts_num_train=100000 |
| | | simu_opts_min_utts=10 |
| | | simu_opts_max_utts=20 |
| | | |
| | | simu_cmd="run.pl" |
| | | train_cmd="run.pl" |
| | | random_mixture_cmd="run.pl" |
| | | make_mixture_cmd="run.pl" |
| | | |
| | | . parse_options.sh || exit |
| | | |
| | | if [ $stage -le 0 ]; then |
| | | echo "prepare kaldi-style datasets" |
| | | # Prepare the CALLHOME dataset. This will be used for evaluation. |
| | | if ! validate_data_dir.sh --no-text --no-feats data/callhome1_spkall \ |
| | | || ! validate_data_dir.sh --no-text --no-feats data/callhome2_spkall; then |
| | | # imported from https://github.com/kaldi-asr/kaldi/blob/master/egs/callhome_diarization/v1 |
| | | local/make_callhome.sh $callhome_dir data |
| | | # Generate two-speaker subsets |
| | | for dset in callhome1 callhome2; do |
| | | # Extract two-speaker recordings in wav.scp |
| | | copy_data_dir.sh data/${dset} data/${dset}_spkall |
| | | # Regenerate segments file from fullref.rttm |
| | | # $2: recid, $4: start_time, $5: duration, $8: speakerid |
| | | awk '{printf "%s_%s_%07d_%07d %s %.2f %.2f\n", \ |
| | | $2, $8, $4*100, ($4+$5)*100, $2, $4, $4+$5}' \ |
| | | data/callhome/fullref.rttm | sort > data/${dset}_spkall/segments |
| | | utils/fix_data_dir.sh data/${dset}_spkall |
| | | # Speaker ID is '[recid]_[speakerid]' |
| | | awk '{split($1,A,"_"); printf "%s %s_%s\n", $1, A[1], A[2]}' \ |
| | | data/${dset}_spkall/segments > data/${dset}_spkall/utt2spk |
| | | utils/fix_data_dir.sh data/${dset}_spkall |
| | | # Generate rttm files for scoring |
| | | steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \ |
| | | data/${dset}_spkall/utt2spk data/${dset}_spkall/segments \ |
| | | data/${dset}_spkall/rttm |
| | | utils/data/get_reco2dur.sh data/${dset}_spkall |
| | | done |
| | | fi |
| | | # Prepare a collection of NIST SRE and SWB data. This will be used for training. |
| | | if ! validate_data_dir.sh --no-text --no-feats data/swb_sre_comb; then |
| | | local/make_sre.sh $data_root data |
| | | # Prepare SWB for x-vector DNN training. |
| | | local/make_swbd2_phase1.pl $swb2_phase1_train \ |
| | | data/swbd2_phase1_train |
| | | local/make_swbd2_phase2.pl $data_root/LDC99S79 \ |
| | | data/swbd2_phase2_train |
| | | local/make_swbd2_phase3.pl $data_root/LDC2002S06 \ |
| | | data/swbd2_phase3_train |
| | | local/make_swbd_cellular1.pl $data_root/LDC2001S13 \ |
| | | data/swbd_cellular1_train |
| | | local/make_swbd_cellular2.pl $data_root/LDC2004S07 \ |
| | | data/swbd_cellular2_train |
| | | # Combine swb and sre data |
| | | utils/combine_data.sh data/swb_sre_comb \ |
| | | data/swbd_cellular1_train data/swbd_cellular2_train \ |
| | | data/swbd2_phase1_train \ |
| | | data/swbd2_phase2_train data/swbd2_phase3_train data/sre |
| | | fi |
| | | # musan data: "background" noises (data/musan_noise_bg) |
| | | if ! validate_data_dir.sh --no-text --no-feats data/musan_noise_bg; then |
| | | local/make_musan.sh $musan_root data |
| | | utils/copy_data_dir.sh data/musan_noise data/musan_noise_bg |
| | | awk '{if(NR>1) print $1,$1}' $musan_root/noise/free-sound/ANNOTATIONS > data/musan_noise_bg/utt2spk |
| | | utils/fix_data_dir.sh data/musan_noise_bg |
| | | fi |
| | | # simu rirs 8k |
| | | if ! validate_data_dir.sh --no-text --no-feats data/simu_rirs_8k; then |
| | | mkdir -p data/simu_rirs_8k |
| | | if [ ! -e sim_rir_8k.zip ]; then |
| | | wget --no-check-certificate http://www.openslr.org/resources/26/sim_rir_8k.zip |
| | | fi |
| | | unzip sim_rir_8k.zip -d data/sim_rir_8k |
| | | find $PWD/data/sim_rir_8k -iname "*.wav" \ |
| | | | awk '{n=split($1,A,/[\/\.]/); print A[n-3]"_"A[n-1], $1}' \ |
| | | | sort > data/simu_rirs_8k/wav.scp |
| | | awk '{print $1, $1}' data/simu_rirs_8k/wav.scp > data/simu_rirs_8k/utt2spk |
| | | utils/fix_data_dir.sh data/simu_rirs_8k |
| | | fi |
| | | # Automatic segmentation using pretrained SAD model |
| | | # it will take one day using 30 CPU jobs: |
| | | # make_mfcc: 1 hour, compute_output: 18 hours, decode: 0.5 hours |
| | | sad_nnet_dir=exp/segmentation_1a/tdnn_stats_asr_sad_1a |
| | | sad_work_dir=exp/segmentation_1a/tdnn_stats_asr_sad_1a |
| | | if ! validate_data_dir.sh --no-text $sad_work_dir/swb_sre_comb_seg; then |
| | | if [ ! -d exp/segmentation_1a ]; then |
| | | wget http://kaldi-asr.org/models/4/0004_tdnn_stats_asr_sad_1a.tar.gz |
| | | tar zxf 0004_tdnn_stats_asr_sad_1a.tar.gz |
| | | fi |
| | | steps/segmentation/detect_speech_activity.sh \ |
| | | --nj $sad_num_jobs \ |
| | | --graph-opts "$sad_graph_opts" \ |
| | | --transform-probs-opts "$sad_priors_opts" $sad_opts \ |
| | | data/swb_sre_comb $sad_nnet_dir mfcc_hires $sad_work_dir \ |
| | | $sad_work_dir/swb_sre_comb || exit 1 |
| | | fi |
| | | # Extract >1.5 sec segments and split into train/valid sets |
| | | if ! validate_data_dir.sh --no-text --no-feats data/swb_sre_cv; then |
| | | copy_data_dir.sh data/swb_sre_comb data/swb_sre_comb_seg |
| | | awk '$4-$3>1.5{print;}' $sad_work_dir/swb_sre_comb_seg/segments > data/swb_sre_comb_seg/segments |
| | | cp $sad_work_dir/swb_sre_comb_seg/{utt2spk,spk2utt} data/swb_sre_comb_seg |
| | | fix_data_dir.sh data/swb_sre_comb_seg |
| | | utils/subset_data_dir_tr_cv.sh data/swb_sre_comb_seg data/swb_sre_tr data/swb_sre_cv |
| | | fi |
| | | fi |
| | | |
| | | # Stage 1: generate simulated mixtures for every (num_speaker, sil_scale) pair |
| | | # and for both the training and validation subsets. |
| | | simudir=data/simu |
| | | if [ $stage -le 1 ]; then |
| | | echo "simulation of mixture" |
| | | mkdir -p $simudir/.work |
| | | # These must be plain assignments: a word such as "local/name=value" is not |
| | | # an assignment (a variable name cannot contain "/"), so bash would try to |
| | | # run it as a command and leave the variable at its earlier value. |
| | | # NOTE(review): assumes both generator scripts live under local/ -- confirm. |
| | | random_mixture_cmd=local/random_mixture.py |
| | | make_mixture_cmd=local/make_mixture.py |
| | | |
| | | for ((i=0; i<${#simu_opts_sil_scale_array[@]}; ++i)); do |
| | | # The two option arrays are indexed in parallel. |
| | | simu_opts_num_speaker=${simu_opts_num_speaker_array[i]} |
| | | simu_opts_sil_scale=${simu_opts_sil_scale_array[i]} |
| | | for dset in swb_sre_tr swb_sre_cv; do |
| | | # The training subset gets the configured number of mixtures; the |
| | | # validation subset always gets 500. |
| | | if [ "$dset" == "swb_sre_tr" ]; then |
| | | n_mixtures=${simu_opts_num_train} |
| | | else |
| | | n_mixtures=500 |
| | | fi |
| | | simuid=${dset}_ns${simu_opts_num_speaker}_beta${simu_opts_sil_scale}_${n_mixtures} |
| | | # check if you have the simulation |
| | | if ! validate_data_dir.sh --no-text --no-feats $simudir/data/$simuid; then |
| | | # random mixture generation |
| | | $train_cmd $simudir/.work/random_mixture_$simuid.log \ |
| | | $random_mixture_cmd --n_speakers $simu_opts_num_speaker --n_mixtures $n_mixtures \ |
| | | --speech_rvb_probability $simu_opts_rvb_prob \ |
| | | --sil_scale $simu_opts_sil_scale \ |
| | | data/$dset data/musan_noise_bg data/simu_rirs_8k \ |
| | | \> $simudir/.work/mixture_$simuid.scp |
| | | nj=64 |
| | | mkdir -p $simudir/wav/$simuid |
| | | # distribute simulated data to $simu_actual_dir |
| | | split_scps= |
| | | for n in $(seq $nj); do |
| | | split_scps="$split_scps $simudir/.work/mixture_$simuid.$n.scp" |
| | | mkdir -p $simudir/.work/data_$simuid.$n |
| | | # Round-robin over the storage areas; the wav dir for job $n becomes a |
| | | # symlink into the selected area. |
| | | actual=${simu_actual_dirs[($n-1)%${#simu_actual_dirs[@]}]}/$simudir/wav/$simuid/$n |
| | | mkdir -p $actual |
| | | ln -nfs $actual $simudir/wav/$simuid/$n |
| | | done |
| | | utils/split_scp.pl $simudir/.work/mixture_$simuid.scp $split_scps || exit 1 |
| | | |
| | | $simu_cmd --max-jobs-run 64 JOB=1:$nj $simudir/.work/make_mixture_$simuid.JOB.log \ |
| | | $make_mixture_cmd --rate=8000 \ |
| | | $simudir/.work/mixture_$simuid.JOB.scp \ |
| | | $simudir/.work/data_$simuid.JOB $simudir/wav/$simuid/JOB |
| | | utils/combine_data.sh $simudir/data/$simuid $simudir/.work/data_$simuid.* |
| | | steps/segmentation/convert_utt2spk_and_segments_to_rttm.py \ |
| | | $simudir/data/$simuid/utt2spk $simudir/data/$simuid/segments \ |
| | | $simudir/data/$simuid/rttm |
| | | utils/data/get_reco2dur.sh $simudir/data/$simuid |
| | | fi |
| | | # Name of the combined set: the array values joined with "n", |
| | | # e.g. ns1n2n3n4 and beta2n2n5n9 for the default arrays. |
| | | simuid_concat=${dset}_ns"$(IFS="n"; echo "${simu_opts_num_speaker_array[*]}")"_beta"$(IFS="n"; echo "${simu_opts_sil_scale_array[*]}")"_${n_mixtures} |
| | | mkdir -p $simudir/data/$simuid_concat |
| | | # Append every regular file of this set to the combined set. A glob with a |
| | | # -f test is used instead of parsing "ls -F", whose type suffixes (e.g. "*" |
| | | # on executables) would corrupt the file names. |
| | | # NOTE(review): this appends on every run, so re-running the stage adds |
| | | # the same entries again -- confirm whether that is intended. |
| | | for src in $simudir/data/$simuid/*; do |
| | | [ -f "$src" ] || continue |
| | | cat "$src" >> $simudir/data/$simuid_concat/$(basename "$src") |
| | | done |
| | | done |
| | | done |
| | | fi |
| | | |
| | | if [ $stage -le 3 ]; then |
| | | # compose eval/callhome2_spkall |
| | | eval_set=data/eval/callhome2_spkall |
| | | if ! validate_data_dir.sh --no-text --no-feats $eval_set; then |
| | | utils/copy_data_dir.sh data/callhome2_spkall $eval_set |
| | | cp data/callhome2_spkall/rttm $eval_set/rttm |
| | | awk -v dstdir=wav/eval/callhome2_spkall '{print $1, dstdir"/"$1".wav"}' data/callhome2_spkall/wav.scp > $eval_set/wav.scp |
| | | mkdir -p wav/eval/callhome2_spkall |
| | | wav-copy scp:data/callhome2_spkall/wav.scp scp:$eval_set/wav.scp |
| | | utils/data/get_reco2dur.sh $eval_set |
| | | fi |
| | | |
| | | # compose eval/callhome1_spkall |
| | | adapt_set=data/eval/callhome1_spkall |
| | | if ! validate_data_dir.sh --no-text --no-feats $adapt_set; then |
| | | utils/copy_data_dir.sh data/callhome1_spkall $adapt_set |
| | | cp data/callhome1_spkall/rttm $adapt_set/rttm |
| | | awk -v dstdir=wav/eval/callhome1_spkall '{print $1, dstdir"/"$1".wav"}' data/callhome1_spkall/wav.scp > $adapt_set/wav.scp |
| | | mkdir -p wav/eval/callhome1_spkall |
| | | wav-copy scp:data/callhome1_spkall/wav.scp scp:$adapt_set/wav.scp |
| | | utils/data/get_reco2dur.sh $adapt_set |
| | | fi |
| | | fi |
| | |
| | | export FUNASR_DIR=$PWD/../../.. |
| | | |
| | | # kaldi-related |
| | | export KALDI_ROOT= |
| | | [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh |
| | | export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/tools/sph2pipe_v2.5:$KALDI_ROOT/tools/sctk/bin:$PWD:$PATH |
| | | [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 |
| | | . $KALDI_ROOT/tools/config/common_path.sh |
| | | |
| | | # NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C |
| | | export PYTHONIOENCODING=UTF-8 |
| | | export PYTHONPATH=../../../:$PYTHONPATH |
| | |
| | | |
| | | exp_dir="." |
| | | input_size=345 |
| | | stage=1 |
| | | stop_stage=4 |
| | | stage=-1 |
| | | stop_stage=-1 |
| | | |
| | | # exp tag |
| | | tag="exp_fix" |
| | |
| | | simu_allspkr_chunk2000_model_dir="baseline_$(basename "${simu_allspkr_chunk2000_diar_config}" .yaml)_${tag}" |
| | | callhome_model_dir="baseline_$(basename "${callhome_diar_config}" .yaml)_${tag}" |
| | | |
| | | # Prepare data for training and inference |
| | | if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then |
| | | echo "stage 0: Prepare data for training and inference" |
| | | # simulate mixture data for training and inference |
| | | # Optional stage -1: link the Kaldi steps/ and utils/ directories into the |
| | | # recipe and source the shared data-preparation script. |
| | | if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then |
| | | echo "stage 0: Simulate mixture data for training and inference" |
| | | echo "The detail can be found in https://github.com/hitachi-speech/EEND" |
| | | echo "Before running this step, you should download and compile kaldi and set KALDI_ROOT in this script and path.sh" |
| | | echo "This stage may take a long time, please waiting..." |
| | | # Must be filled in with the local Kaldi checkout before running this stage. |
| | | KALDI_ROOT= |
| | | ln -s $KALDI_ROOT/egs/wsj/s5/steps steps |
| | | ln -s $KALDI_ROOT/egs/wsj/s5/utils utils |
| | | . local/run_prepare_shared_eda.sh |
| | | fi |
| | | |
| | | ## Prepare data for training and inference |
| | | #if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then |
| | | # echo "stage 0: Prepare data for training and inference" |
| | | # echo "The detail can be found in https://github.com/hitachi-speech/EEND" |
| | | # . ./local/ |
| | | #fi |
| | | # |
| | | |
| | | # Training on simulated two-speaker data |
| | | world_size=$gpu_num |
| | | simu_2spkr_ave_id=avg${simu_average_2spkr_start}-${simu_average_2spkr_end} |