| | |
| | | token_type=char |
| | | scp=wav.scp |
| | | type=sound |
| | | stage=2 |
| | | stop_stage=2 |
| | | stage=1 |
| | | stop_stage=1 |
| | | |
| | | # feature configuration |
| | | feats_dim=80 |
| | |
| | | import argparse |
| | | import json |
| | | import os |
| | | |
| | | import numpy as np |
| | | |
| | | |
| | | def get_parser(): |
| | | parser = argparse.ArgumentParser( |
| | |
| | | formatter_class=argparse.ArgumentDefaultsHelpFormatter, |
| | | ) |
| | | parser.add_argument( |
| | | "--dims", |
| | | "-d", |
| | | "--dim", |
| | | default=80, |
| | | type=int, |
| | | help="feature dims", |
| | | ) |
| | | parser.add_argument( |
| | | "--cmvn-dir", |
| | | "-c", |
| | | "--cmvn_dir", |
| | | default=False, |
| | | required=True, |
| | | type=str, |
| | |
| | | |
| | | parser.add_argument( |
| | | "--nj", |
| | | "-n", |
| | | default=1, |
| | | required=True, |
| | | type=int, |
| | | help="num of cmvn file", |
| | | ) |
| | | parser.add_argument( |
| | | "--output-dir", |
| | | "-o", |
| | | "--output_dir", |
| | | default=False, |
| | | required=True, |
| | | type=str, |
| | |
| | | total_vars = np.zeros(args.dims) |
| | | total_frames = 0 |
| | | |
| | | cmvn_file = args.output_dir + "/cmvn.json" |
| | | cmvn_file = os.path.join(args.output_dir, "cmvn.json") |
| | | |
| | | for i in range(1, args.nj+1): |
| | | with open(args.cmvn_dir + "/cmvn." + str(i) + ".json", "r") as fin: |
| | | with open(os.path.join(args.cmvn_dir, "cmvn.{}.json".format(str(i)))) as fin: |
| | | cmvn_stats = json.load(fin) |
| | | |
| | | total_means += np.array(cmvn_stats["mean_stats"]) |
| | |
| | | fbankdir=$1 |
| | | |
| | | split_dir=${fbankdir}/cmvn/split_${nj}; |
| | | mkdir -p $split_dir |
| | | split_scps="" |
| | | for n in $(seq $nj); do |
| | | split_scps="$split_scps $split_dir/wav.$n.scp" |
| | | done |
| | | utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1; |
| | | |
| | | logdir=${fbankdir}/cmvn/log |
| | | $cmd JOB=1:$nj $logdir/cmvn.JOB.log \ |
| | | python utils/compute_cmvn.py \ |
| | | --dim ${feats_dim} \ |
| | | --wav_path $split_dir \ |
| | | --idx JOB |
| | | |
| | | #python utils/combine_cmvn_file.py -d ${feats_dim} -c ${fbankdir}/cmvn -n $nj -o $fbankdir |
| | | #mkdir -p $split_dir |
| | | #split_scps="" |
| | | #for n in $(seq $nj); do |
| | | # split_scps="$split_scps $split_dir/wav.$n.scp" |
| | | #done |
| | | #utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1; |
| | | # |
| | | #logdir=${fbankdir}/cmvn/log |
| | | #$cmd JOB=1:$nj $logdir/cmvn.JOB.log \ |
| | | # python utils/compute_cmvn.py \ |
| | | # --dim ${feats_dim} \ |
| | | # --wav_path $split_dir \ |
| | | # --idx JOB |
| | | |
| | | # Merge the per-job cmvn.N.json stats from $split_dir into ${fbankdir}/cmvn/cmvn.json. |
| | | # Long options need a double dash: argparse would read "-nj" as "-n j" (and |
| | | # "-cmvn_dir" as "-c mvn_dir"), which fails int parsing and drops the real values. |
| | | python utils/combine_cmvn_file.py --dim ${feats_dim} --cmvn_dir $split_dir --nj $nj --output_dir ${fbankdir}/cmvn |
| | | |
| | | #echo "$0: Succeeded compute global cmvn" |