嘉渊
2023-04-25 f5d183b5ce1050c0d9f36f18365c1c625c01fa5b
update
3个文件已修改
53 ■■■■ 已修改文件
egs/aishell/paraformer/run.sh 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/aishell/transformer/utils/combine_cmvn_file.py 19 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/aishell/transformer/utils/compute_cmvn.sh 30 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/aishell/paraformer/run.sh
@@ -21,8 +21,8 @@
token_type=char
scp=wav.scp
type=sound
stage=2
stop_stage=2
stage=1
stop_stage=1
# feature configuration
feats_dim=80
egs/aishell/transformer/utils/combine_cmvn_file.py
@@ -1,6 +1,9 @@
import argparse
import json
import os
import numpy as np
def get_parser():
    parser = argparse.ArgumentParser(
@@ -8,15 +11,13 @@
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--dims",
        "-d",
        "--dim",
        default=80,
        type=int,
        help="feature dims",
    )
    parser.add_argument(
        "--cmvn-dir",
        "-c",
        "--cmvn_dir",
        default=False,
        required=True,
        type=str,
@@ -25,15 +26,13 @@
    parser.add_argument(
        "--nj",
        "-n",
        default=1,
        required=True,
        type=int,
        help="num of cmvn file",
    )
    parser.add_argument(
        "--output-dir",
        "-o",
        "--output_dir",
        default=False,
        required=True,
        type=str,
@@ -50,10 +49,10 @@
    total_vars = np.zeros(args.dims)
    total_frames = 0
    cmvn_file = args.output_dir + "/cmvn.json"
    cmvn_file = os.path.join(args.output_dir, "cmvn.json")
    for i in range(1, args.nj+1):
        with open(args.cmvn_dir + "/cmvn." + str(i) + ".json", "r") as fin:
    for i in range(1, args.nj + 1):
        with open(os.path.join(args.cmvn_dir, "cmvn.{}.json".format(str(i)))) as fin:
            cmvn_stats = json.load(fin)
        total_means += np.array(cmvn_stats["mean_stats"])
egs/aishell/transformer/utils/compute_cmvn.sh
@@ -13,20 +13,20 @@
fbankdir=$1
split_dir=${fbankdir}/cmvn/split_${nj};
mkdir -p $split_dir
split_scps=""
for n in $(seq $nj); do
    split_scps="$split_scps $split_dir/wav.$n.scp"
done
utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1;
logdir=${fbankdir}/cmvn/log
$cmd JOB=1:$nj $logdir/cmvn.JOB.log \
    python utils/compute_cmvn.py \
      --dim ${feats_dim} \
      --wav_path $split_dir \
      --idx JOB
#python utils/combine_cmvn_file.py -d ${feats_dim} -c ${fbankdir}/cmvn -n $nj -o $fbankdir
#mkdir -p $split_dir
#split_scps=""
#for n in $(seq $nj); do
#    split_scps="$split_scps $split_dir/wav.$n.scp"
#done
#utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1;
#
#logdir=${fbankdir}/cmvn/log
#$cmd JOB=1:$nj $logdir/cmvn.JOB.log \
#    python utils/compute_cmvn.py \
#      --dim ${feats_dim} \
#      --wav_path $split_dir \
#      --idx JOB
# Combine the per-job cmvn.N.json stats (N = 1..$nj) read from $split_dir.
# Long options must use a double dash: argparse does not accept "-cmvn_dir",
# "-nj" or "-output_dir" as spellings of "--cmvn_dir", "--nj", "--output_dir".
python utils/combine_cmvn_file.py --dim ${feats_dim} --cmvn_dir $split_dir --nj $nj --output_dir ${fbankdir}/cmvn
#echo "$0: Succeeded compute global cmvn"