From f5d183b5ce1050c0d9f36f18365c1c625c01fa5b Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Tue, 25 Apr 2023 22:47:26 +0800
Subject: [PATCH] update
---
egs/aishell/transformer/utils/combine_cmvn_file.py | 19 +++++++++----------
egs/aishell/transformer/utils/compute_cmvn.sh | 30 +++++++++++++++---------------
egs/aishell/paraformer/run.sh | 4 ++--
3 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/egs/aishell/paraformer/run.sh b/egs/aishell/paraformer/run.sh
index 5193404..9ea06c5 100755
--- a/egs/aishell/paraformer/run.sh
+++ b/egs/aishell/paraformer/run.sh
@@ -21,8 +21,8 @@
token_type=char
scp=wav.scp
type=sound
-stage=2
-stop_stage=2
+stage=1
+stop_stage=1
# feature configuration
feats_dim=80
diff --git a/egs/aishell/transformer/utils/combine_cmvn_file.py b/egs/aishell/transformer/utils/combine_cmvn_file.py
index b2974a4..e0ee99f 100755
--- a/egs/aishell/transformer/utils/combine_cmvn_file.py
+++ b/egs/aishell/transformer/utils/combine_cmvn_file.py
@@ -1,6 +1,9 @@
import argparse
import json
+import os
+
import numpy as np
+
def get_parser():
parser = argparse.ArgumentParser(
@@ -8,15 +11,13 @@
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
- "--dims",
- "-d",
+ "--dim",
default=80,
type=int,
help="feature dims",
)
parser.add_argument(
- "--cmvn-dir",
- "-c",
+ "--cmvn_dir",
default=False,
required=True,
type=str,
@@ -25,15 +26,13 @@
parser.add_argument(
"--nj",
- "-n",
default=1,
required=True,
type=int,
help="num of cmvn file",
)
parser.add_argument(
- "--output-dir",
- "-o",
+ "--output_dir",
default=False,
required=True,
type=str,
@@ -50,10 +49,10 @@
total_vars = np.zeros(args.dims)
total_frames = 0
- cmvn_file = args.output_dir + "/cmvn.json"
+ cmvn_file = os.path.join(args.output_dir, "cmvn.json")
- for i in range(1, args.nj+1):
- with open(args.cmvn_dir + "/cmvn." + str(i) + ".json", "r") as fin:
+ for i in range(1, args.nj + 1):
+ with open(os.path.join(args.cmvn_dir, "cmvn.{}.json".format(str(i)))) as fin:
cmvn_stats = json.load(fin)
total_means += np.array(cmvn_stats["mean_stats"])
diff --git a/egs/aishell/transformer/utils/compute_cmvn.sh b/egs/aishell/transformer/utils/compute_cmvn.sh
index 797ce0e..b3a7228 100755
--- a/egs/aishell/transformer/utils/compute_cmvn.sh
+++ b/egs/aishell/transformer/utils/compute_cmvn.sh
@@ -13,20 +13,20 @@
fbankdir=$1
split_dir=${fbankdir}/cmvn/split_${nj};
-mkdir -p $split_dir
-split_scps=""
-for n in $(seq $nj); do
- split_scps="$split_scps $split_dir/wav.$n.scp"
-done
-utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1;
-
-logdir=${fbankdir}/cmvn/log
-$cmd JOB=1:$nj $logdir/cmvn.JOB.log \
- python utils/compute_cmvn.py \
- --dim ${feats_dim} \
- --wav_path $split_dir \
- --idx JOB
-
-#python utils/combine_cmvn_file.py -d ${feats_dim} -c ${fbankdir}/cmvn -n $nj -o $fbankdir
+#mkdir -p $split_dir
+#split_scps=""
+#for n in $(seq $nj); do
+# split_scps="$split_scps $split_dir/wav.$n.scp"
+#done
+#utils/split_scp.pl ${fbankdir}/wav.scp $split_scps || exit 1;
#
+#logdir=${fbankdir}/cmvn/log
+#$cmd JOB=1:$nj $logdir/cmvn.JOB.log \
+# python utils/compute_cmvn.py \
+# --dim ${feats_dim} \
+# --wav_path $split_dir \
+# --idx JOB
+# combine_cmvn_file.py defines only long options (--dim, --cmvn_dir, --nj, --output_dir), so every flag needs a double dash
+python utils/combine_cmvn_file.py --dim ${feats_dim} --cmvn_dir $split_dir --nj $nj --output_dir ${fbankdir}/cmvn
+
#echo "$0: Succeeded compute global cmvn"
--
Gitblit v1.9.1