From e0a8c4b00631ed636418f4280964e473f05d5002 Mon Sep 17 00:00:00 2001
From: hnluo <haoneng.lhn@alibaba-inc.com>
Date: Thu, 25 May 2023 11:08:46 +0800
Subject: [PATCH] Merge pull request #552 from alibaba-damo-academy/dev_wjm2
---
egs/aishell/transformer/utils/compute_cmvn.py | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/egs/aishell/transformer/utils/compute_cmvn.py b/egs/aishell/transformer/utils/compute_cmvn.py
index 949cc08..6c9b445 100755
--- a/egs/aishell/transformer/utils/compute_cmvn.py
+++ b/egs/aishell/transformer/utils/compute_cmvn.py
@@ -5,6 +5,7 @@
import numpy as np
import torchaudio
import torchaudio.compliance.kaldi as kaldi
+import yaml
def get_parser():
@@ -24,6 +25,11 @@
required=True,
type=str,
help="the path of wav scps",
+ )
+ parser.add_argument(
+ "--config",
+ type=str,
+ help="the config file for computing cmvn",
)
parser.add_argument(
"--idx",
@@ -82,11 +88,27 @@
# mean_stats += np.sum(mat, axis=0)
# var_stats += np.sum(np.square(mat), axis=0)
# total_frames += mat.shape[0]
+
+ with open(args.config) as f:
+ configs = yaml.safe_load(f)
+ frontend_configs = configs.get("frontend_conf", {})
+ num_mel_bins = frontend_configs.get("n_mels", 80)
+ frame_length = frontend_configs.get("frame_length", 25)
+ frame_shift = frontend_configs.get("frame_shift", 10)
+ window_type = frontend_configs.get("window", "hamming")
+ resample_rate = frontend_configs.get("fs", 16000)
+ assert num_mel_bins == args.dim
+
with open(wav_scp_file) as f:
lines = f.readlines()
for line in lines:
_, wav_file = line.strip().split()
- fbank = compute_fbank(wav_file, num_mel_bins=args.dim)
+ fbank = compute_fbank(wav_file,
+ num_mel_bins=args.dim,
+ frame_length=frame_length,
+ frame_shift=frame_shift,
+ resample_rate=resample_rate,
+ window_type=window_type)
mean_stats += np.sum(fbank, axis=0)
var_stats += np.sum(np.square(fbank), axis=0)
total_frames += fbank.shape[0]
--
Gitblit v1.9.1