From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] compute_audio_cmvn: update download_model import path, log progress, simplify CMVN stats output

---
 funasr/bin/compute_audio_cmvn.py |   23 ++++++++++++-----------
 1 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/funasr/bin/compute_audio_cmvn.py b/funasr/bin/compute_audio_cmvn.py
index 91bb2ac..880a247 100644
--- a/funasr/bin/compute_audio_cmvn.py
+++ b/funasr/bin/compute_audio_cmvn.py
@@ -7,7 +7,7 @@
 from omegaconf import DictConfig, OmegaConf
 
 from funasr.register import tables
-from funasr.download.download_from_hub import download_model
+from funasr.download.download_model_from_hub import download_model
 from funasr.train_utils.set_all_random_seed import set_all_random_seed
 
 
@@ -52,7 +52,7 @@
         frontend=frontend,
         tokenizer=None,
         is_training=False,
-        **kwargs.get("dataset_conf")
+        **kwargs.get("dataset_conf"),
     )
 
     # dataloader
@@ -68,11 +68,14 @@
         dataset_train, collate_fn=dataset_train.collator, **batch_sampler_train
     )
 
-    iter_stop = int(kwargs.get("scale", 1.0) * len(dataloader_train))
-
     total_frames = 0
     for batch_idx, batch in enumerate(dataloader_train):
-        if batch_idx >= iter_stop:
+        iter_stop = int(kwargs.get("scale", -1.0) * len(dataloader_train))
+        log_step = iter_stop // 100
+        if batch_idx % log_step == 0:
+            logging.info(f"prcessed: {batch_idx}/{iter_stop}")
+        if batch_idx >= iter_stop and iter_stop > 0.0:
+            logging.info(f"prcessed: {iter_stop}/{iter_stop}")
             break
 
         fbank = batch["speech"].numpy()[0, :, :]
@@ -85,8 +88,8 @@
         total_frames += fbank.shape[0]
 
     cmvn_info = {
-        "mean_stats": list(mean_stats.tolist()),
-        "var_stats": list(var_stats.tolist()),
+        "mean_stats": mean_stats.tolist(),
+        "var_stats": var_stats.tolist(),
         "total_frames": total_frames,
     }
     cmvn_file = kwargs.get("cmvn_file", "cmvn.json")
@@ -115,11 +118,9 @@
             + str(dims)
             + "\n"
         )
-        mean_str = str(list(mean)).replace(",", "").replace("[", "[ ").replace("]", " ]")
-        fout.write("<LearnRateCoef> 0 " + mean_str + "\n")
+        fout.write("<LearnRateCoef> 0 [ " + " ".join([str(item) for item in mean]) + " ]\n")
         fout.write("<Rescale> " + str(dims) + " " + str(dims) + "\n")
-        var_str = str(list(var)).replace(",", "").replace("[", "[ ").replace("]", " ]")
-        fout.write("<LearnRateCoef> 0 " + var_str + "\n")
+        fout.write("<LearnRateCoef> 0 [ " + " ".join([str(item) for item in var]) + " ]\n")
         fout.write("</Nnet>" + "\n")
 
 

--
Gitblit v1.9.1