From 3cd3473bf7a3b41484baa86d9092248d78e7af39 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期五, 21 四月 2023 17:17:37 +0800
Subject: [PATCH] docs
---
funasr/bin/asr_inference_mfcca.py | 12 +++++++-----
1 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/funasr/bin/asr_inference_mfcca.py b/funasr/bin/asr_inference_mfcca.py
index e25b2a9..e832869 100644
--- a/funasr/bin/asr_inference_mfcca.py
+++ b/funasr/bin/asr_inference_mfcca.py
@@ -41,8 +41,6 @@
from funasr.utils import asr_utils, wav_utils, postprocess_utils
import pdb
-header_colors = '\033[95m'
-end_colors = '\033[0m'
global_asr_language: str = 'zh-cn'
global_sample_rate: Union[int, Dict[Any, int]] = {
@@ -55,7 +53,7 @@
Examples:
>>> import soundfile
- >>> speech2text = Speech2Text("asr_config.yml", "asr.pth")
+ >>> speech2text = Speech2Text("asr_config.yml", "asr.pb")
>>> audio, rate = soundfile.read("speech.wav")
>>> speech2text(audio)
[(text, token, token_int, hypothesis object), ...]
@@ -194,8 +192,8 @@
# Input as audio signal
if isinstance(speech, np.ndarray):
speech = torch.tensor(speech)
-
-
+ if(speech.dim()==3):
+ speech = torch.squeeze(speech, 2)
#speech = speech.unsqueeze(0).to(getattr(torch, self.dtype))
speech = speech.to(getattr(torch, self.dtype))
# lenghts: (1,)
@@ -474,6 +472,8 @@
**kwargs,
):
assert check_argument_types()
+ ncpu = kwargs.get("ncpu", 1)
+ torch.set_num_threads(ncpu)
if batch_size > 1:
raise NotImplementedError("batch decoding is not implemented")
if word_lm_train_config is not None:
@@ -534,6 +534,8 @@
data_path_and_name_and_type,
dtype=dtype,
batch_size=batch_size,
+ fs=fs,
+ mc=True,
key_file=key_file,
num_workers=num_workers,
preprocess_fn=ASRTask.build_preprocess_fn(speech2text.asr_train_args, False),
--
Gitblit v1.9.1