From 580b11b57ac4b62f7e2acda73813a4e10e8e4cd3 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 10 Oct 2023 17:17:29 +0800
Subject: [PATCH] v0.8.0
---
funasr/bin/asr_inference_launch.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/funasr/bin/asr_inference_launch.py b/funasr/bin/asr_inference_launch.py
index 15dbdd4..c728d72 100644
--- a/funasr/bin/asr_inference_launch.py
+++ b/funasr/bin/asr_inference_launch.py
@@ -55,6 +55,7 @@
distribute_spk)
from funasr.build_utils.build_model_from_file import build_model_from_file
from funasr.utils.cluster_backend import ClusterBackend
+from funasr.utils.modelscope_utils import get_cache_dir
from tqdm import tqdm
def inference_asr(
@@ -498,6 +499,7 @@
):
ncpu = kwargs.get("ncpu", 1)
torch.set_num_threads(ncpu)
+ language = kwargs.get("model_lang", None)
if word_lm_train_config is not None:
raise NotImplementedError("Word LM is not implemented")
@@ -704,10 +706,13 @@
text, token, token_int = result[0], result[1], result[2]
time_stamp = result[4] if len(result[4]) > 0 else None
- if use_timestamp and time_stamp is not None and len(time_stamp):
- postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+ if language == "en-bpe":
+ postprocessed_result = postprocess_utils.sentence_postprocess_sentencepiece(token)
else:
- postprocessed_result = postprocess_utils.sentence_postprocess(token)
+ if use_timestamp and time_stamp is not None and len(time_stamp):
+ postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+ else:
+ postprocessed_result = postprocess_utils.sentence_postprocess(token)
text_postprocessed = ""
time_stamp_postprocessed = ""
text_postprocessed_punc = postprocessed_result
@@ -787,7 +792,7 @@
time_stamp_writer: bool = True,
punc_infer_config: Optional[str] = None,
punc_model_file: Optional[str] = None,
- sv_model_file: Optional[str] = None,
+ sv_model_file: Optional[str] = None,
streaming: bool = False,
embedding_node: str = "resnet1_dense",
sv_threshold: float = 0.9465,
@@ -808,6 +813,9 @@
level=log_level,
format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
)
+
+ if sv_model_file is None:
+ sv_model_file = "{}/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/campplus_cn_common.bin".format(get_cache_dir(None))
if param_dict is not None:
hotword_list_or_file = param_dict.get('hotword')
@@ -1084,7 +1092,6 @@
logging.info("decoding, utt: {}, predictions: {}".format(key, text_postprocessed_punc))
torch.cuda.empty_cache()
distribute_spk(asr_result_list[0]['sentences'], sv_output)
- import pdb; pdb.set_trace()
return asr_result_list
return _forward
@@ -2030,7 +2037,7 @@
return inference_paraformer(**kwargs)
elif mode == "paraformer_streaming":
return inference_paraformer_online(**kwargs)
- elif mode == "paraformer_vad_speaker":
+ elif mode.startswith("paraformer_vad_speaker"):
return inference_paraformer_vad_speaker(**kwargs)
elif mode.startswith("paraformer_vad"):
return inference_paraformer_vad_punc(**kwargs)
--
Gitblit v1.9.1