From aba47683fd4b2984dbff7fc79b0f532fc2d9f6b7 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: Mon, 04 Mar 2024 16:44:49 +0800
Subject: [PATCH] Update funasr/auto/auto_model.py
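
Harden AutoModel.build_model and inference against missing attributes
and paths:

- default disable_log to True so the registry tables are not printed
  unless explicitly requested
- fall back to tokenizer.get_vocab(), then None, when the tokenizer has
  no token_list
- default input_size to None and only query frontend.output_size() when
  the frontend provides it
- skip loading init_param and log an error when the checkpoint path
  does not exist
- guard raw_text, punctuation and sentence-timestamp post-processing
  against empty ASR output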
---
funasr/auto/auto_model.py | 46 ++++++++++++++++++++++++++++------------------
1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index 2cb2e1d..921ede8 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -95,7 +95,7 @@
class AutoModel:
def __init__(self, **kwargs):
- if not kwargs.get("disable_log", False):
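+ # disable_log now defaults to True: the registry tables are printed only when it is explicitly set to False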
+ if not kwargs.get("disable_log", True):
tables.print()
model, kwargs = self.build_model(**kwargs)
@@ -162,18 +162,21 @@
tokenizer_class = tables.tokenizer_classes.get(tokenizer)
tokenizer = tokenizer_class(**kwargs["tokenizer_conf"])
kwargs["tokenizer"] = tokenizer
- kwargs["token_list"] = tokenizer.token_list
- vocab_size = len(tokenizer.token_list)
+
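+ # not every tokenizer exposes token_list; prefer get_vocab() when available, else fall back to None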
+ kwargs["token_list"] = tokenizer.token_list if hasattr(tokenizer, "token_list") else None
+ kwargs["token_list"] = tokenizer.get_vocab() if hasattr(tokenizer, "get_vocab") else kwargs["token_list"]
+ vocab_size = len(kwargs["token_list"]) if kwargs["token_list"] is not None else -1
else:
vocab_size = -1
# build frontend
frontend = kwargs.get("frontend", None)
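+ # default input_size to None so model construction still works when no frontend is configured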
+ kwargs["input_size"] = None
if frontend is not None:
frontend_class = tables.frontend_classes.get(frontend)
frontend = frontend_class(**kwargs["frontend_conf"])
kwargs["frontend"] = frontend
- kwargs["input_size"] = frontend.output_size()
+ kwargs["input_size"] = frontend.output_size() if hasattr(frontend, "output_size") else None
# build model
model_class = tables.model_classes.get(kwargs["model"])
@@ -184,15 +187,18 @@
# init_param
init_param = kwargs.get("init_param", None)
if init_param is not None:
- logging.info(f"Loading pretrained params from {init_param}")
- load_pretrained_model(
- model=model,
- path=init_param,
- ignore_init_mismatch=kwargs.get("ignore_init_mismatch", False),
- oss_bucket=kwargs.get("oss_bucket", None),
- scope_map=kwargs.get("scope_map", None),
- excludes=kwargs.get("excludes", None),
- )
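+ # only attempt to load pretrained params when the checkpoint path exists on disk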
+ if os.path.exists(init_param):
+ logging.info(f"Loading pretrained params from {init_param}")
+ load_pretrained_model(
+ model=model,
+ path=init_param,
+ ignore_init_mismatch=kwargs.get("ignore_init_mismatch", False),
+ oss_bucket=kwargs.get("oss_bucket", None),
+ scope_map=kwargs.get("scope_map", []),
+ excludes=kwargs.get("excludes", None),
+ )
+ else:
+ print(f"error, init_param does not exist!: {init_param}")
return model, kwargs
@@ -387,7 +393,8 @@
# step.3 compute punc model
if self.punc_model is not None:
if not len(result["text"]):
- result['raw_text'] = ''
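+ # attach raw_text only when the caller asked for it via return_raw_text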
+ if return_raw_text:
+ result['raw_text'] = ''
else:
self.punc_kwargs.update(cfg)
punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
@@ -429,10 +436,13 @@
distribute_spk(sentence_list, sv_output)
result['sentence_info'] = sentence_list
elif kwargs.get("sentence_timestamp", False):
- sentence_list = timestamp_sentence(punc_res[0]['punc_array'],
- result['timestamp'],
- raw_text,
- return_raw_text=return_raw_text)
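+ # punc_res is never assigned when the ASR text is empty, so return an empty sentence list instead of indexing it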
+ if not len(result['text']):
+ sentence_list = []
+ else:
+ sentence_list = timestamp_sentence(punc_res[0]['punc_array'],
+ result['timestamp'],
+ raw_text,
+ return_raw_text=return_raw_text)
result['sentence_info'] = sentence_list
if "spk_embedding" in result: del result['spk_embedding']
--
Gitblit v1.9.1