From 7498bd7388afdde8d5e6f8a4cb6aeb8be8ac60fa Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: Fri, 08 Mar 2024 11:37:46 +0800
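Subject: [PATCH] auto_model: skip punc/sentence-timestamp on empty text; harden build_model (tokenizer vocab, frontend output_size, model_conf, init_param checks)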
---
funasr/auto/auto_model.py | 87 +++++++++++++++++++++++++++----------------
1 file changed, 54 insertions(+), 33 deletions(-)
diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index 66c0750..9ae9f18 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -1,3 +1,8 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+# MIT License (https://opensource.org/licenses/MIT)
+
import json
import time
import copy
@@ -12,18 +17,18 @@
from funasr.register import tables
from funasr.utils.load_utils import load_bytes
from funasr.download.file import download_from_url
+from funasr.utils.timestamp_tools import timestamp_sentence
from funasr.download.download_from_hub import download_model
from funasr.utils.vad_utils import slice_padding_audio_samples
+from funasr.utils.load_utils import load_audio_text_image_video
from funasr.train_utils.set_all_random_seed import set_all_random_seed
from funasr.train_utils.load_pretrained_model import load_pretrained_model
-from funasr.utils.load_utils import load_audio_text_image_video
-from funasr.utils.timestamp_tools import timestamp_sentence
from funasr.models.campplus.utils import sv_chunk, postprocess, distribute_spk
try:
from funasr.models.campplus.cluster_backend import ClusterBackend
except:
print("If you want to use the speaker diarization, please `pip install hdbscan`")
-
+import pdb
def prepare_data_iterator(data_in, input_len=None, data_type=None, key=None):
"""
@@ -41,6 +46,7 @@
chars = string.ascii_letters + string.digits
if isinstance(data_in, str) and data_in.startswith('http'): # url
data_in = download_from_url(data_in)
+
if isinstance(data_in, str) and os.path.exists(data_in): # wav_path; filelist: wav.scp, file.jsonl;text.txt;
_, file_extension = os.path.splitext(data_in)
file_extension = file_extension.lower()
@@ -90,7 +96,7 @@
class AutoModel:
def __init__(self, **kwargs):
- if not kwargs.get("disable_log", False):
+ if not kwargs.get("disable_log", True):
tables.print()
model, kwargs = self.build_model(**kwargs)
@@ -137,11 +143,11 @@
def build_model(self, **kwargs):
assert "model" in kwargs
if "model_conf" not in kwargs:
- logging.info("download models from model hub: {}".format(kwargs.get("model_hub", "ms")))
+ logging.info("download models from model hub: {}".format(kwargs.get("hub", "ms")))
kwargs = download_model(**kwargs)
set_all_random_seed(kwargs.get("seed", 0))
-
+
device = kwargs.get("device", "cuda")
if not torch.cuda.is_available() or kwargs.get("ngpu", 1) == 0:
device = "cpu"
@@ -157,37 +163,41 @@
tokenizer_class = tables.tokenizer_classes.get(tokenizer)
tokenizer = tokenizer_class(**kwargs["tokenizer_conf"])
kwargs["tokenizer"] = tokenizer
- kwargs["token_list"] = tokenizer.token_list
- vocab_size = len(tokenizer.token_list)
+
+ kwargs["token_list"] = tokenizer.token_list if hasattr(tokenizer, "token_list") else None
+ kwargs["token_list"] = tokenizer.get_vocab() if hasattr(tokenizer, "get_vocab") else kwargs["token_list"]
+ vocab_size = len(kwargs["token_list"]) if kwargs["token_list"] is not None else -1
else:
vocab_size = -1
-
# build frontend
frontend = kwargs.get("frontend", None)
+ kwargs["input_size"] = None
if frontend is not None:
frontend_class = tables.frontend_classes.get(frontend)
frontend = frontend_class(**kwargs["frontend_conf"])
kwargs["frontend"] = frontend
- kwargs["input_size"] = frontend.output_size()
+ kwargs["input_size"] = frontend.output_size() if hasattr(frontend, "output_size") else None
# build model
model_class = tables.model_classes.get(kwargs["model"])
- model = model_class(**kwargs, **kwargs["model_conf"], vocab_size=vocab_size)
-
+ model = model_class(**kwargs, **kwargs.get("model_conf", {}), vocab_size=vocab_size)
model.to(device)
# init_param
init_param = kwargs.get("init_param", None)
if init_param is not None:
- logging.info(f"Loading pretrained params from {init_param}")
- load_pretrained_model(
- model=model,
- path=init_param,
- ignore_init_mismatch=kwargs.get("ignore_init_mismatch", False),
- oss_bucket=kwargs.get("oss_bucket", None),
- scope_map=kwargs.get("scope_map", None),
- excludes=kwargs.get("excludes", None),
- )
+ if os.path.exists(init_param):
+ logging.info(f"Loading pretrained params from {init_param}")
+ load_pretrained_model(
+ model=model,
+ path=init_param,
+ ignore_init_mismatch=kwargs.get("ignore_init_mismatch", False),
+ oss_bucket=kwargs.get("oss_bucket", None),
+ scope_map=kwargs.get("scope_map", []),
+ excludes=kwargs.get("excludes", None),
+ )
+ else:
+ print(f"error, init_param does not exist!: {init_param}")
return model, kwargs
@@ -213,9 +223,9 @@
batch_size = kwargs.get("batch_size", 1)
# if kwargs.get("device", "cpu") == "cpu":
# batch_size = 1
-
+
key_list, data_list = prepare_data_iterator(input, input_len=input_len, data_type=kwargs.get("data_type", None), key=key)
-
+
speed_stats = {}
asr_result_list = []
num_samples = len(data_list)
@@ -228,13 +238,17 @@
data_batch = data_list[beg_idx:end_idx]
key_batch = key_list[beg_idx:end_idx]
batch = {"data_in": data_batch, "key": key_batch}
+
if (end_idx - beg_idx) == 1 and kwargs.get("data_type", None) == "fbank": # fbank
batch["data_in"] = data_batch[0]
batch["data_lengths"] = input_len
time1 = time.perf_counter()
with torch.no_grad():
- results, meta_data = model.inference(**batch, **kwargs)
+ res = model.inference(**batch, **kwargs)
+ if isinstance(res, (list, tuple)):
+ results = res[0]
+ meta_data = res[1] if len(res) > 1 else {}
time2 = time.perf_counter()
asr_result_list.extend(results)
@@ -381,11 +395,15 @@
return_raw_text = kwargs.get('return_raw_text', False)
# step.3 compute punc model
if self.punc_model is not None:
- self.punc_kwargs.update(cfg)
- punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
- raw_text = copy.copy(result["text"])
- if return_raw_text: result['raw_text'] = raw_text
- result["text"] = punc_res[0]["text"]
+ if not len(result["text"]):
+ if return_raw_text:
+ result['raw_text'] = ''
+ else:
+ self.punc_kwargs.update(cfg)
+ punc_res = self.inference(result["text"], model=self.punc_model, kwargs=self.punc_kwargs, **cfg)
+ raw_text = copy.copy(result["text"])
+ if return_raw_text: result['raw_text'] = raw_text
+ result["text"] = punc_res[0]["text"]
else:
raw_text = None
@@ -421,10 +439,13 @@
distribute_spk(sentence_list, sv_output)
result['sentence_info'] = sentence_list
elif kwargs.get("sentence_timestamp", False):
- sentence_list = timestamp_sentence(punc_res[0]['punc_array'],
- result['timestamp'],
- raw_text,
- return_raw_text=return_raw_text)
+ if not len(result['text']):
+ sentence_list = []
+ else:
+ sentence_list = timestamp_sentence(punc_res[0]['punc_array'],
+ result['timestamp'],
+ raw_text,
+ return_raw_text=return_raw_text)
result['sentence_info'] = sentence_list
if "spk_embedding" in result: del result['spk_embedding']
--
Gitblit v1.9.1