From d80ac2fd2df4e7fb8a28acfa512bb11472b5cc99 Mon Sep 17 00:00:00 2001
From: liugz18 <57401541+liugz18@users.noreply.github.com>
Date: 星期四, 18 七月 2024 21:34:55 +0800
Subject: [PATCH] Rename 'res' in line 514 to avoid with naming conflict with line 365
---
funasr/auto/auto_frontend.py | 67 +++++++++++++++++++--------------
1 files changed, 39 insertions(+), 28 deletions(-)
diff --git a/funasr/auto/auto_frontend.py b/funasr/auto/auto_frontend.py
index b802b83..562d5f8 100644
--- a/funasr/auto/auto_frontend.py
+++ b/funasr/auto/auto_frontend.py
@@ -19,7 +19,7 @@
from funasr.download.file import download_from_url
from funasr.auto.auto_model import prepare_data_iterator
from funasr.utils.timestamp_tools import timestamp_sentence
-from funasr.download.download_from_hub import download_model
+from funasr.download.download_model_from_hub import download_model
from funasr.utils.vad_utils import slice_padding_audio_samples
from funasr.train_utils.set_all_random_seed import set_all_random_seed
from funasr.train_utils.load_pretrained_model import load_pretrained_model
@@ -33,7 +33,7 @@
if "model_conf" not in kwargs:
logging.info("download models from model hub: {}".format(kwargs.get("hub", "ms")))
kwargs = download_model(**kwargs)
-
+
# build frontend
frontend = kwargs.get("frontend", None)
if frontend is not None:
@@ -45,25 +45,23 @@
del kwargs["frontend"]
self.kwargs = kwargs
-
def __call__(self, input, input_len=None, kwargs=None, **cfg):
-
+
kwargs = self.kwargs if kwargs is None else kwargs
kwargs.update(cfg)
-
key_list, data_list = prepare_data_iterator(input, input_len=input_len)
batch_size = kwargs.get("batch_size", 1)
- device = kwargs.get("device", "cpu")
+ device = kwargs.get("device", "cuda")
if device == "cpu":
batch_size = 1
-
+
meta_data = {}
-
+
result_list = []
num_samples = len(data_list)
- pbar = tqdm(colour="blue", total=num_samples + 1, dynamic_ncols=True)
-
+ # pbar = tqdm(colour="blue", total=num_samples + 1, dynamic_ncols=True)
+
time0 = time.perf_counter()
for beg_idx in range(0, num_samples, batch_size):
end_idx = min(num_samples, beg_idx + batch_size)
@@ -72,27 +70,40 @@
# extract fbank feats
time1 = time.perf_counter()
- audio_sample_list = load_audio_text_image_video(data_batch, fs=self.frontend.fs, audio_fs=kwargs.get("fs", 16000))
+ audio_sample_list = load_audio_text_image_video(
+ data_batch, fs=self.frontend.fs, audio_fs=kwargs.get("fs", 16000)
+ )
time2 = time.perf_counter()
meta_data["load_data"] = f"{time2 - time1:0.3f}"
- speech, speech_lengths = extract_fbank(audio_sample_list, data_type=kwargs.get("data_type", "sound"),
- frontend=self.frontend, **kwargs)
+ speech, speech_lengths = extract_fbank(
+ audio_sample_list,
+ data_type=kwargs.get("data_type", "sound"),
+ frontend=self.frontend,
+ **kwargs,
+ )
time3 = time.perf_counter()
meta_data["extract_feat"] = f"{time3 - time2:0.3f}"
- meta_data["batch_data_time"] = speech_lengths.sum().item() * self.frontend.frame_shift * self.frontend.lfr_n / 1000
-
- speech.to(device=device), speech_lengths.to(device=device)
- batch = {"input": speech, "input_len": speech_lengths, "key": key_batch}
- result_list.append(batch)
-
- pbar.update(1)
- description = (
- f"{meta_data}, "
+ meta_data["batch_data_time"] = (
+ speech_lengths.sum().item() * self.frontend.frame_shift * self.frontend.lfr_n / 1000
)
- pbar.set_description(description)
-
- time_end = time.perf_counter()
- pbar.set_description(f"time escaped total: {time_end - time0:0.3f}")
-
- return result_list
+ if kwargs.get("return_pt", True):
+ speech, speech_lengths = speech.to(device=device), speech_lengths.to(device=device)
+ else:
+ speech, speech_lengths = speech.numpy(), speech_lengths.numpy()
+ batch = {
+ "input": speech,
+ "input_len": speech_lengths,
+ "key": key_batch,
+ "data_type": "fbank",
+ }
+ result_list.append(batch)
+
+ # pbar.update(1)
+ # description = f"{meta_data}, "
+ # pbar.set_description(description)
+
+ time_end = time.perf_counter()
+ # pbar.set_description(f"time escaped total: {time_end - time0:0.3f}")
+
+ return result_list
--
Gitblit v1.9.1