From c3442d9566f5a2011c95b0d2998958a1b5348564 Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: Fri, 12 Jan 2024 18:04:42 +0800
Subject: [PATCH] update device
---
funasr/models/paraformer_streaming/model.py | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/funasr/models/paraformer_streaming/model.py b/funasr/models/paraformer_streaming/model.py
index 927b091..b736aa9 100644
--- a/funasr/models/paraformer_streaming/model.py
+++ b/funasr/models/paraformer_streaming/model.py
@@ -519,24 +519,31 @@
if len(cache) == 0:
self.init_cache(cache, **kwargs)
- _is_final = kwargs.get("is_final", False)
+
meta_data = {}
chunk_size = kwargs.get("chunk_size", [0, 10, 5])
- chunk_stride_samples = chunk_size[1] * 960 # 600ms
+ chunk_stride_samples = int(chunk_size[1] * 960) # 600ms
time1 = time.perf_counter()
- audio_sample_list = load_audio_text_image_video(data_in, fs=frontend.fs, audio_fs=kwargs.get("fs", 16000),
- data_type=kwargs.get("data_type", "sound"),
- tokenizer=tokenizer)
+ cfg = {"is_final": kwargs.get("is_final", False)}
+ audio_sample_list = load_audio_text_image_video(data_in,
+ fs=frontend.fs,
+ audio_fs=kwargs.get("fs", 16000),
+ data_type=kwargs.get("data_type", "sound"),
+ tokenizer=tokenizer,
+ cache=cfg,
+ )
+ _is_final = cfg["is_final"] # if data_in is a file or url, set is_final=True
+
time2 = time.perf_counter()
meta_data["load_data"] = f"{time2 - time1:0.3f}"
assert len(audio_sample_list) == 1, "batch_size must be set 1"
audio_sample = torch.cat((cache["prev_samples"], audio_sample_list[0]))
- n = len(audio_sample) // chunk_stride_samples + int(_is_final)
- m = len(audio_sample) % chunk_stride_samples * (1-int(_is_final))
+ n = int(len(audio_sample) // chunk_stride_samples + int(_is_final))
+ m = int(len(audio_sample) % chunk_stride_samples * (1-int(_is_final)))
tokens = []
for i in range(n):
kwargs["is_final"] = _is_final and i == n -1
--
Gitblit v1.9.1