From dfcc5d47587d3e793cbfec2e9509c0e9a9e1732c Mon Sep 17 00:00:00 2001
From: Dogvane Huang <dogvane@gmail.com>
Date: Tue, 02 Jul 2024 12:24:13 +0800
Subject: [PATCH] fix c# demo project to new onnx model files (#1689)
---
funasr/datasets/audio_datasets/index_ds.py | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/funasr/datasets/audio_datasets/index_ds.py b/funasr/datasets/audio_datasets/index_ds.py
index d26124b..39ef409 100644
--- a/funasr/datasets/audio_datasets/index_ds.py
+++ b/funasr/datasets/audio_datasets/index_ds.py
@@ -21,6 +21,7 @@
self.min_source_length = kwargs.get("min_source_length", 0)
self.max_target_length = kwargs.get("max_target_length", 2048)
self.min_target_length = kwargs.get("min_target_length", 0)
+ self.max_token_length = kwargs.get("max_token_length", 2200)
is_training = kwargs.get("is_training", True)
if not (path.endswith(".jsonl") or path.endswith(".json")):
@@ -34,7 +35,7 @@
with open(path, encoding="utf-8") as fin:
file_list_all = fin.readlines()
- num_per_slice = (len(file_list_all) - 1) // data_split_num + 1
+ num_per_slice = (len(file_list_all) - 1) // data_split_num + 1 # 16
file_list = file_list_all[
data_split_i * num_per_slice : (data_split_i + 1) * num_per_slice
]
@@ -103,6 +104,10 @@
or target_len > self.max_target_length
):
continue
+
+ if (source_len + target_len) > self.max_token_length:
+ continue
+
contents_i = {
"source": source,
"prompt": prompt,
@@ -113,6 +118,12 @@
text_language = data.get("text_language", None)
if text_language is not None:
contents_i["text_language"] = text_language
+ if "emo_target" in data:
+ contents_i["emo_target"] = data["emo_target"]
+ if "event_target" in data:
+ contents_i["event_target"] = data["event_target"]
+ if "with_or_wo_itn" in data:
+ contents_i["with_or_wo_itn"] = data["with_or_wo_itn"]
# audio_language = data.get("audio_language", None)
# if audio_language is not None:
# contents_i["audio_language"] = audio_language
--
Gitblit v1.9.1