From 3df109adfccedeb134dea4ba2ea9a2da89872048 Mon Sep 17 00:00:00 2001
From: Isuxiz Slidder <48672727+Isuxiz@users.noreply.github.com>
Date: Mon, 31 Mar 2025 17:51:52 +0800
Subject: [PATCH] Update model.py to fix "IndexError: index 1 is out of bounds for dimension 1 with size 0" (#2454)
---
funasr/datasets/openai_datasets/index_ds.py | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/funasr/datasets/openai_datasets/index_ds.py b/funasr/datasets/openai_datasets/index_ds.py
index cc518f8..010d2d5 100644
--- a/funasr/datasets/openai_datasets/index_ds.py
+++ b/funasr/datasets/openai_datasets/index_ds.py
@@ -15,7 +15,8 @@
def __init__(self, path: str, **kwargs):
super().__init__()
- self.max_source_length = kwargs.get("max_source_length", 2048)
+
+ self.max_source_length = kwargs.get("max_source_length", 3000)
self.min_source_length = kwargs.get("min_source_length", 0)
self.max_target_length = kwargs.get("max_target_length", 2048)
self.min_target_length = kwargs.get("min_target_length", 0)
@@ -52,6 +53,15 @@
data = data_dict["messages"]
speech_length = data_dict.get("speech_length", -1) // 8
text_length = data_dict.get("text_length", 0)
+ if speech_length > self.max_source_length:
+ logging.info(
+ "speech_length: {speech_length} > {self.max_source_length}, drop it"
+ )
+ continue
+ if text_length > self.max_target_length:
+ continue
+
+ self.max_target_length = kwargs.get("max_target_length", 2048)
system, user, assistant = [], [], []
for i, item in enumerate(data):
--
Gitblit v1.9.1