From 2acd24f0158b2c86d2fb4e6f1134b67a1150500e Mon Sep 17 00:00:00 2001
From: jmwang66 <wangjiaming.wjm@alibaba-inc.com>
Date: Thu, 29 Feb 2024 17:14:59 +0800
Subject: [PATCH] update whisper lid (#1407)
---
funasr/datasets/llm_datasets/datasets.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/funasr/datasets/llm_datasets/datasets.py b/funasr/datasets/llm_datasets/datasets.py
index ab0e48a..9673d76 100644
--- a/funasr/datasets/llm_datasets/datasets.py
+++ b/funasr/datasets/llm_datasets/datasets.py
@@ -37,13 +37,13 @@
self.data_type = "sound"
self.tokenizer = tokenizer
- self.int_pad_value = int_pad_value
self.float_pad_value = float_pad_value
self.prompt = kwargs.get("prompt", "Transcribe speech to text.")
self.prompt_pre = "USER: \nINSTRUCTION: {}\nINPUT: ".format(
self.prompt) # "USER: \nINSTRUCTION: {}\nnINPUT: {}\nASSISTANT: "
self.prompt_af = ""
self.IGNORE_INDEX = kwargs.get("IGNORE_INDEX", -100)
+ self.int_pad_value = self.IGNORE_INDEX
def get_source_len(self, index):
item = self.index_ds[index]
--
Gitblit v1.9.1