From c8bae0ec85eee25d66de6b1e4502eff74d750b24 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Thu, 21 Dec 2023 13:29:37 +0800
Subject: [PATCH] funasr2

---
 funasr/datasets/audio_datasets/datasets.py |   16 ++++++++++++++++
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/funasr/datasets/audio_datasets/datasets.py b/funasr/datasets/audio_datasets/datasets.py
index 353a3a0..d69d0b5 100644
--- a/funasr/datasets/audio_datasets/datasets.py
+++ b/funasr/datasets/audio_datasets/datasets.py
@@ -24,6 +24,17 @@
 		super().__init__()
 		index_ds_class = registry_tables.index_ds_classes.get(index_ds.lower())
 		self.index_ds = index_ds_class(path)
+		preprocessor_speech = kwargs.get("preprocessor_speech", None)
+		if preprocessor_speech:
+			preprocessor_speech_class = registry_tables.preprocessor_speech_classes.get(preprocessor_speech.lower())
+			preprocessor_speech = preprocessor_speech_class(**kwargs.get("preprocessor_speech_conf"))
+		self.preprocessor_speech = preprocessor_speech
+		preprocessor_text = kwargs.get("preprocessor_text", None)
+		if preprocessor_text:
+			preprocessor_text_class = registry_tables.preprocessor_text_classes.get(preprocessor_text.lower())
+			preprocessor_text = preprocessor_text_class(**kwargs.get("preprocessor_text_conf"))
+		self.preprocessor_text = preprocessor_text
+		
 		self.frontend = frontend
 		self.fs = 16000 if frontend is None else frontend.fs
 		self.data_type = "sound"
@@ -49,8 +60,13 @@
 		# pdb.set_trace()
 		source = item["source"]
 		data_src = load_audio(source, fs=self.fs)
+		if self.preprocessor_speech:
+			data_src = self.preprocessor_speech(data_src)
 		speech, speech_lengths = extract_fbank(data_src, data_type=self.data_type, frontend=self.frontend) # speech: [b, T, d]
+
 		target = item["target"]
+		if self.preprocessor_text:
+			target = self.preprocessor_text(target)
 		ids = self.tokenizer.encode(target)
 		ids_lengths = len(ids)
 		text, text_lengths = torch.tensor(ids, dtype=torch.int64), torch.tensor([ids_lengths], dtype=torch.int32)

--
Gitblit v1.9.1