From 528f92f7a2a26cade1c57ccf26b0ba6524e7cae5 Mon Sep 17 00:00:00 2001
From: TnR2 <115166373+TnR2@users.noreply.github.com>
Date: Wed, 01 Oct 2025 14:45:17 +0800
Subject: [PATCH] fix: handle empty strings after event removal in transcription processing (def rich_transcription_postprocess(s)) (#2681)
---
examples/industrial_data_pretraining/llm_asr/conf/template.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/industrial_data_pretraining/llm_asr/conf/template.yaml b/examples/industrial_data_pretraining/llm_asr/conf/template.yaml
index 3c51ff4..c64c886 100644
--- a/examples/industrial_data_pretraining/llm_asr/conf/template.yaml
+++ b/examples/industrial_data_pretraining/llm_asr/conf/template.yaml
@@ -73,7 +73,7 @@
dataset: AudioLLMDataset
dataset_conf:
index_ds: IndexDSJsonl
- batch_sampler: RankFullLocalShuffleBatchSampler
+ batch_sampler: BatchSampler
batch_type: example # example or length
batch_size: 8 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
--
Gitblit v1.9.1