From 810046e3df4910c8f5c1a90e4b53aca45b3397e8 Mon Sep 17 00:00:00 2001
From: wuhongsheng <664116298@qq.com>
Date: Mon, 1 Jul 2024 10:42:58 +0800
Subject: [PATCH] 优化merge segments 参数，解决新闻联播男女主持人“晚上好”合并一个speakid问题 (#1861)

---
 examples/industrial_data_pretraining/llm_asr/conf/template.yaml |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/industrial_data_pretraining/llm_asr/conf/template.yaml b/examples/industrial_data_pretraining/llm_asr/conf/template.yaml
index 3c51ff4..c64c886 100644
--- a/examples/industrial_data_pretraining/llm_asr/conf/template.yaml
+++ b/examples/industrial_data_pretraining/llm_asr/conf/template.yaml
@@ -73,7 +73,7 @@
 dataset: AudioLLMDataset
 dataset_conf:
     index_ds: IndexDSJsonl
-    batch_sampler: RankFullLocalShuffleBatchSampler
+    batch_sampler: BatchSampler
     batch_type: example # example or length
     batch_size: 8 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
     max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,

--
Gitblit v1.9.1