From e24dbdc496debec225414d4d2c760f5775e64f2a Mon Sep 17 00:00:00 2001
From: 天地 <tiandiweizun@gmail.com>
Date: 星期三, 26 三月 2025 13:44:41 +0800
Subject: [PATCH] 感觉应该从文件读取更合适,因为上面判断了文件存在,且可以读取,如果本身是文本的话,下面也会有逻辑进行处理 (#2452)
---
examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml b/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml
index 395ea44..efcf593 100644
--- a/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml
+++ b/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml
@@ -81,9 +81,9 @@
dataset: AudioDataset
dataset_conf:
index_ds: IndexDSJsonl
- batch_sampler: BatchSampler
- batch_type: example # example or length
- batch_size: 32 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
+ batch_sampler: EspnetStyleBatchSampler
+ batch_type: length # example or length
+ batch_size: 25000 # if batch_type is example, batch_size is the number of samples; if length, batch_size is source_token_len+target_token_len;
max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
buffer_size: 1024
shuffle: True
--
Gitblit v1.9.1