From 40eefbe376bd2e846bd9c451636f4e42ba1bf8bf Mon Sep 17 00:00:00 2001
From: zhuzizyf <42790740+zhuzizyf@users.noreply.github.com>
Date: Wed, 12 Apr 2023 17:18:19 +0800
Subject: [PATCH] Merge pull request #1 from zhuzizyf/fix-dataset-bug
---
egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml | 14 ++++++++++++++
1 file changed, 14 insertions(+), 0 deletions(-)
diff --git a/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml b/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
index d7ddce6..4052774 100644
--- a/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
+++ b/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
@@ -63,3 +63,17 @@
scheduler: tri_stage
scheduler_conf:
phase_ratio: [0.03,0.9,0.07]
+
+# Dataset configuration
+dataset_conf:
+ batch_mode: clipping
+ data_names: speech,none
+ data_types: kaldi_ark,none
+ shuffle: true
+ shuffle_conf:
+ shuffle_size: 12800
+ sort_size: 12800
+ batch_conf:
+ batch_type: token
+ batch_size: 64000
+ num_workers: 8
\ No newline at end of file
--
Gitblit v1.9.1