From d2dc3af1a69ee4075bcfc0c83dc0fb8e3fc1db4e Mon Sep 17 00:00:00 2001
From: yhliang <68215459+yhliang-aslp@users.noreply.github.com>
Date: Thu, 11 May 2023 16:31:40 +0800
Subject: [PATCH] Merge pull request #492 from alibaba-damo-academy/dev_smohan

---
 egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml |   14 ++++++++++++++
 1 file changed, 14 insertions(+), 0 deletions(-)

diff --git a/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml b/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
index d7ddce6..4052774 100644
--- a/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
+++ b/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
@@ -63,3 +63,17 @@
 scheduler: tri_stage
 scheduler_conf:
     phase_ratio: [0.03,0.9,0.07]
+
+# for dataset
+dataset_conf:  # dataset settings; the code that consumes these keys is not part of this patch
+    batch_mode: clipping
+    data_names: speech,none  # comma-separated; presumably paired by position with data_types - TODO confirm
+    data_types: kaldi_ark,none  # second entry is "none" in both lists
+    shuffle: true
+    shuffle_conf:  # presumably only consulted when shuffle is true - confirm against the loader
+        shuffle_size: 12800
+        sort_size: 12800  # same value as shuffle_size
+    batch_conf:
+        batch_type: token
+        batch_size: 64000  # NOTE(review): unit presumably follows batch_type (tokens, not utterances) - confirm
+    num_workers: 8
\ No newline at end of file

--
Gitblit v1.9.1