From 9befa9e508d5ca95cb5faa29cd20d23e04e525c9 Mon Sep 17 00:00:00 2001
From: jmwang66 <wangjiaming.wjm@alibaba-inc.com>
Date: Mon, 06 Feb 2023 16:42:33 +0800
Subject: [PATCH] update data2vec pretrain: add clipping

---
 egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml |   14 ++++++++++++++
 1 file changed, 14 insertions(+), 0 deletions(-)

diff --git a/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml b/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
index d7ddce6..4052774 100644
--- a/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
+++ b/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
@@ -63,3 +63,17 @@
 scheduler: tri_stage
 scheduler_conf:
     phase_ratio: [0.03,0.9,0.07]
+
+# for dataset
+dataset_conf:
+    batch_mode: clipping
+    data_names: speech,none
+    data_types: kaldi_ark,none
+    shuffle: true
+    shuffle_conf:
+        shuffle_size: 12800
+        sort_size: 12800
+    batch_conf:
+        batch_type: token
+        batch_size: 64000
+    num_workers: 8
\ No newline at end of file

--
Gitblit v1.9.1