From 9befa9e508d5ca95cb5faa29cd20d23e04e525c9 Mon Sep 17 00:00:00 2001
From: jmwang66 <wangjiaming.wjm@alibaba-inc.com>
Date: Mon, 06 Feb 2023 16:42:33 +0800
Subject: [PATCH] update data2vec pretrain: add clipping
---
egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml | 14 ++++++++++++++
1 file changed, 14 insertions(+), 0 deletions(-)
diff --git a/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml b/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
index d7ddce6..4052774 100644
--- a/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
+++ b/egs/aishell2/data2vec_pretrain/conf/train_pretrain_transformer.yaml
@@ -63,3 +63,17 @@
scheduler: tri_stage
scheduler_conf:
phase_ratio: [0.03,0.9,0.07]
+
+# Dataset loading, shuffling and batching settings
+dataset_conf:
+  batch_mode: clipping
+  data_names: speech,none
+  data_types: kaldi_ark,none
+  shuffle: true
+  shuffle_conf:
+    shuffle_size: 12800
+    sort_size: 12800
+  batch_conf:
+    batch_type: token
+    batch_size: 64000  # presumably in batch_type units; confirm
+  num_workers: 8
--
Gitblit v1.9.1