# Training configuration (YAML).
# Configuration for training a model whose encoder is `data2vec_encoder`.
#
# NOTE(review): the copy this was restored from had every line prefixed with
# "| " and all indentation stripped. That made the file unparsable and left
# `batch_type` and `num_workers` defined twice at the same level. The nesting
# below was rebuilt from the `*_conf:` headers; every key and value is
# unchanged. Confirm the hierarchy against the code that loads this file.

# network architecture
# encoder related
encoder: data2vec_encoder
encoder_conf:
    extractor_mode: layer_norm
    encoder_layerdrop: 0.05
    dropout_input: 0.0
    dropout_features: 0.0
    feature_grad_mult: 1.0
    encoder_embed_dim: 768

    mask_prob: 0.65
    mask_length: 10

    loss_beta: 0
    loss_scale: null

    instance_norm_target_layer: true
    average_top_k_layers: 8

    pos_conv_depth: 5
    conv_pos: 95

    ema_decay: 0.999
    ema_end_decay: 0.9999
    ema_anneal_end_step: 30000
    ema_transformer_only: true
    ema_layers_only: true

    # NOTE(review): assumed to belong to encoder_conf (they sit next to the
    # other mask_* options) - TODO confirm.
    require_same_masks: true
    mask_dropout: 0

# NOTE(review): assumed to be top-level options rather than encoder options -
# TODO confirm.
log_interval: 50
# NOTE(review): YAML reads `None` as the string "None", not as null (compare
# `loss_scale: null` above). Presumably the consumer converts it - confirm.
normalize: None

# minibatch related
batch_type: length
batch_bins: 64000
num_workers: 16

# optimization related
accum_grad: 1
grad_clip: 5
# NOTE(review): likewise the string "none", not a YAML null - confirm the
# loader accepts it.
patience: none
max_epoch: 600
val_scheduler_criterion:
    - valid
    - acc
# A list of criteria; the single entry here is the list [valid, loss, min].
best_model_criterion:
    - - valid
      - loss
      - min
keep_nbest_models: 50
unused_parameters: true

optim: fairseq_adam
optim_conf:
    lr: 0.0005
    adam_betas: [0.9,0.98]
    adam_eps: 1.0e-06
    weight_decay: 0.01

scheduler: tri_stage
scheduler_conf:
    # The three ratios sum to 1.0.
    phase_ratio: [0.03,0.9,0.07]

# for dataset
dataset_conf:
    batch_mode: clipping
    data_names: speech,none
    data_types: kaldi_ark,none
    shuffle: true
    shuffle_conf:
        shuffle_size: 12800
        sort_size: 12800
    batch_conf:
        # Nested here: a top-level `batch_type` would duplicate the
        # `batch_type: length` key in the minibatch section.
        batch_type: token
        batch_size: 64000
    # NOTE(review): must be nested (top-level `num_workers: 16` already
    # exists); placed under dataset_conf rather than batch_conf - TODO confirm.
    num_workers: 8