| | |
| | | self_attention_dropout_rate: 0.0 |
| | | src_attention_dropout_rate: 0.0 |
| | | |
| | | # frontend related |
| | | frontend: wav_frontend |
| | | frontend_conf: |
| | | fs: 16000 |
| | | window: hamming |
| | | n_mels: 80 |
| | | frame_length: 25 |
| | | frame_shift: 10 |
| | | lfr_m: 1 |
| | | lfr_n: 1 |
| | | |
| | | # hybrid CTC/attention |
| | | model: paraformer_bert |
| | | model_conf: |
| | |
| | | embed_dims: 768 |
| | | embeds_loss_weight: 2.0 |
| | | |
| | | |
| | | |
| | | # minibatch related |
| | | #batch_type: length |
| | | #batch_bins: 40000 |
| | | batch_type: numel |
| | | batch_bins: 2000000 |
| | | num_workers: 16 |
| | | |
| | | # optimization related |
| | | accum_grad: 4 |
| | | accum_grad: 1 |
| | | grad_clip: 5 |
| | | max_epoch: 50 |
| | | max_epoch: 150 |
| | | val_scheduler_criterion: |
| | | - valid |
| | | - acc |
| | |
| | | threshold: 1.0 |
| | | l_order: 1 |
| | | r_order: 1 |
| | | tail_threshold: 0.45 |
| | | |
| | | dataset_conf: |
| | | data_names: speech,text,embed |
| | | data_types: sound,text,kaldi_ark |
| | | shuffle: True |
| | | shuffle_conf: |
| | | shuffle_size: 2048 |
| | | sort_size: 500 |
| | | batch_conf: |
| | | batch_type: token |
| | | batch_size: 25000 |
| | | num_workers: 8 |
| | | |
| | | log_interval: 50 |
| | | normalize: None |
| | | allow_variable_data_keys: true |
| | | normalize: None |