| | |
| | | pos_enc_class: SinusoidalPositionEncoder |
| | | normalize_before: true |
| | | kernel_size: 11 |
| | | sanm_shfit: 0 |
| | | sanm_shift: 0 |
| | | selfattention_layer_type: sanm |
| | | |
| | | # decoder |
| | |
| | | src_attention_dropout_rate: 0.1 |
| | | att_layer_num: 16 |
| | | kernel_size: 11 |
| | | sanm_shfit: 0 |
| | | sanm_shift: 0 |
| | | |
| | | predictor: CifPredictorV2 |
| | | predictor_conf: |
| | |
| | | grad_clip: 5 |
| | | max_epoch: 150 |
| | | keep_nbest_models: 10 |
| | | avg_nbest_model: 5 |
| | | avg_nbest_model: 10 |
| | | log_interval: 50 |
| | | |
| | | optim: adam |
| | |
| | | dataset: AudioDataset |
| | | dataset_conf: |
| | | index_ds: IndexDSJsonl |
| | | batch_sampler: DynamicBatchLocalShuffleSampler |
| | | batch_sampler: BatchSampler |
| | | batch_type: example # example or length |
| | | batch_size: 1 # if batch_type is example, batch_size is the number of samples; if length, batch_size is source_token_len+target_token_len |
| | | max_token_length: 2048 # filter out samples whose source_token_len+target_token_len > max_token_length |