# network architecture

# encoder related
encoder: data2vec_encoder
encoder_conf:
    extractor_mode: layer_norm
    encoder_layerdrop: 0.1
    dropout_input: 0.0
    dropout_features: 0.0
    feature_grad_mult: 0.0
    encoder_embed_dim: 768
    mask_prob: 0.65
    mask_length: 10
    loss_beta: 0
    loss_scale: null
    instance_norm_target_layer: true
    average_top_k_layers: 8
    pos_conv_depth: 5
    conv_pos: 95
    ema_decay: 0.999
    ema_end_decay: 0.9999
    ema_anneal_end_step: 30000
    ema_transformer_only: true
    ema_layers_only: true
    require_same_masks: true
    mask_dropout: 0

# hybrid CTC/attention loss (ctc_weight: 1.0 trains with CTC only)
model_conf:
    ctc_weight: 1.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false

# for logger
log_interval: 50

# minibatch related
batch_type: length
batch_bins: 16000
num_workers: 16

# optimization related
accum_grad: 1
grad_clip: 5
patience: none
max_epoch: 50
val_scheduler_criterion:
-   valid
-   acc
best_model_criterion:
-   - valid
    - cer_ctc
    - min
keep_nbest_models: 10
unused_parameters: true
normalize: none

# NoamLR is deprecated. Use WarmupLR.
# The following is the equivalent setting for NoamLR:
#
#     optim: adam
#     optim_conf:
#         lr: 10.
#     scheduler: noamlr
#     scheduler_conf:
#         model_size: 256
#         warmup_steps: 25000
#
optim: adam
optim_conf:
    lr: 0.00005
scheduler: warmuplr     # pytorch v1.1.0+ required
scheduler_conf:
    warmup_steps: 25000

specaug: specaug
specaug_conf:
    apply_time_warp: true
    time_warp_window: 5
    time_warp_mode: bicubic
    apply_freq_mask: true
    freq_mask_width_range:
    - 0
    - 30
    num_freq_mask: 2
    apply_time_mask: true
    time_mask_width_range:
    - 0
    - 40
    num_time_mask: 2
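
# ----------------------------------------------------------------------------
# Notes (explanatory comments only; not read by ESPnet). These sketch the
# intended behavior of the settings above, assuming ESPnet2's WarmupLR
# scheduler and the data2vec EMA schedule:
#
# WarmupLR scales the learning rate per optimizer step as
#
#     lr(step) = optim_lr * warmup_steps**0.5
#                * min(step**-0.5, step * warmup_steps**-1.5)
#
# so with lr: 0.00005 and warmup_steps: 25000, the rate rises to its peak of
# 5e-5 at step 25000 and then decays proportionally to step**-0.5.
#
# The data2vec teacher's EMA decay is annealed linearly from ema_decay (0.999)
# to ema_end_decay (0.9999) over the first ema_anneal_end_step (30000) updates
# and held constant afterwards.
#
# A typical invocation from an ESPnet2 recipe directory (the config path below
# is illustrative, not a file this repo necessarily ships):
#
#     ./asr.sh --asr_config conf/tuning/train_asr_data2vec.yaml ...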