# network architecture
# encoder related
encoder: data2vec_encoder
encoder_conf:
    extractor_mode: layer_norm
    encoder_layerdrop: 0.1
    dropout_input: 0.0
    dropout_features: 0.0
    feature_grad_mult: 0.0
    encoder_embed_dim: 768

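    # Masking of the student input: following the fairseq data2vec
    # convention, mask_prob is (approximately) the fraction of frames
    # that end up masked, in spans of mask_length frames; overlapping
    # spans make the realized fraction slightly lower.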
    mask_prob: 0.65
    mask_length: 10

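    # Regression loss on masked positions: in the fairseq data2vec code,
    # loss_beta: 0 selects plain MSE rather than smooth L1, and a null
    # loss_scale defaults to scaling by 1/sqrt(embed_dim).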
    loss_beta: 0
    loss_scale: null

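    # Teacher targets: the average of the top average_top_k_layers
    # transformer layer outputs, with each layer instance-normalized
    # first when instance_norm_target_layer is true.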
    instance_norm_target_layer: true
    average_top_k_layers: 8

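    # Convolutional relative positional encoding: conv_pos is the overall
    # receptive field, split across pos_conv_depth stacked conv layers
    # (fairseq divides the kernel size accordingly).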
    pos_conv_depth: 5
    conv_pos: 95

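    # EMA teacher: the decay is annealed linearly from ema_decay to
    # ema_end_decay over the first ema_anneal_end_step updates (the
    # fairseq data2vec schedule); the two flags below restrict the
    # momentum update to the transformer blocks.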
    ema_decay: 0.999
    ema_end_decay: 0.9999
    ema_anneal_end_step: 30000
    ema_transformer_only: true
    ema_layers_only: true

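    # require_same_masks keeps the number of masked frames identical
    # across the utterances in a batch so targets stack cleanly;
    # mask_dropout: 0 disables randomly unmasking a fraction of them.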
    require_same_masks: true
    mask_dropout: 0

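# Note on the hybrid objective below: ESPnet combines the losses as
# ctc_weight * CTC + (1 - ctc_weight) * attention, so ctc_weight: 1.0
# trains with pure CTC; cer_ctc is therefore the natural validation metric.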
# hybrid CTC/attention
model_conf:
    ctc_weight: 1.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false

# for logger
log_interval: 50

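# batch_type: length forms dynamic minibatches whose summed sequence
# lengths stay below batch_bins, so the number of utterances per batch
# varies with utterance length.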
# minibatch related
batch_type: length
batch_bins: 16000
num_workers: 16

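# Checkpoint selection keeps the 10 models with the lowest validation
# CTC CER (see best_model_criterion); these are typically averaged at
# decoding time.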
# optimization related
accum_grad: 1
grad_clip: 5
patience: none
max_epoch: 50
val_scheduler_criterion:
- valid
- acc
best_model_criterion:
-   - valid
    - cer_ctc
    - min
keep_nbest_models: 10
unused_parameters: true
normalize: none

# NoamLR is deprecated. Use WarmupLR.
# The following is the equivalent setting for NoamLR:
#
#     optim: adam
#     optim_conf:
#         lr: 10.
#     scheduler: noamlr
#     scheduler_conf:
#         model_size: 256
#         warmup_steps: 25000
#
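# ESPnet's WarmupLR schedules
#     lr = optim_lr * warmup_steps**0.5 * min(step**-0.5, step * warmup_steps**-1.5),
# i.e. linear warmup to the configured peak (5e-5 here) at step 25000,
# then inverse square-root decay.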
optim: adam
optim_conf:
    lr: 0.00005
scheduler: warmuplr     # pytorch v1.1.0+ required
scheduler_conf:
    warmup_steps: 25000

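# SpecAugment applied to the frontend features: time warping plus
# frequency and time masking; each mask width is drawn uniformly from
# the configured range, and two masks of each kind are applied.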
specaug: specaug
specaug_conf:
    apply_time_warp: true
    time_warp_window: 5
    time_warp_mode: bicubic
    apply_freq_mask: true
    freq_mask_width_range:
    - 0
    - 30
    num_freq_mask: 2
    apply_time_mask: true
    time_mask_width_range:
    - 0
    - 40
    num_time_mask: 2