jmwang66
2023-05-16 6f7e27eb7c2d0a7649ec8f14d167c8da8e29f906
egs/aishell2/paraformerbert/conf/train_asr_paraformerbert_conformer_20e_6d_1280_320.yaml
@@ -29,6 +29,17 @@
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0
# frontend related
# `frontend` names the front-end to use (wav_frontend); `frontend_conf` presumably
# holds the options passed to it — confirm against the consumer.
frontend: wav_frontend
frontend_conf:
    # presumably the input sampling rate in Hz — TODO confirm
    fs: 16000
    window: hamming
    # presumably the number of mel filterbank bins — TODO confirm
    n_mels: 80
    # presumably window length / hop size in milliseconds — TODO confirm units
    frame_length: 25
    frame_shift: 10
    # NOTE(review): lfr_m / lfr_n look like low-frame-rate stacking / subsampling
    # factors; both are 1 here, which presumably leaves the frame rate unchanged — confirm.
    lfr_m: 1
    lfr_n: 1
# hybrid CTC/attention
# `model` selects the model by name (paraformer_bert); the `model_conf` mapping
# below carries its options.
model: paraformer_bert
model_conf:
@@ -36,7 +47,7 @@
    # NOTE(review): these entries are indented as children of a parent mapping —
    # presumably model_conf; confirm that no sibling keys are missing around them.
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    # presumably the weight of the predictor loss term in the total loss — TODO confirm
    predictor_weight: 1.0
    # NOTE(review): glat_context_p and sampling_ratio are both 0.4; confirm whether they
    # are meant to stay in sync or are independent knobs.
    glat_context_p: 0.4
    sampling_ratio: 0.4
    # NOTE(review): embeds_id / embed_dims / embeds_loss_weight presumably configure the
    # auxiliary embedding loss fed by the `embed` data stream (768-dim) — confirm.
    embeds_id: 3
    embed_dims: 768
    embeds_loss_weight: 2.0
@@ -45,7 +56,7 @@
# Training-loop settings.
accum_grad: 2
grad_clip: 5
# NOTE(review): a plain `none` is resolved by YAML as the string "none", not null —
# confirm the consumer interprets it as "no early-stopping patience".
patience: none
# NOTE(review): max_epoch is defined twice (50, then 150). Duplicate keys are invalid in
# YAML 1.2 and most parsers silently keep the last one (150). This looks like the old and
# new side of a change kept together — confirm the intended value and drop the other.
max_epoch: 50
max_epoch: 150
# Two-item list (valid, acc); presumably "validation accuracy" — TODO confirm.
val_scheduler_criterion:
    - valid
    - acc
@@ -78,24 +89,24 @@
    - 40
    num_time_mask: 2
# NOTE(review): `predictor` is defined twice (cif_predictor_sanm, then cif_predictor).
# Duplicate keys are invalid in YAML 1.2; parsers that tolerate them typically keep the
# last value (cif_predictor). Confirm which predictor is intended and remove the other.
predictor: cif_predictor_sanm
predictor: cif_predictor
# Options for the selected predictor.
# NOTE(review): verify that the predictor that is finally selected accepts every key
# below (idim, threshold, l_order, r_order).
predictor_conf:
  # presumably the predictor input dimension — TODO confirm it matches the encoder output size
  idim: 320
  threshold: 1.0
  l_order: 1
  r_order: 1
# NOTE(review): log_interval and normalize appear both here and again after dataset_conf
# with the same values. Duplicate keys are invalid in YAML 1.2 — keep one copy of each.
log_interval: 50
# NOTE(review): a plain `None` is resolved by YAML as the string "None", not null —
# confirm the consumer treats it as "no normalization".
normalize: None
# Dataset / loader settings.
dataset_conf:
    # Comma-separated stream names; data_types below also has three comma-separated
    # entries, presumably paired by position with these names — TODO confirm.
    data_names: speech,text,embed
    # NOTE(review): data_types is defined twice (kaldi_ark,text,kaldi_ark then
    # sound,text,kaldi_ark); most parsers keep the last. Confirm the intended input
    # format for the speech stream and remove the other line.
    data_types: kaldi_ark,text,kaldi_ark
    data_types: sound,text,kaldi_ark
    shuffle: True
    shuffle_conf:
        # NOTE(review): shuffle_size is defined twice (10240, then 2048); most parsers
        # keep the last. Confirm the intended value and remove the other line.
        shuffle_size: 10240
        shuffle_size: 2048
        sort_size: 500
    batch_conf:
        # batch_size is presumably counted in tokens given batch_type: token — TODO confirm
        batch_type: token
        batch_size: 25000
    # NOTE(review): num_workers is listed twice with the same value; keep a single entry.
    num_workers: 8
    num_workers: 8
# Second copy of log_interval / normalize (same values as the pair above dataset_conf).
log_interval: 50
normalize: None