# python/FunASR-XL.git

# Model selection: `model` names the architecture, `model_conf` holds its options.
model: sond
model_conf:
    # presumably the label-smoothing weight (0.0 would then disable it) — TODO confirm
    lsm_weight: 0.0
    length_normalized_loss: true
    # NOTE(review): decoder_conf.in_units (32) equals 2 * max_spk_num; presumably
    # the two have to be changed together — verify against the model code.
    max_spk_num: 16
 
# speech encoder
encoder: ecapa_tdnn
encoder_conf:
    # input_size stays commented out: it is passed in by the model and equals
    # the feature dimension (80 here matches frontend_conf.n_mels).
    # input_size: 80
    pool_size: 20
    stride: 1
# Speaker encoder: selects the `conv` implementation and sets its options.
speaker_encoder: conv
speaker_encoder_conf:
    input_units: 256
    num_layers: 3
    num_units: 256
    kernel_size: 1
    dropout_rate: 0.0
    # YAML null (no value), unlike the quoted "null" used for cd_scorer_conf.input_layer
    position_encoder: null
    out_units: 256
    out_norm: false
    auxiliary_states: false
    # NOTE(review): the tf2torch_* prefixes presumably pair this module's PyTorch
    # parameter names with the names used in a TensorFlow checkpoint — confirm
    # against the conversion code before renaming either side.
    tf2torch_tensor_name_prefix_torch: speaker_encoder
    tf2torch_tensor_name_prefix_tf: EAND/speaker_encoder
# First scorer: selects `dot`; its options mapping is explicitly empty.
# NOTE(review): "ci" presumably stands for context-independent — confirm.
ci_scorer: dot
ci_scorer_conf: {}
# Second scorer: selects `san` and sets its options.
# NOTE(review): "cd" presumably stands for context-dependent — confirm.
cd_scorer: san
cd_scorer_conf:
    # NOTE(review): 512 is twice speaker_encoder_conf.out_units (256); presumably
    # two 256-dim streams are concatenated before this scorer — confirm.
    input_size: 512
    output_size: 512
    out_units: 1
    attention_heads: 4
    linear_units: 1024
    num_blocks: 4
    # all three dropout rates below are set to 0.0
    dropout_rate: 0.0
    positional_dropout_rate: 0.0
    attention_dropout_rate: 0.0
    # Keep this as the quoted string "null" (not a YAML null): the string form
    # is what removes the input layer.
    input_layer: "null"
    # a real YAML null, in contrast to input_layer above
    pos_enc_class: null
    normalize_before: true
    # NOTE(review): presumably the PyTorch / TensorFlow name prefixes used when
    # converting a checkpoint — confirm against the conversion code.
    tf2torch_tensor_name_prefix_torch: cd_scorer
    tf2torch_tensor_name_prefix_tf: EAND/compute_distance_layer
# post net
decoder: fsmn
decoder_conf:
    # 32 = 2 * model_conf.max_spk_num (16).
    # NOTE(review): presumably the two scorer outputs concatenated per speaker — confirm.
    in_units: 32
    # 2517 = C(16,0) + C(16,1) + C(16,2) + C(16,3) + C(16,4)
    #      = 1 + 16 + 120 + 560 + 1820, i.e. the number of subsets of 16
    # speakers with at most 4 members.
    # NOTE(review): presumably the output label count; re-derive it if
    # max_spk_num changes — confirm against the label encoding.
    out_units: 2517
    filter_size: 31
    fsmn_num_layers: 6
    dnn_num_layers: 1
    num_memory_units: 512
    ffn_inner_dim: 512
    dropout_rate: 0.0
    # NOTE(review): presumably the PyTorch / TensorFlow name prefixes used when
    # converting a checkpoint — confirm against the conversion code.
    tf2torch_tensor_name_prefix_torch: decoder
    tf2torch_tensor_name_prefix_tf: EAND/post_net
# Front end: selects `wav_frontend` and sets its feature-extraction options.
frontend: wav_frontend
frontend_conf:
    # presumably the sampling rate in Hz (consistent with the "16k" factor in
    # the batch_bins note) — TODO confirm
    fs: 16000
    window: povey
    # same value as the commented-out encoder_conf.input_size (80)
    n_mels: 80
    # presumably milliseconds (25 ms window, 10 ms shift) — TODO confirm units
    frame_length: 25
    frame_shift: 10
    filter_length_min: -1
    filter_length_max: -1
    # NOTE(review): lfr_m / lfr_n = 1 presumably means no low-frame-rate
    # stacking or subsampling — confirm against the frontend implementation.
    lfr_m: 1
    lfr_n: 1
    dither: 0.0
    snip_edges: false
 
# minibatch related
batch_type: length
# 16 s * 16 kHz * 16 = 4096000; the last factor is presumably the number of
# utterances per batch — TODO confirm
batch_bins: 4096000
num_workers: 8
 
# optimization related
accum_grad: 1
grad_clip: 5
max_epoch: 50
# two-element list; presumably [data split, metric] — TODO confirm
val_scheduler_criterion: [valid, acc]
# list of three-element lists; presumably [data split, metric, min|max] — TODO confirm
best_model_criterion:
    - [valid, der, min]
    - [valid, forward_steps, max]
keep_nbest_models: 10
 
# Optimizer and learning-rate scheduler: each `*_conf` mapping holds the
# options for the component named on the line above it.
optim: adam
optim_conf:
    lr: 0.001
scheduler: warmuplr
scheduler_conf:
    warmup_steps: 10000
 
# without spec aug
# `specaug` is null, so the options below are presumably unused until a
# specaug implementation is named here — TODO confirm.
specaug: null
specaug_conf:
    apply_time_warp: true
    time_warp_window: 5
    time_warp_mode: bicubic
    apply_freq_mask: true
    # two-element range, presumably [min, max] mask width — TODO confirm
    freq_mask_width_range: [0, 30]
    num_freq_mask: 2
    apply_time_mask: true
    # two-element range, presumably [min, max] mask width — TODO confirm
    time_mask_width_range: [0, 40]
    num_time_mask: 2
 
log_interval: 50
# without normalize
# Written as YAML null: a bare `None` is not a YAML null and loads as the
# string "None". This also matches how `specaug` is disabled in this file.
normalize: null