zhifu gao
2024-04-26 1cdb3cc28d4d89a576cc06e5cd8eb80da1f3a3aa
funasr/models/conformer/template.yaml
@@ -2,12 +2,11 @@
# You can modify the configuration according to your own requirements.
# to print the register_table:
# from funasr.utils.register import registry_tables
# registry_tables.print()
# from funasr.register import tables
# tables.print()
# network architecture
#model: funasr.models.paraformer.model:Paraformer
# Name of the model class to build (presumably resolved through the register
# table mentioned above - confirm). Declared exactly once: YAML mappings must
# not repeat a key, and lenient loaders silently keep only the last value.
model: Conformer
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
@@ -16,14 +15,14 @@
# encoder
# Each option below is declared exactly once; repeated keys are invalid YAML
# and are rejected by strict loaders and by yamllint (key-duplicates).
encoder: ConformerEncoder
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder architecture type
    normalize_before: true
    pos_enc_layer_type: rel_pos
    selfattention_layer_type: rel_selfattn
@@ -52,6 +51,7 @@
    n_mels: 80
    frame_length: 25
    frame_shift: 10
    dither: 0.0
    lfr_m: 1
    lfr_n: 1
@@ -95,7 +95,7 @@
dataset: AudioDataset
dataset_conf:
    index_ds: IndexDSJsonl
    # Declared exactly once; a repeated key is invalid YAML and only the last
    # occurrence would take effect under lenient loaders.
    batch_sampler: BatchSampler
    batch_type: example # example or length
    # If batch_type is example, batch_size is the number of samples;
    # if length, batch_size is source_token_len+target_token_len.
    batch_size: 1
    # Filter out samples where source_token_len+target_token_len > max_token_length.
    max_token_length: 2048