| | |
| | | # tables.print() |
| | | |
| | | # network architecture |
| | | #model: funasr.models.paraformer.model:Paraformer |
| | | #model: Transformer # overridden by the duplicate "model" key on the next line (last key wins); kept for reference |
| | | model: Conformer |
| | | model_conf: |
| | | ctc_weight: 0.3 |
| | | lsm_weight: 0.1 # label smoothing option |
| | |
| | | # encoder |
| | | encoder: ConformerEncoder |
| | | encoder_conf: |
| | | output_size: 256 # dimension of attention |
| | | #output_size: 256 # duplicate of the documented key above (same value); commented out for strict-YAML validity |
| | | attention_heads: 4 |
| | | linear_units: 2048 # the number of units of position-wise feed forward |
| | | num_blocks: 12 # the number of encoder blocks |
| | | #linear_units: 2048 # duplicate of the documented key above (same value); commented out for strict-YAML validity |
| | | #num_blocks: 12 # duplicate of the documented key above (same value); commented out for strict-YAML validity |
| | | dropout_rate: 0.1 |
| | | positional_dropout_rate: 0.1 |
| | | attention_dropout_rate: 0.0 |
| | | input_layer: conv2d # encoder architecture type |
| | | #input_layer: conv2d # duplicate of the documented key above (same value); commented out for strict-YAML validity |
| | | normalize_before: true |
| | | pos_enc_layer_type: rel_pos |
| | | selfattention_layer_type: rel_selfattn |
| | |
| | | n_mels: 80 |
| | | frame_length: 25 |
| | | frame_shift: 10 |
| | | dither: 0.0 |
| | | lfr_m: 1 |
| | | lfr_n: 1 |
| | | |
| | |
| | | dataset: AudioDataset |
| | | dataset_conf: |
| | | index_ds: IndexDSJsonl |
| | | #batch_sampler: DynamicBatchLocalShuffleSampler # overridden by the duplicate key on the next line (last key wins); kept for reference |
| | | batch_sampler: BatchSampler |
| | | batch_type: example # example or length |
| | | batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len; |
| | | max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length, |