python/FunASR-XL.git

New file
			@@ -0,0 +1,81 @@
			# This is an example that demonstrates how to configure a model file.
			# You can modify the configuration according to your own requirements.

			# to print the register_table:
			# from funasr.register import tables
			# tables.print()

			# network architecture
			# `model` names the model class; its options are nested under `model_conf`
			# so they form that mapping rather than stray top-level keys.
			model: LLMASR2
			model_conf:
			  lsm_weight: 0.1  # label smoothing option
			  length_normalized_loss: true

			# encoder
			# The path below is a site-specific absolute location; replace it with
			# your own copy of the encoder checkpoint.
			audio_encoder: "/nfs/zhifu.gzf/init_model/SenseVoiceModelscope"
			# Options are nested here so `hub`/`freeze` do not collide with the
			# same-named keys used for the LLM.
			audio_encoder_conf:
			  hub: ms  # presumably the ModelScope hub - confirm
			  freeze: true

			# Large language model selection.
			llm: Qwen1.5-7b-chat
			# Options are nested here so `hub`/`freeze`/`init_param_path` do not
			# collide with the same-named keys of the audio encoder and tokenizer.
			llm_conf:
			  hub: hf  # presumably the Hugging Face hub - confirm
			  freeze: true
			  # Site-specific absolute path; replace with your own checkpoint location.
			  init_param_path: "/nfs/zhifu.gzf/init_model/qwen/Qwen1___5-7B-Chat_raw"

			# Adaptor between the audio encoder and the LLM.
			audio_adaptor: Transformer
			# These keys must live under `audio_adaptor_conf`: the dataset section
			# reads `${audio_adaptor_conf.downsample_rate}` from this mapping.
			audio_adaptor_conf:
			  downsample_rate: 2
			  llm_dim: 4096
			  encoder_dim: 1280
			  n_layer: 2

			# frontend related
			frontend: WhisperFrontend
			# Frontend options, nested so they belong to `frontend_conf`.
			frontend_conf:
			  fs: 16000
			  whisper_model: large-v3
			  do_pad_trim: false
			  permute: false  # true: [bs, frames, dims]; false: [bs, dims, frames]
			  # Site-specific absolute path; replace with your own mel filter file.
			  filters_path: "/nfs/zhifu.gzf/init_model/SenseVoiceModelscope/assets/mel_filters.npz"



			# Training-loop options, nested so they belong to `train_conf`.
			train_conf:
			  accum_grad: 1
			  grad_clip: 5
			  max_epoch: 15
			  keep_nbest_models: 10
			  log_interval: 10

			# Optimizer selection and its options (nested under `optim_conf`).
			optim: adamw
			optim_conf:
			  lr: 0.0001
			  weight_decay: 0.000000

			# Learning-rate scheduler selection and its options (nested under
			# `scheduler_conf`).
			scheduler: warmuplr
			scheduler_conf:
			  warmup_steps: 1500

			# Dataset selection and its options (nested under `dataset_conf`).
			dataset: OpenAIDataset
			dataset_conf:
			  index_ds: OpenAIIndexDSJsonl
			  batch_sampler: BatchSampler
			  batch_type: token
			  batch_size: 900
			  max_token_length: 1024
			  shuffle: true
			  sort_size: 1024
			  batch_size_scale_ratio_max: 2
			  num_workers: 4
			  # Interpolated from the adaptor section; this only resolves when
			  # `downsample_rate` is nested under `audio_adaptor_conf`.
			  # NOTE(review): `${...}` looks like OmegaConf-style interpolation - confirm
			  # against the config loader.
			  audio_adaptor_downsample_rate: ${audio_adaptor_conf.downsample_rate}
			  audio_encoder_downsample_rate: 2
			  data_split_num: 512
			  batch_size_sample_max: 15
			  retry: 20


			# Tokenizer selection; `init_param_path` is nested so it does not collide
			# with the same-named key used for the LLM.
			tokenizer: HuggingfaceTokenizer
			tokenizer_conf:
			  # Site-specific absolute path; replace with your own tokenizer location.
			  init_param_path: "/nfs/zhifu.gzf/init_model/qwen/Qwen1___5-7B-Chat_raw"

	examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_linear2.yaml	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	examples/industrial_data_pretraining/llm_asr/conf/whisper_qwen_transformer.yaml	81 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史

			@@ -28,7 +28,7 @@
			downsample_rate: 2
			llm_dim: 4096
			encoder_dim: 1280
			-n_layer: 2
			+n_layer: 0

			# frontend related
			frontend: WhisperFrontend