liugz18
2024-07-18 d80ac2fd2df4e7fb8a28acfa512bb11472b5cc99
examples/industrial_data_pretraining/llm_asr/conf/whisper_vicuna_linear.yaml
@@ -58,14 +58,15 @@
# Training-loop settings.
train_conf:
  accum_grad: 1
  grad_clip: 5
  # NOTE(review): `max_epoch` was listed twice in this mapping (150, then 15).
  # Duplicate keys are invalid YAML and most loaders silently keep the last
  # occurrence, so the later value is retained here. Confirm 15 is intended.
  max_epoch: 15
  keep_nbest_models: 10
  log_interval: 10
# Optimizer selection and its settings.
optim: adamw
optim_conf:
   lr: 0.0001
   # NOTE(review): `weight_decay` was listed twice in this mapping
   # (0.000001, then 0). Duplicate keys are invalid YAML and most loaders
   # silently keep the last occurrence, so the later value is retained here.
   # Confirm that disabling weight decay is intended.
   weight_decay: 0
# Learning-rate scheduler selection and its settings.
scheduler: warmuplr
scheduler_conf:
   # NOTE(review): presumably the number of warm-up steps used by the
   # `warmuplr` scheduler; confirm against the scheduler implementation.
   warmup_steps: 1500
@@ -73,11 +74,10 @@
# Dataset class and its loading / batching settings.
dataset: AudioLLMVicunaDataset
dataset_conf:
    index_ds: IndexDSJsonl
    # NOTE(review): `batch_sampler`, `batch_size` and `max_token_length` were
    # each listed twice in this mapping (RankFullLocalShuffleBatchSampler / 8 /
    # 2048, then the values below). Duplicate keys are invalid YAML and most
    # loaders silently keep the last occurrence, so the later values are
    # retained here. Confirm they are the intended ones.
    batch_sampler: CustomDistributedBatchSampler
    batch_type: example  # example or length
    # If batch_type is example, batch_size is the number of samples;
    # if length, batch_size is source_token_len+target_token_len.
    batch_size: 4
    # Samples are filtered out if
    # source_token_len+target_token_len > max_token_length.
    max_token_length: 3000
    # NOTE(review): `buffer_size` appeared only once, next to the superseded
    # duplicate entries; verify that the selected batch sampler still uses it.
    buffer_size: 500
    shuffle: true
    num_workers: 4
#    preprocessor_text: TextPreprocessRemovePunctuation