Dev gzf exp (#1657)
* sensevoice finetune
* sensevoice finetune
* sensevoice finetune
* sensevoice finetune
* sensevoice finetune
* sensevoice finetune
* sensevoice finetune
* sensevoice finetune
* sensevoice finetune
* sensevoice finetune
* bugfix
* update with main (#1631)
* update seaco finetune
* v1.0.24
---------
Co-authored-by: 维石 <shixian.shi@alibaba-inc.com>
* sensevoice
* sensevoice
* sensevoice
* update with main (#1638)
* update seaco finetune
* v1.0.24
* update rwkv template
---------
Co-authored-by: 维石 <shixian.shi@alibaba-inc.com>
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sensevoice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* sense voice
* whisper
* whisper
* update style
* update style
* oom
---------
Co-authored-by: 维石 <shixian.shi@alibaba-inc.com>
| | |
| | | from funasr.train_utils.set_all_random_seed import set_all_random_seed |
| | | from funasr.train_utils.load_pretrained_model import load_pretrained_model |
| | | from funasr.utils.misc import prepare_model_dir |
| | | from funasr.train_utils.model_summary import model_summary |
| | | from funasr import AutoModel |
| | | |
| | | |
| | |
| | | logging.info(f"Setting {k}.requires_grad = False") |
| | | p.requires_grad = False |
| | | |
| | | logging.info(f"model info: {model_summary(model)}") |
| | | if use_ddp: |
| | | model = model.cuda(local_rank) |
| | | model = DDP( |
| | |
| | | data_split_i=data_split_i, |
| | | data_split_num=dataloader.data_split_num, |
| | | ) |
| | | |
| | | torch.cuda.empty_cache() |
| | | |
| | | |
| | | trainer.validate_epoch( |
| | | model=model, dataloader_val=dataloader_val, epoch=epoch, writer=writer |
| | |
| | | max_len_in_batch = 0 # Tracks the max sample length within the current batch |
| | | |
| | | for idx in sorted_indices: |
| | | original_sample_length = self.dataset.get_source_len(idx) |
| | | if ( |
| | | original_sample_length < self.min_token_length |
| | | or original_sample_length > self.max_token_length |
| | | ): # Skip samples shorter than min_token_length or longer than max_token_length |
| | | continue |
| | | |
| | | # original_sample_length = self.dataset.get_source_len(idx) |
| | | # if ( |
| | | # original_sample_length < self.min_token_length |
| | | # or original_sample_length > self.max_token_length |
| | | # ): # Skip samples that exceed the max length |
| | | # continue |
| | | |
| | | # sample_length = 1 if self.batch_type == "example" else original_sample_length |
| | | |
| | | # Set sample_length based on the batch type |
| | | sample_length = 1 if self.batch_type == "example" else original_sample_length |
| | | if self.batch_type == "example": |
| | | sample_length = 1 |
| | | elif self.batch_type == "token": |
| | | sample_length = self.dataset.get_source_len(idx) + int( |
| | | self.dataset.get_target_len(idx) * 1.2 |
| | | ) |
| | | else: |
| | | sample_length = self.dataset.get_source_len(idx) |
| | | # Calculate potential batch size with the new sample |
| | | potential_batch_length = max(max_len_in_batch, sample_length) * (len(batch) + 1) |
| | | # Add index to batch if it doesn't exceed batch size limit |
| | |
| | | self.min_source_length = kwargs.get("min_source_length", 0) |
| | | self.max_target_length = kwargs.get("max_target_length", 2048) |
| | | self.min_target_length = kwargs.get("min_target_length", 0) |
| | | self.max_token_length = kwargs.get("max_token_length", 2200) |
| | | |
| | | is_training = kwargs.get("is_training", True) |
| | | if not (path.endswith(".jsonl") or path.endswith(".json")): |
| | |
| | | or target_len > self.max_target_length |
| | | ): |
| | | continue |
| | | |
| | | if (source_len + target_len) > self.max_token_length: |
| | | continue |
| | | |
| | | contents_i = { |
| | | "source": source, |
| | | "prompt": prompt, |
| | |
| | | # This is an example that demonstrates how to configure a model file. |
| | | # You can modify the configuration according to your own requirements. |
| | | |
| | | # to print the register_table: |
| | | # from funasr.register import tables |
| | | # tables.print() |
| | | # network architecture |
| | | model: SenseVoice |
| | | model: SenseVoiceRWKV |
| | | model_conf: |
| | | lsm_weight: 0.1 |
| | | length_normalized_loss: true |
| | |
| | | n_text_head: 20 |
| | | n_text_layer: 32 |
| | | |
| | | |
| | | # decoder |
| | | decoder: SenseVoiceDecoder |
| | | decoder_conf: |
| | | rwkv_cfg: |
| | | n_embd: 1280 |
| | | dropout: 0 |
| | | head_size_a: 64 |
| | | ctx_len: 1280 |
| | | dim_att: 1280 #${model_conf.rwkv_cfg.n_embd} |
| | | dim_ffn: null |
| | | head_size_divisor: 8 |
| | | n_layer: 32 |
| | | pre_ffn: 0 |
| | | ln0: false |
| | | ln1: false |
| | | init_rwkv: false |
| | | datatype: bf16 |
| | | |
| | | |
| | | # frontend related |
| | | frontend: WhisperFrontend |
| | | frontend_conf: |
| | |
| | | |
| | | dataset: SenseVoiceDataset |
| | | dataset_conf: |
| | | index_ds: IndexDSJsonl |
| | | index_ds: IndexDSJsonlRankSplit |
| | | batch_sampler: EspnetStyleBatchSampler |
| | | batch_type: length # example or length |
| | | batch_size: 7000 # if batch_type is example, batch_size is the number of samples; if length, batch_size is source_token_len+target_token_len |
| | | max_token_length: 2000 # filter samples if source_token_len+target_token_len > max_token_length |
| | | rank_split: true |
| | | batch_type: token # example, length, or token |
| | | batch_size: 3500 # if batch_type is example, batch_size is the number of samples; if length, batch_size is source_token_len+target_token_len; |
| | | max_token_length: 2200 |
| | | min_token_length: 60 |
| | | max_source_length: 2000 |
| | | min_source_length: 60 |
| | | max_target_length: 150 |
| | | min_target_length: 0 |
| | | shuffle: True |
| | | num_workers: 4 |
| | | sos: ${model_conf.sos} |
| | |
| | | keep_nbest_models: 20 |
| | | avg_nbest_model: ${train_conf.keep_nbest_models} |
| | | log_interval: 50 |
| | | reset_gpu_cache: true |
| | | |
| | | optim: adamw |
| | | optim_conf: |
| | |
| | | self.best_step_or_epoch = "" |
| | | self.val_acc_step_or_eoch = {} |
| | | self.val_loss_step_or_eoch = {} |
| | | |
| | | self.reset_gpu_cache = kwargs.get("reset_gpu_cache", False) |
| | | |
| | | |
| | | def save_checkpoint( |
| | | self, |
| | |
| | | time2 = time.perf_counter() |
| | | with maybe_autocast(self.use_fp16): |
| | | retval = model(**batch) |
| | | |
| | | if ( |
| | | self.reset_gpu_cache |
| | | and (torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024) > 70 |
| | | ): |
| | | torch.cuda.empty_cache() |
| | | |
| | | time3 = time.perf_counter() |
| | | speed_stats["forward_time"] = f"{time3 - time2:0.3f}" |