| | |
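| | | # Per-utterance sampling step: same_num[li].sum() is assumed to count positions already |
| | | # predicted correctly, so target_num = (#remaining positions) * sampling_ratio; that many |
| | | # randomly chosen entries of input_mask[li] are cleared (set to 0) via scatter_. |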
| | | for li in range(bsz): |
| | | target_num = (((seq_lens[li] - same_num[li].sum()).float()) * self.sampling_ratio).long() |
| | | if target_num > 0: |
| | | input_mask[li].scatter_(dim=0, index=torch.randperm(seq_lens[li])[:target_num].to(input_mask.device), value=0) |
| | | input_mask = input_mask.eq(1) |
| | | input_mask = input_mask.masked_fill(~nonpad_positions, False) |
| | | input_mask_expand_dim = input_mask.unsqueeze(2).to(pre_acoustic_embeds.device) |
| | |
| | | for li in range(bsz): |
| | | target_num = (((seq_lens[li] - same_num[li].sum()).float()) * self.sampling_ratio).long() |
| | | if target_num > 0: |
| | | input_mask[li].scatter_(dim=0, index=torch.randperm(seq_lens[li])[:target_num].to(input_mask.device), value=0) |
| | | input_mask = input_mask.eq(1) |
| | | input_mask = input_mask.masked_fill(~nonpad_positions, False) |
| | | input_mask_expand_dim = input_mask.unsqueeze(2).to(pre_acoustic_embeds.device) |
| | |
| | | use_fsdp = kwargs.get("use_fsdp", None) |
| | | if use_ddp or use_fsdp: |
| | | dist.init_process_group(backend=kwargs.get("backend", "nccl"), init_method='env://') |
| | | torch.cuda.set_device(local_rank) |
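| | | # init_method='env://' reads MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE from the environment; |
| | | # local_rank is assumed to be provided by the launcher (e.g. torchrun exports LOCAL_RANK), e.g. |
| | | # local_rank = int(os.environ.get("LOCAL_RANK", 0)) |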
| | | |
| | | |
| | | # build_tokenizer |
| | |
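| | | # dynamic_import is assumed to resolve the model string in kwargs["model"] (a "module:Class" |
| | | # path, as the commented-out alternative below suggests) to the model class; the output |
| | | # vocabulary size is taken from the tokenizer's token list. |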
| | | # model_class = load_class_from_path(kwargs.get("model").split(":")) |
| | | model_class = dynamic_import(kwargs.get("model")) |
| | | model = model_class(**kwargs, **kwargs["model_conf"], vocab_size=len(tokenizer.token_list)) |
| | | # model = model.to(device=kwargs.get("device", "cpu")) |
| | | |
| | | |
| | | frontend = model.frontend |
| | | # init_param |
| | | init_param = kwargs.get("init_param", None) |
| | | if init_param is not None: |
| | | init_param = eval(init_param) |
| | | if isinstance(init_param, str): |
| | | init_param = (init_param,) |
| | | logging.info("init_param is not None: ", init_param) |
| | | for p in init_param: |
| | | logging.info(f"Loading pretrained params from {p}") |
| | | load_pretrained_model( |
| | | model=model, |
| | | init_param=p, |
| | | ignore_init_mismatch=kwargs.get("ignore_init_mismatch", True), |
| | | oss_bucket=kwargs.get("oss_bucket", None), |
| | | ) |
| | | else: |
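| | | # no pretrained checkpoint given: fall back to fresh initialization with the configured scheme |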
| | | initialize(model, kwargs.get("init", "kaiming_normal")) |
| | | |
| | | # import pdb; |
| | | # pdb.set_trace() |
| | |
| | | if use_ddp: |
| | | model = DDP(model, device_ids=[local_rank]) |
| | | elif use_fsdp: |
| | | model = FSDP(model).cuda(local_rank) |
| | | else: |
| | | model = model.to(device=kwargs.get("device", "cuda")) |
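| | | # DDP keeps a full model replica per rank and all-reduces gradients, FSDP shards parameters and |
| | | # gradients across ranks; otherwise the model is simply moved to the single configured device. |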
| | | |
| | | |
| | | # optim |
| | |
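| | | # scheduler_choices maps the configured scheduler name to its class, which is then built |
| | | # on top of the optimizer with the options from scheduler_conf. |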
| | | scheduler_class = scheduler_choices.get(scheduler) |
| | | scheduler = scheduler_class(optim, **kwargs.get("scheduler_conf")) |
| | | |
| | | # init_param |
| | | init_param = kwargs.get("init_param", None) |
| | | if init_param is not None: |
| | | init_param = eval(init_param) |
| | | if isinstance(init_param, str): |
| | | init_param = (init_param,) |
| | | logging.info("init_param is not None: ", freeze_param) |
| | | for p in init_param: |
| | | logging.info(f"Loading pretrained params from {p}") |
| | | load_pretrained_model( |
| | | model=model, |
| | | init_param=p, |
| | | ignore_init_mismatch=kwargs.get("ignore_init_mismatch", True), |
| | | oss_bucket=kwargs.get("oss_bucket", None), |
| | | ) |
| | | else: |
| | | initialize(model, kwargs.get("init", "kaiming_normal")) |
| | | |
| | | |
| | | # dataset |
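| | | # AudioDataset is handed the frontend (presumably for on-the-fly feature extraction) and the |
| | | # tokenizer (to map transcripts to target ids), plus the options from dataset_conf. |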
| | | dataset_tr = AudioDataset(kwargs.get("train_data_set_list"), frontend=frontend, tokenizer=tokenizer, **kwargs.get("dataset_conf")) |
| | | |
| | | # dataloader |
| | | batch_sampler = BatchSampler(dataset_tr, **kwargs.get("dataset_conf"), **kwargs.get("dataset_conf").get("batch_conf")) |
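| | | # Sketch of how these could feed a DataLoader (assumes BatchSampler follows torch's |
| | | # batch_sampler protocol, i.e. it yields lists of dataset indices; the real wiring may differ): |
| | | # dataloader_train = torch.utils.data.DataLoader(dataset_tr, batch_sampler=batch_sampler, |
| | | #     collate_fn=getattr(dataset_tr, "collator", None), |
| | | #     num_workers=kwargs.get("dataset_conf", {}).get("num_workers", 0)) |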
| | |
| | | for batch_idx, batch in enumerate(self.dataloader_train): |
| | | batch = to_device(batch, self.device) |
| | | |
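| | | # Gradient accumulation: on steps that are not an accumulation boundary, DDP's no_sync() |
| | | # context skips the gradient all-reduce so gradients accumulate locally; the boundary step |
| | | # (batch_idx % accumulation_steps == 0) runs without it and triggers synchronization. |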
| | | my_context = self.model.no_sync if batch_idx % accumulation_steps != 0 else nullcontext |
| | | with my_context(): |
| | | retval = self.model(**batch) |
| | | loss, stats, weight = retval |
| | |
| | | self.optim.zero_grad() |
| | | |
| | | pbar.update(1) |
| | | |
| | | if self.local_rank == 0: |
| | | pbar.set_description( |
| | | f"Training Epoch: {epoch + 1}/{self.max_epoch}, step {batch_idx}/{len(self.dataloader_train)} (loss: {loss.detach().float()})") |
| | | |
| | | pbar.close() |
| | | |
| | | # def _train_epoch(self, epoch): |