import logging
import os

import hydra
import torch
import torch.distributed as dist
from omegaconf import DictConfig
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.nn.parallel import DistributedDataParallel as DDP

# NOTE: set_all_random_seed, dynamic_import, load_pretrained_model, initialize,
# scheduler_choices, AudioDataset, and BatchSampler are project-local helpers
# assumed to be importable from the surrounding package.


@hydra.main()
def main(kwargs: DictConfig):
    # set random seed for reproducibility
    set_all_random_seed(kwargs.get("seed", 0))
    torch.backends.cudnn.enabled = kwargs.get("cudnn_enabled", torch.backends.cudnn.enabled)

    local_rank = int(os.environ.get("LOCAL_RANK", 0))
    # check whether we are running under DDP or FSDP
    use_ddp = "WORLD_SIZE" in os.environ and int(os.environ["WORLD_SIZE"]) > 1
    use_fsdp = kwargs.get("use_fsdp", False)
    if use_ddp or use_fsdp:
        dist.init_process_group(backend=kwargs.get("backend", "nccl"), init_method="env://")
        torch.cuda.set_device(local_rank)

    # build_tokenizer
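    # NOTE: the tokenizer construction was elided in the original script, but
    # the model below needs `tokenizer.token_list`. A minimal sketch, assuming
    # a hypothetical project-local `build_tokenizer` helper and a
    # `tokenizer_conf` config section (both are assumptions, not the original
    # API):
    tokenizer = build_tokenizer(**kwargs.get("tokenizer_conf", {}))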
    # resolve the model class from its import path and instantiate it
    model_class = dynamic_import(kwargs.get("model"))
    model = model_class(**kwargs, **kwargs["model_conf"], vocab_size=len(tokenizer.token_list))

    frontend = model.frontend

    # load pretrained parameters if requested, otherwise initialize from scratch
    init_param = kwargs.get("init_param", None)
    if init_param is not None:
        # a single checkpoint path may be given as a plain string
        if not isinstance(init_param, (list, tuple)):
            init_param = (init_param,)
        logging.info("init_param is not None: %s", init_param)
        for p in init_param:
            logging.info(f"Loading pretrained params from {p}")
            load_pretrained_model(
                model=model,
                init_param=p,
                ignore_init_mismatch=kwargs.get("ignore_init_mismatch", True),
                oss_bucket=kwargs.get("oss_bucket", None),
            )
    else:
        initialize(model, kwargs.get("init", "kaiming_normal"))

    if use_ddp:
        model = model.cuda(local_rank)
        model = DDP(
            model,
            device_ids=[local_rank],
            find_unused_parameters=kwargs.get("train_conf", {}).get("find_unused_parameters", False),
        )
    elif use_fsdp:
        # move to the target device before sharding
        model = FSDP(model.cuda(local_rank))
    else:
        model = model.to(device=kwargs.get("device", "cuda"))

    # optim
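    # NOTE: the optimizer construction was elided in the original script, but
    # the scheduler below needs an `optim` instance. A minimal sketch, assuming
    # AdamW and an `optim_conf` config section (an assumption, not the original
    # code):
    optim = torch.optim.AdamW(model.parameters(), **kwargs.get("optim_conf", {}))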
    # resolve the scheduler from the project-local `scheduler_choices` registry
    scheduler_class = scheduler_choices.get(kwargs.get("scheduler"))
    scheduler = scheduler_class(optim, **kwargs.get("scheduler_conf"))

    # dataset
    dataset_tr = AudioDataset(
        kwargs.get("train_data_set_list"),
        frontend=frontend,
        tokenizer=tokenizer,
        **kwargs.get("dataset_conf"),
    )

    # dataloader
    batch_sampler = BatchSampler(
        dataset_tr,
        **kwargs.get("dataset_conf"),
        **kwargs.get("dataset_conf").get("batch_conf"),
    )

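    # NOTE: the original script stops at the batch sampler; the dataloader and
    # epoch loop were elided. A minimal sketch, assuming a `max_epoch` entry in
    # train_conf and the `train` stub below (assumptions, not the original
    # code):
    dataloader_tr = torch.utils.data.DataLoader(
        dataset_tr,
        batch_sampler=batch_sampler,
        num_workers=kwargs.get("dataset_conf").get("num_workers", 0),
    )
    for epoch in range(kwargs.get("train_conf", {}).get("max_epoch", 1)):
        train(epoch, model, optim)
        scheduler.step()
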
    # only tear down the process group if one was initialized
    if use_ddp or use_fsdp:
        torch.distributed.destroy_process_group()

def train(epoch, model, optim):
    # training loop body elided in the original
    pass


def val():
    # validation loop body elided in the original
    pass

if __name__ == "__main__":
    main()