嘉渊
2023-04-24 6427c834dfd97b1f05c6659cdc7ccf010bf82fe1
funasr/train/distributed_utils.py
@@ -53,15 +53,13 @@
            # https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group
            os.environ.setdefault("NCCL_BLOCKING_WAIT", "1")
-            torch.distributed.init_process_group(backend='nccl',
+            torch.distributed.init_process_group(backend=self.dist_backend,
                                                 init_method=self.dist_init_method,
                                                 world_size=args.dist_world_size,
                                                 rank=args.dist_rank)
            self.dist_rank = torch.distributed.get_rank()
            self.dist_world_size = torch.distributed.get_world_size()
            self.local_rank = args.local_rank
            logging.info("world size: {}, rank: {}, local_rank: {}".format(self.dist_world_size, self.dist_rank,
                                                                           self.local_rank))
    def init_options_pai(self):
        if self.distributed:
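The hunk above replaces the hard-coded 'nccl' backend with the configurable self.dist_backend, so the same initialization path can also run against 'gloo' (CPU) or 'mpi' builds without a code change. A minimal, self-contained sketch of that parameterized initialization follows; the standalone function and its argument names are illustrative, not FunASR's actual API.

import os
import logging

import torch

def init_distributed(dist_backend, dist_init_method, world_size, rank):
    # Illustrative helper (not from FunASR): parameterized variant of the
    # init_process_group call shown in the hunk above.
    # NCCL_BLOCKING_WAIT makes a hung NCCL collective fail with an error
    # instead of blocking forever; setdefault keeps any user-set value.
    os.environ.setdefault("NCCL_BLOCKING_WAIT", "1")
    torch.distributed.init_process_group(backend=dist_backend,
                                         init_method=dist_init_method,
                                         world_size=world_size,
                                         rank=rank)
    # Read the effective values back from the initialized process group.
    rank = torch.distributed.get_rank()
    world_size = torch.distributed.get_world_size()
    logging.info("world size: {}, rank: {}".format(world_size, rank))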
@@ -115,12 +113,10 @@
            # https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group
            os.environ.setdefault("NCCL_BLOCKING_WAIT", "1")
-            torch.distributed.init_process_group(backend='nccl', init_method='env://')
+            torch.distributed.init_process_group(backend=self.dist_backend, init_method='env://')
            self.dist_rank = torch.distributed.get_rank()
            self.dist_world_size = torch.distributed.get_world_size()
            self.local_rank = args.local_rank
            logging.info("world size: {}, rank: {}, local_rank: {}".format(self.dist_world_size, self.dist_rank,
                                                                           self.local_rank))
def resolve_distributed_mode(args):
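The second hunk makes the same backend substitution on the env:// code path. With init_method='env://', torch.distributed reads MASTER_ADDR, MASTER_PORT, RANK, and WORLD_SIZE from environment variables, which is how launchers such as torchrun (or the older torch.distributed.launch) pass them to each worker. A hedged sketch of that variant, again as a standalone illustrative function rather than FunASR's own code:

import os
import logging

import torch

def init_distributed_from_env(dist_backend="nccl"):
    # Illustrative helper (not from FunASR): env:// initialization.
    os.environ.setdefault("NCCL_BLOCKING_WAIT", "1")
    # RANK and WORLD_SIZE come from the environment with env://, so they
    # are not passed explicitly here.
    torch.distributed.init_process_group(backend=dist_backend,
                                         init_method="env://")
    rank = torch.distributed.get_rank()
    world_size = torch.distributed.get_world_size()
    # torchrun exports LOCAL_RANK for each worker process.
    local_rank = int(os.environ.get("LOCAL_RANK", "0"))
    logging.info("world size: {}, rank: {}, local_rank: {}".format(
        world_size, rank, local_rank))
    return rank, world_size, local_rank

Launched with, for example, torchrun --nproc_per_node=2 train.py, each of the two workers would log its own rank and local_rank while sharing world size 2.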