| | |
| | | from torch.distributed.fsdp import FullyShardedDataParallel as FSDP |
| | | from torch.distributed.algorithms.join import Join |
| | | from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler |
| | | from tensorboardX import SummaryWriter |
| | | from funasr.train_utils.average_nbest_models import average_checkpoints |
| | | |
| | | from funasr.register import tables |
| | |
| | | tensorboard_dir = os.path.join(kwargs.get("output_dir"), "tensorboard") |
| | | os.makedirs(tensorboard_dir, exist_ok=True) |
| | | try: |
| | | from tensorboardX import SummaryWriter |
| | | |
| | | writer = SummaryWriter(tensorboard_dir) # if trainer.rank == 0 else None |
| | | except: |
| | | writer = None |
| | |
| | | ) |
| | | trainer.start_step = 0 |
| | | |
| | | torch.cuda.empty_cache() |
| | | device = next(model.parameters()).device |
| | | if device.type == "cuda": |
| | | with torch.cuda.device(device): |
| | | torch.cuda.empty_cache() |
| | | |
| | | time_escaped = (time.perf_counter() - time_slice_i) / 3600.0 |
| | | logging.info( |