| | |
| | | filename = os.path.join(self.output_dir, f'model.pt.ep{epoch}') |
| | | torch.save(state, filename) |
| | | |
| | | print(f'Checkpoint saved to {filename}') |
| | | print(f'\nCheckpoint saved to {filename}\n') |
| | | latest = Path(os.path.join(self.output_dir, f'model.pt')) |
| | | torch.save(state, latest) |
| | | |
| | |
| | | |
| | | time2 = time.perf_counter() |
| | | time_escaped = (time2 - time1)/3600.0 |
| | | print(f"time_escaped_epoch: {time_escaped:.3f} hours, estimated to finish: {(self.max_epoch-epoch)*time_escaped:.3f}") |
| | | print(f"\ntime_escaped_epoch: {time_escaped:.3f} hours, estimated to finish {self.max_epoch} epoch: {(self.max_epoch-epoch)*time_escaped:.3f}\n") |
| | | |
| | | if self.rank == 0: |
| | | average_checkpoints(self.output_dir, self.avg_nbest_model) |