{
  "train_micro_batch_size_per_gpu": 1,
  "gradient_accumulation_steps": 1,
  "steps_per_print": 100,
  "gradient_clipping": 5,
  "fp16": {
    "enabled": false,
    "auto_cast": false,
    "loss_scale": 0,
    "initial_scale_power": 16,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "consecutive_hysteresis": false,
    "min_loss_scale": 1
  },
  "bf16": {
    "enabled": true
  },
  "zero_force_ds_cpu_optimizer": false,
  "zero_optimization": {
    "stage": 0,
    "allgather_partitions": true,
    "allgather_bucket_size": 5e8,
    "overlap_comm": true,
    "reduce_scatter": true,
    "reduce_bucket_size": 5e8,
    "contiguous_gradients": true,
    "round_robin_gradients": true
  }
}