From 22d71ff774d409f8b413f9955a7c0efef5b3e288 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 23 四月 2024 09:56:06 +0800
Subject: [PATCH] wechat

---
 funasr/bin/train.py |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index d19b79a..4ab2d8a 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -55,6 +55,8 @@
     torch.backends.cudnn.enabled = kwargs.get("cudnn_enabled", torch.backends.cudnn.enabled)
     torch.backends.cudnn.benchmark = kwargs.get("cudnn_benchmark", torch.backends.cudnn.benchmark)
     torch.backends.cudnn.deterministic = kwargs.get("cudnn_deterministic", True)
+    # open tf32
+    torch.backends.cuda.matmul.allow_tf32 = kwargs.get("enable_tf32", True)
     
     local_rank = int(os.environ.get('LOCAL_RANK', 0))
     if local_rank == 0:
@@ -102,7 +104,7 @@
     if use_ddp:
         model = model.cuda(local_rank)
         model = DDP(model, device_ids=[local_rank],
-                    find_unused_parameters=kwargs.get("train_conf", {}).get("find_unused_parameters", False))
+                    find_unused_parameters=kwargs.get("train_conf", {}).get("find_unused_parameters", True))
     elif use_fsdp:
         # model = FSDP(model).cuda(local_rank)
 
@@ -214,7 +216,7 @@
 
 
     if trainer.rank == 0:
-        average_checkpoints(trainer.output_dir, trainer.avg_nbest_model, trainer.val_acc_list)
+        average_checkpoints(trainer.output_dir, trainer.avg_nbest_model)
 
     trainer.close()
 

--
Gitblit v1.9.1