From c1e365fea09aafda387cac12fdff43d28c598979 Mon Sep 17 00:00:00 2001
From: BienBoy <92378515+BienBoy@users.noreply.github.com>
Date: Sat, 1 Feb 2025 23:29:34 +0800
Subject: [PATCH] fix: resolve unexpected 'out of memory' issue in multi-GPU setup (#2373)

---
 funasr/bin/train.py                          |    3 ++-
 funasr/bin/train_ds.py                       |    3 ++-
 funasr/auto/auto_model.py                    |    3 ++-
 funasr/models/language_model/rnn/decoders.py |    3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index 08308a2..ec4e420 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -366,7 +366,8 @@
         if pbar:
             # pbar.update(1)
             pbar.set_description(f"rtf_avg: {time_escape_total/time_speech_total:0.3f}")
-        torch.cuda.empty_cache()
+        with torch.cuda.device(next(model.parameters()).device):
+            torch.cuda.empty_cache()
         return asr_result_list
 
     def inference_with_vad(self, input, input_len=None, **cfg):
diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index fcd763f..d0f154a 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -221,7 +221,8 @@
             )
             trainer.start_step = 0
 
-            torch.cuda.empty_cache()
+            with torch.cuda.device(kwargs["device"]):
+                torch.cuda.empty_cache()
 
             time_escaped = (time.perf_counter() - time_slice_i) / 3600.0
             logging.info(
diff --git a/funasr/bin/train_ds.py b/funasr/bin/train_ds.py
index dc7fb42..24e81f6 100644
--- a/funasr/bin/train_ds.py
+++ b/funasr/bin/train_ds.py
@@ -184,7 +184,8 @@
             )
             trainer.start_step = 0
 
-            torch.cuda.empty_cache()
+            with torch.cuda.device(kwargs["device"]):
+                torch.cuda.empty_cache()
 
             time_escaped = (time.perf_counter() - time_slice_i) / 3600.0
             logging.info(
diff --git a/funasr/models/language_model/rnn/decoders.py b/funasr/models/language_model/rnn/decoders.py
index e7d35e9..314d49f 100644
--- a/funasr/models/language_model/rnn/decoders.py
+++ b/funasr/models/language_model/rnn/decoders.py
@@ -873,7 +873,8 @@
                         ctc_state[idx], accum_best_ids
                     )
 
-        torch.cuda.empty_cache()
+        with torch.cuda.device(vscores.device):
+            torch.cuda.empty_cache()
 
         dummy_hyps = [{"yseq": [self.sos, self.eos], "score": np.array([-float("inf")])}]
         ended_hyps = [

--
Gitblit v1.9.1