fix: resolve unexpected 'out of memory' issue in multi-GPU setup (#2373)
Fixed a bug where calling torch.cuda.empty_cache() outside of a device context initialized an extra CUDA context on 'cuda:0', consuming memory on that device and leading to unexpected 'out of memory' errors in multi-GPU environments. Each call is now wrapped in `with torch.cuda.device(...)` so the cache is flushed on the device actually in use.
References:
- https://github.com/pytorch/pytorch/issues/25752
- https://github.com/pytorch/pytorch/issues/144025
| | |
| | | if pbar: |
| | | # pbar.update(1) |
| | | pbar.set_description(f"rtf_avg: {time_escape_total/time_speech_total:0.3f}") |
| | | torch.cuda.empty_cache() |
| | | with torch.cuda.device(next(model.parameters()).device): |
| | | torch.cuda.empty_cache() |
| | | return asr_result_list |
| | | |
| | | def inference_with_vad(self, input, input_len=None, **cfg): |
| | |
| | | ) |
| | | trainer.start_step = 0 |
| | | |
| | | torch.cuda.empty_cache() |
| | | with torch.cuda.device(kwargs["device"]): |
| | | torch.cuda.empty_cache() |
| | | |
| | | time_escaped = (time.perf_counter() - time_slice_i) / 3600.0 |
| | | logging.info( |
| | |
| | | ) |
| | | trainer.start_step = 0 |
| | | |
| | | torch.cuda.empty_cache() |
| | | with torch.cuda.device(kwargs["device"]): |
| | | torch.cuda.empty_cache() |
| | | |
| | | time_escaped = (time.perf_counter() - time_slice_i) / 3600.0 |
| | | logging.info( |
| | |
| | | ctc_state[idx], accum_best_ids |
| | | ) |
| | | |
| | | torch.cuda.empty_cache() |
| | | with torch.cuda.device(vscores.device): |
| | | torch.cuda.empty_cache() |
| | | |
| | | dummy_hyps = [{"yseq": [self.sos, self.eos], "score": np.array([-float("inf")])}] |
| | | ended_hyps = [ |