From fce4e1d1b48f23cd8332e60afce3df8d6209a6a7 Mon Sep 17 00:00:00 2001 From: gaochangfeng <54253717+gaochangfeng@users.noreply.github.com> Date: 星期四, 11 四月 2024 14:59:22 +0800 Subject: [PATCH] SenseVoice对富文本解码的参数 (#1608) --- funasr/datasets/audio_datasets/preprocessor.py | 4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/funasr/datasets/audio_datasets/preprocessor.py b/funasr/datasets/audio_datasets/preprocessor.py index c2e27bf..ab75140 100644 --- a/funasr/datasets/audio_datasets/preprocessor.py +++ b/funasr/datasets/audio_datasets/preprocessor.py @@ -26,8 +26,10 @@ return waveform speed = random.choice(self.speed_perturb) if speed != 1.0: + if not isinstance(waveform, torch.Tensor): + waveform = torch.tensor(waveform) waveform, _ = torchaudio.sox_effects.apply_effects_tensor( - torch.tensor(waveform).view(1, -1), fs, [['speed', str(speed)], ['rate', str(fs)]]) + waveform.view(1, -1), fs, [['speed', str(speed)], ['rate', str(fs)]]) waveform = waveform.view(-1) return waveform -- Gitblit v1.9.1