From 1af8a233ce99b6c6a8a119eaa7363ebae1f2570f Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 21 Jun 2023 11:15:06 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add
---
egs/wenetspeech/conformer/local/process_opus.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/egs/wenetspeech/conformer/local/process_opus.py b/egs/wenetspeech/conformer/local/process_opus.py
index d22223d..581953f 100755
--- a/egs/wenetspeech/conformer/local/process_opus.py
+++ b/egs/wenetspeech/conformer/local/process_opus.py
@@ -19,7 +19,7 @@
import os
import sys
-import torchaudio.backend.sox_io_backend as sox
+import torchaudio
def read_file(wav_scp, segments):
@@ -60,13 +60,13 @@
os.makedirs(output_dir, exist_ok=True)
if current_wav_path != previous_wav_path:
- source_wav, sampling_rate = sox.load(current_wav_path, normalize=False)
+ waveform, sample_rate = torchaudio.load(current_wav_path)
previous_wav_path = current_wav_path
- start = int(start_time_list[i] * 1000)
- end = int(end_time_list[i] * 1000)
- target_audio = source_wav[start:end].set_frame_rate(16000)
- target_audio.export(seg_wav_path, format="wav")
+ start = int(start_time_list[i] * sample_rate)
+ end = int(end_time_list[i] * sample_rate)
+ target_audio = waveform[:, start:end]
+ torchaudio.save(seg_wav_path, target_audio, sample_rate)
fout.write("{} {}\n".format(utt_id, seg_wav_path))
if i % step == 0:
--
Gitblit v1.9.1