From 8d5f061538e15ad0acac8fcfddc7bf4a545639d6 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Thu, 25 May 2023 19:17:58 +0800
Subject: [PATCH] update repo

---
 egs/wenetspeech/conformer/local/process_opus.py |   10 +++++-----
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/egs/wenetspeech/conformer/local/process_opus.py b/egs/wenetspeech/conformer/local/process_opus.py
index d22223d..af73827 100755
--- a/egs/wenetspeech/conformer/local/process_opus.py
+++ b/egs/wenetspeech/conformer/local/process_opus.py
@@ -19,7 +19,7 @@
 import os
 import sys
 
-import torchaudio.backend.sox_io_backend as sox
+import torchaudio
 
 
 def read_file(wav_scp, segments):
@@ -60,12 +60,12 @@
 
             os.makedirs(output_dir, exist_ok=True)
             if current_wav_path != previous_wav_path:
-                source_wav, sampling_rate = sox.load(current_wav_path, normalize=False)
+                waveform, sample_rate = torchaudio.load(current_wav_path)
             previous_wav_path = current_wav_path
 
-            start = int(start_time_list[i] * 1000)
-            end = int(end_time_list[i] * 1000)
-            target_audio = source_wav[start:end].set_frame_rate(16000)
+            start = int(start_time_list[i] * sample_rate)
+            end = int(end_time_list[i] * sample_rate)
+            target_audio = waveform[:, start:end].transpose(0, 1).contiguous()
             target_audio.export(seg_wav_path, format="wav")
 
             fout.write("{} {}\n".format(utt_id, seg_wav_path))

--
Gitblit v1.9.1