From 9b4e9cc8a0311e5243d69b73ed073e7ea441982e Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 27 Mar 2024 16:05:29 +0800
Subject: [PATCH] train update
---
funasr/datasets/large_datasets/dataset.py | 10 ++++++----
1 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/funasr/datasets/large_datasets/dataset.py b/funasr/datasets/large_datasets/dataset.py
index 6c166a5..d3489c1 100644
--- a/funasr/datasets/large_datasets/dataset.py
+++ b/funasr/datasets/large_datasets/dataset.py
@@ -7,7 +7,8 @@
import torch.distributed as dist
import torchaudio
import numpy as np
-import soundfile
+# librosa replaces soundfile as the fallback audio loader used when torchaudio.load fails.
+import librosa
from kaldiio import ReadHelper
from torch.utils.data import IterableDataset
@@ -108,7 +109,7 @@
ark_reader = ReadHelper('ark:{}'.format(data_file))
reader_list.append(ark_reader)
elif data_type == "text" or data_type == "sound" or data_type == 'text_hotword':
- text_reader = open(data_file, "r")
+ text_reader = open(data_file, "r", encoding="utf-8")
reader_list.append(text_reader)
elif data_type == "none":
continue
@@ -128,7 +129,8 @@
try:
waveform, sampling_rate = torchaudio.load(path)
except:
- waveform, sampling_rate = soundfile.read(path, dtype='float32')
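+ # NOTE(review): librosa.load defaults to sr=22050 and mono=True, so unlike soundfile.read it resamples and downmixes; pass sr=None to keep the native rate - confirm intended.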
+ waveform, sampling_rate = librosa.load(path, dtype='float32')
if waveform.ndim == 2:
waveform = waveform[:, 0]
waveform = np.expand_dims(waveform, axis=0)
@@ -205,7 +207,7 @@
# pre_prob = conf.get("pre_prob", 0) # unused yet
if pre_hwfile is not None:
pre_hwlist = []
- with open(pre_hwfile, 'r') as fin:
+ with open(pre_hwfile, 'r', encoding="utf-8") as fin:
for line in fin.readlines():
pre_hwlist.append(line.strip())
else:
--
Gitblit v1.9.1