From 267dddcdbb8a9200ebda5a673905d0bb512de80e Mon Sep 17 00:00:00 2001
From: 志浩 <neo.dzh@alibaba-inc.com>
Date: 星期一, 27 二月 2023 11:48:09 +0800
Subject: [PATCH] Fix bugs in sd and sv inference
---
funasr/bin/sond_inference.py | 9 ++++++++-
funasr/bin/diar_inference_launch.py | 6 ++++--
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/funasr/bin/diar_inference_launch.py b/funasr/bin/diar_inference_launch.py
index c3e210b..1e48227 100755
--- a/funasr/bin/diar_inference_launch.py
+++ b/funasr/bin/diar_inference_launch.py
@@ -135,8 +135,10 @@
"sv_train_config": "sv.yaml",
"sv_model_file": "sv.pth",
}
- if "param_dict" in kwargs:
- kwargs["param_dict"].update(param_dict)
+ if "param_dict" in kwargs and kwargs["param_dict"] is not None:
+ for key in param_dict:
+ if key not in kwargs["param_dict"]:
+ kwargs["param_dict"][key] = param_dict[key]
else:
kwargs["param_dict"] = param_dict
return inference_modelscope(**kwargs)
diff --git a/funasr/bin/sond_inference.py b/funasr/bin/sond_inference.py
index 299de0d..4767577 100755
--- a/funasr/bin/sond_inference.py
+++ b/funasr/bin/sond_inference.py
@@ -33,6 +33,8 @@
from funasr.utils.types import str_or_none
from scipy.ndimage import median_filter
from funasr.utils.misc import statistic_model_parameters
+from funasr.datasets.iterable_dataset import load_bytes
+
class Speech2Diarization:
"""Speech2Xvector class
@@ -257,6 +259,9 @@
assert "sv_model_file" in param_dict, "sv_model_file must be provided in param_dict."
sv_train_config = param_dict["sv_train_config"]
sv_model_file = param_dict["sv_model_file"]
+ if "model_dir" in param_dict:
+ sv_train_config = os.path.join(param_dict["model_dir"], sv_train_config)
+ sv_model_file = os.path.join(param_dict["model_dir"], sv_model_file)
from funasr.bin.sv_inference import Speech2Xvector
speech2xvector_kwargs = dict(
sv_train_config=sv_train_config,
@@ -320,7 +325,9 @@
def prepare_dataset():
for idx, example in enumerate(raw_inputs):
# read waveform file
- example = [soundfile.read(x)[0] if isinstance(example[0], str) else x
+ example = [load_bytes(x) if isinstance(x, bytes) else x
+ for x in example]
+ example = [soundfile.read(x)[0] if isinstance(x, str) else x
for x in example]
# convert torch tensor to numpy array
example = [x.numpy() if isinstance(example[0], torch.Tensor) else x
--
Gitblit v1.9.1