From b5d3df75cf6462aa3bf42fd3c86fa2aa7f1c8a15 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Fri, 24 Nov 2023 00:54:44 +0800
Subject: [PATCH] setup jamo
---
funasr/bin/diar_infer.py | 75 ++-----------------------------------
1 file changed, 5 insertions(+), 70 deletions(-)
diff --git a/funasr/bin/diar_infer.py b/funasr/bin/diar_infer.py
index 7c41b60..bb40f5e 100755
--- a/funasr/bin/diar_infer.py
+++ b/funasr/bin/diar_infer.py
@@ -15,7 +15,6 @@
import torch
from scipy.ndimage import median_filter
from torch.nn import functional as F
-from typeguard import check_argument_types
from funasr.models.frontend.wav_frontend import WavFrontendMel23
from funasr.tasks.diar import DiarTask
@@ -28,11 +27,11 @@
"""Speech2Diarlization class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> import numpy as np
>>> speech2diar = Speech2DiarizationEEND("diar_sond_config.yml", "diar_sond.pb")
>>> profile = np.load("profiles.npy")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2diar(audio, profile)
{"spk1": [(int, int), ...], ...}
@@ -45,7 +44,6 @@
device: str = "cpu",
dtype: str = "float32",
):
- assert check_argument_types()
# 1. Build Diarization model
diar_model, diar_train_args = build_model_from_file(
@@ -88,7 +86,6 @@
diarization results
"""
- assert check_argument_types()
# Input as audio signal
if isinstance(speech, np.ndarray):
speech = torch.tensor(speech)
@@ -107,46 +104,16 @@
return results
- @staticmethod
- def from_pretrained(
- model_tag: Optional[str] = None,
- **kwargs: Optional[Any],
- ):
- """Build Speech2Diarization instance from the pretrained model.
-
- Args:
- model_tag (Optional[str]): Model tag of the pretrained models.
- Currently, the tags of espnet_model_zoo are supported.
-
- Returns:
- Speech2Diarization: Speech2Diarization instance.
-
- """
- if model_tag is not None:
- try:
- from espnet_model_zoo.downloader import ModelDownloader
-
- except ImportError:
- logging.error(
- "`espnet_model_zoo` is not installed. "
- "Please install via `pip install -U espnet_model_zoo`."
- )
- raise
- d = ModelDownloader()
- kwargs.update(**d.download_and_unpack(model_tag))
-
- return Speech2DiarizationEEND(**kwargs)
-
class Speech2DiarizationSOND:
"""Speech2Xvector class
Examples:
- >>> import soundfile
+ >>> import librosa
>>> import numpy as np
>>> speech2diar = Speech2DiarizationSOND("diar_sond_config.yml", "diar_sond.pb")
>>> profile = np.load("profiles.npy")
- >>> audio, rate = soundfile.read("speech.wav")
+ >>> audio, rate = librosa.load("speech.wav", sr=None)
>>> speech2diar(audio, profile)
{"spk1": [(int, int), ...], ...}
@@ -163,7 +130,6 @@
smooth_size: int = 83,
dur_threshold: float = 10,
):
- assert check_argument_types()
# TODO: 1. Build Diarization model
diar_model, diar_train_args = build_model_from_file(
@@ -213,7 +179,7 @@
@staticmethod
def seq2arr(seq, vec_dim=8):
- def int2vec(x, vec_dim=8, dtype=np.int):
+ def int2vec(x, vec_dim=8, dtype=np.int32):
b = ('{:0' + str(vec_dim) + 'b}').format(x)
# little-endian order: lower bit first
return (np.array(list(b)[::-1]) == '1').astype(dtype)
@@ -283,7 +249,6 @@
diarization results for each speaker
"""
- assert check_argument_types()
# Input as audio signal
if isinstance(speech, np.ndarray):
speech = torch.tensor(speech)
@@ -305,33 +270,3 @@
results, pse_labels = self.post_processing(logits, profile.shape[1], output_format)
return results, pse_labels
-
- @staticmethod
- def from_pretrained(
- model_tag: Optional[str] = None,
- **kwargs: Optional[Any],
- ):
- """Build Speech2Xvector instance from the pretrained model.
-
- Args:
- model_tag (Optional[str]): Model tag of the pretrained models.
- Currently, the tags of espnet_model_zoo are supported.
-
- Returns:
- Speech2Xvector: Speech2Xvector instance.
-
- """
- if model_tag is not None:
- try:
- from espnet_model_zoo.downloader import ModelDownloader
-
- except ImportError:
- logging.error(
- "`espnet_model_zoo` is not installed. "
- "Please install via `pip install -U espnet_model_zoo`."
- )
- raise
- d = ModelDownloader()
- kwargs.update(**d.download_and_unpack(model_tag))
-
- return Speech2DiarizationSOND(**kwargs)
--
Gitblit v1.9.1