From 8c1016ca77d9b35b829c158092de912f6ec10535 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Mon, 25 Mar 2024 11:48:17 +0800
Subject: [PATCH] install requirements automatically

---
 funasr/models/qwen_audio/model.py         |    8 +++++---
 funasr/auto/auto_model.py                 |    9 ++++-----
 funasr/models/campplus/cluster_backend.py |    5 +++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/funasr/auto/auto_model.py b/funasr/auto/auto_model.py
index bd786d0..c31308e 100644
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -25,11 +25,10 @@
 from funasr.train_utils.set_all_random_seed import set_all_random_seed
 from funasr.train_utils.load_pretrained_model import load_pretrained_model
 from funasr.utils import export_utils
-try:
-    from funasr.models.campplus.utils import sv_chunk, postprocess, distribute_spk
-    from funasr.models.campplus.cluster_backend import ClusterBackend
-except:
-    print("Notice: If you want to use the speaker diarization, please `pip install hdbscan`")
+
+from funasr.models.campplus.utils import sv_chunk, postprocess, distribute_spk
+from funasr.models.campplus.cluster_backend import ClusterBackend
+
 
 
 def prepare_data_iterator(data_in, input_len=None, data_type=None, key=None):
diff --git a/funasr/models/campplus/cluster_backend.py b/funasr/models/campplus/cluster_backend.py
index 72d06da..93d3bce 100644
--- a/funasr/models/campplus/cluster_backend.py
+++ b/funasr/models/campplus/cluster_backend.py
@@ -7,7 +7,6 @@
 import scipy
 import torch
 import sklearn
-import hdbscan
 import numpy as np
 
 from sklearn.cluster._kmeans import k_means
@@ -116,6 +115,8 @@
         self.min_samples = min_samples
         self.min_cluster_size = min_cluster_size
         self.metric = metric
+        import hdbscan
+        self.hdbscan = hdbscan
 
     def __call__(self, X):
         import umap.umap_ as umap
@@ -125,7 +126,7 @@
             n_components=min(self.n_components, X.shape[0] - 2),
             metric=self.metric,
         ).fit_transform(X)
-        labels = hdbscan.HDBSCAN(
+        labels = self.hdbscan.HDBSCAN(
             min_samples=self.min_samples,
             min_cluster_size=self.min_cluster_size,
             allow_single_cluster=True).fit_predict(umap_X)
diff --git a/funasr/models/qwen_audio/model.py b/funasr/models/qwen_audio/model.py
index 045cfe1..f981b67 100644
--- a/funasr/models/qwen_audio/model.py
+++ b/funasr/models/qwen_audio/model.py
@@ -9,8 +9,7 @@
 from torch import nn
 import whisper
 from funasr.utils.load_utils import load_audio_text_image_video, extract_fbank
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers.generation import GenerationConfig
+
 
 from funasr.register import tables
 
@@ -27,6 +26,8 @@
     """
     def __init__(self, *args, **kwargs):
         super().__init__()
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        from transformers.generation import GenerationConfig
 
         model_or_path = kwargs.get("model_path", "QwenAudio")
         model = AutoModelForCausalLM.from_pretrained(model_or_path, device_map="cpu",
@@ -82,7 +83,8 @@
         Modified from https://github.com/QwenLM/Qwen-Audio
         """
         super().__init__()
-        
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        from transformers.generation import GenerationConfig
         model_or_path = kwargs.get("model_path", "QwenAudio")
         bf16 = kwargs.get("bf16", False)
         fp16 = kwargs.get("fp16", False)

--
Gitblit v1.9.1