From dec1c875b2fcf0161755b93717d3eac856c6d15d Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Wed, 31 Jan 2024 22:40:19 +0800
Subject: [PATCH] Funasr1.0 bugfix, audio sample input for the vad model (#1333)

---
 funasr/models/campplus/cluster_backend.py |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/funasr/models/campplus/cluster_backend.py b/funasr/models/campplus/cluster_backend.py
index 3bac0a0..72d06da 100644
--- a/funasr/models/campplus/cluster_backend.py
+++ b/funasr/models/campplus/cluster_backend.py
@@ -4,7 +4,6 @@
 #  MIT License  (https://opensource.org/licenses/MIT)
 # Modified from 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker)
 
-import umap
 import scipy
 import torch
 import sklearn
@@ -119,6 +118,7 @@
         self.metric = metric
 
     def __call__(self, X):
+        import umap.umap_ as umap
         umap_X = umap.UMAP(
             n_neighbors=self.n_neighbors,
             min_dist=0.0,
@@ -156,6 +156,7 @@
         if X.shape[0] < 20:
             return np.zeros(X.shape[0], dtype='int')
         if X.shape[0] < 2048 or k is not None:
+            # unexpected corner case
             labels = self.spectral_cluster(X, k)
         else:
             labels = self.umap_hdbscan_cluster(X)

--
Gitblit v1.9.1