From 81acb17544a05424dff0ef74f3aeb2ce9866ba6a Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期三, 06 十二月 2023 19:54:37 +0800
Subject: [PATCH] update with main (#1152)

---
 funasr/bin/asr_inference_launch.py |   34 +++++++++++++++++++++++++---------
 1 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/funasr/bin/asr_inference_launch.py b/funasr/bin/asr_inference_launch.py
index f61c085..f34bfb2 100644
--- a/funasr/bin/asr_inference_launch.py
+++ b/funasr/bin/asr_inference_launch.py
@@ -48,13 +48,13 @@
 from funasr.utils.types import str2triple_str
 from funasr.utils.types import str_or_none
 from funasr.utils.vad_utils import slice_padding_fbank
-from funasr.utils.speaker_utils import (check_audio_list, 
-                                        sv_preprocess, 
-                                        sv_chunk, 
-                                        CAMPPlus, 
-                                        extract_feature, 
+from funasr.utils.speaker_utils import (check_audio_list,
+                                        sv_preprocess,
+                                        sv_chunk,
+                                        extract_feature,
                                         postprocess,
                                         distribute_spk)
+import funasr.modules.cnn as sv_module
 from funasr.build_utils.build_model_from_file import build_model_from_file
 from funasr.utils.cluster_backend import ClusterBackend
 from funasr.utils.modelscope_utils import get_cache_dir
@@ -818,7 +818,15 @@
         format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
     )
 
-    sv_model_file = asr_model_file.replace("model.pb", "campplus_cn_common.bin")
+    sv_model_config_path = asr_model_file.replace("model.pb", "sv_model_config.yaml")
+    if not os.path.exists(sv_model_config_path):
+        sv_model_config = {'sv_model_class': 'CAMPPlus','sv_model_file': 'campplus_cn_common.bin', 'models_config': {}}
+    else:
+        with open(sv_model_config_path, 'r') as f:
+            sv_model_config = yaml.load(f, Loader=yaml.FullLoader)
+    if sv_model_config['models_config'] is None:
+        sv_model_config['models_config'] = {}
+    sv_model_file = asr_model_file.replace("model.pb", sv_model_config['sv_model_file'])
 
     if param_dict is not None:
         hotword_list_or_file = param_dict.get('hotword')
@@ -944,9 +952,15 @@
             #####  speaker_verification  #####
             ##################################
             # load sv model
-            sv_model_dict = torch.load(sv_model_file, map_location=torch.device('cpu'))
-            sv_model = CAMPPlus()
+            if ngpu > 0:
+                sv_model_dict = torch.load(sv_model_file)
+                sv_model = getattr(sv_module, sv_model_config['sv_model_class'])(**sv_model_config['models_config'])
+                sv_model.cuda()
+            else:
+                sv_model_dict = torch.load(sv_model_file, map_location=torch.device('cpu'))
+                sv_model = getattr(sv_module, sv_model_config['sv_model_class'])(**sv_model_config['models_config'])
             sv_model.load_state_dict(sv_model_dict)
+            print(f'load sv model params: {sv_model_file}')
             sv_model.eval()
             cb_model = ClusterBackend()
             vad_segments = []
@@ -969,9 +983,11 @@
                     embs = []
                     for x in wavs:
                         x = extract_feature([x])
+                        if ngpu > 0:
+                            x = x.cuda()
                         embs.append(sv_model(x))
                     embs = torch.cat(embs)
-                    embeddings.append(embs.detach().numpy())
+                    embeddings.append(embs.cpu().detach().numpy())
                 embeddings = np.concatenate(embeddings)
                 labels = cb_model(embeddings)
                 sv_output = postprocess(segments, vad_segments, labels, embeddings)

--
Gitblit v1.9.1