From 94de39dde2e616a01683c518023d0fab72b4e103 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Mon, 19 Feb 2024 22:21:50 +0800
Subject: [PATCH] aishell example

---
 funasr/models/campplus/model.py |   43 ++++++++++++++++++++++++++-----------------
 1 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/funasr/models/campplus/model.py b/funasr/models/campplus/model.py
index 25ef3d7..b2d65f1 100644
--- a/funasr/models/campplus/model.py
+++ b/funasr/models/campplus/model.py
@@ -1,25 +1,34 @@
-# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
-# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
+# Modified from 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker)
 
-import os
 import time
 import torch
-import logging
 import numpy as np
-import torch.nn as nn
 from collections import OrderedDict
-from typing import Union, Dict, List, Tuple, Optional
+from contextlib import contextmanager
+from distutils.version import LooseVersion
 
-from funasr.utils.load_utils import load_audio_text_image_video
-from funasr.utils.datadir_writer import DatadirWriter
 from funasr.register import tables
-from funasr.models.campplus.components import DenseLayer, StatsPool, TDNNLayer, CAMDenseTDNNBlock, TransitLayer, \
-    BasicResBlock, get_nonlinear, FCM
 from funasr.models.campplus.utils import extract_feature
+from funasr.utils.load_utils import load_audio_text_image_video
+from funasr.models.campplus.components import DenseLayer, StatsPool, \
+    TDNNLayer, CAMDenseTDNNBlock, TransitLayer, get_nonlinear, FCM
+
+
+if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"):  # NOTE(review): distutils is removed in Python 3.12 (PEP 632)
+    from torch.cuda.amp import autocast
+else:
+    # Branch taken for torch<1.6.0: define `autocast` as a no-op context manager (`enabled` is accepted but ignored)
+    @contextmanager
+    def autocast(enabled=True):
+        yield
 
 
 @tables.register("model_classes", "CAMPPlus")
-class CAMPPlus(nn.Module):
+class CAMPPlus(torch.nn.Module):
     def __init__(self,
                  feat_dim=80,
                  embedding_size=192,
@@ -36,7 +45,7 @@
         channels = self.head.out_channels
         self.output_level = output_level
 
-        self.xvector = nn.Sequential(
+        self.xvector = torch.nn.Sequential(
             OrderedDict([
 
                 ('tdnn',
@@ -82,10 +91,10 @@
             assert self.output_level == 'frame', '`output_level` should be set to \'segment\' or \'frame\'. '
 
         for m in self.modules():
-            if isinstance(m, (nn.Conv1d, nn.Linear)):
-                nn.init.kaiming_normal_(m.weight.data)
+            if isinstance(m, (torch.nn.Conv1d, torch.nn.Linear)):
+                torch.nn.init.kaiming_normal_(m.weight.data)
                 if m.bias is not None:
-                    nn.init.zeros_(m.bias)
+                    torch.nn.init.zeros_(m.bias)
 
     def forward(self, x):
         x = x.permute(0, 2, 1)  # (B,T,F) => (B,F,T)
@@ -95,7 +104,7 @@
             x = x.transpose(1, 2)
         return x
 
-    def generate(self,
+    def inference(self,
                  data_in,
                  data_lengths=None,
                  key: list=None,
@@ -114,5 +123,5 @@
         time3 = time.perf_counter()
         meta_data["extract_feat"] = f"{time3 - time2:0.3f}"
         meta_data["batch_data_time"] = np.array(speech_times).sum().item() / 16000.0
-        results = [{"spk_embedding": self.forward(speech)}]
+        results = [{"spk_embedding": self.forward(speech.to(torch.float32))}]
         return results, meta_data
\ No newline at end of file

--
Gitblit v1.9.1