From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] Commit
---
funasr/models/sa_asr/attention.py | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/funasr/models/sa_asr/attention.py b/funasr/models/sa_asr/attention.py
index 2cce9ec..74e687a 100644
--- a/funasr/models/sa_asr/attention.py
+++ b/funasr/models/sa_asr/attention.py
@@ -16,9 +16,8 @@
import funasr.models.lora.layers as lora
-
class CosineDistanceAttention(nn.Module):
- """ Compute Cosine Distance between spk decoder output and speaker profile
+ """Compute Cosine Distance between spk decoder output and speaker profile
Args:
profile_path: speaker profile file path (.npy file)
"""
@@ -35,16 +34,16 @@
"""
x = spk_decoder_out.unsqueeze(2) # (B, L, 1, D)
if profile_lens is not None:
-
+
mask = (make_pad_mask(profile_lens)[:, None, :]).to(profile.device)
- min_value = float(
- numpy.finfo(torch.tensor(0, dtype=x.dtype).numpy().dtype).min
+ min_value = float(numpy.finfo(torch.tensor(0, dtype=x.dtype).numpy().dtype).min)
+ weights_not_softmax = F.cosine_similarity(x, profile.unsqueeze(1), dim=-1).masked_fill(
+ mask, min_value
)
- weights_not_softmax=F.cosine_similarity(x, profile.unsqueeze(1), dim=-1).masked_fill(mask, min_value)
weights = self.softmax(weights_not_softmax).masked_fill(mask, 0.0) # (B, L, N)
else:
x = x[:, -1:, :, :]
- weights_not_softmax=F.cosine_similarity(x, profile.unsqueeze(1).to(x.device), dim=-1)
+ weights_not_softmax = F.cosine_similarity(x, profile.unsqueeze(1).to(x.device), dim=-1)
weights = self.softmax(weights_not_softmax) # (B, 1, N)
spk_embedding = torch.matmul(weights, profile.to(weights.device)) # (B, L, D)
--
Gitblit v1.9.1
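
The hunk above only reformats the masked cosine-similarity attention in
CosineDistanceAttention.forward; its behavior is unchanged. Below is a minimal
standalone sketch of that computation. The shapes, the example tensors, and the
inline arange-based pad mask (standing in for funasr's make_pad_mask helper)
are illustrative assumptions, and torch.finfo is used in place of the
numpy.finfo round-trip in the patched code.

    import torch
    import torch.nn.functional as F

    # Assumed toy shapes: batch, decoder steps, profiles, embedding dim.
    B, L, N, D = 2, 5, 4, 8
    spk_decoder_out = torch.randn(B, L, D)   # speaker decoder output
    profile = torch.randn(B, N, D)           # speaker profiles
    profile_lens = torch.tensor([4, 2])      # valid profiles per batch entry

    x = spk_decoder_out.unsqueeze(2)         # (B, L, 1, D)

    # True at padded profile slots (inline stand-in for make_pad_mask).
    mask = torch.arange(N)[None, :] >= profile_lens[:, None]  # (B, N)
    mask = mask[:, None, :]                  # (B, 1, N), broadcasts over L

    # Cosine similarity of every decoder step against every profile.
    sim = F.cosine_similarity(x, profile.unsqueeze(1), dim=-1)  # (B, L, N)

    # Fill padded slots with the dtype minimum so softmax drives them to ~0,
    # then zero them explicitly, as the patched code does.
    min_value = torch.finfo(sim.dtype).min
    weights = sim.masked_fill(mask, min_value).softmax(dim=-1)
    weights = weights.masked_fill(mask, 0.0)                    # (B, L, N)

    # Attention-weighted mixture of speaker profiles.
    spk_embedding = torch.matmul(weights, profile)              # (B, L, D)
    print(spk_embedding.shape)               # torch.Size([2, 5, 8])

Filling padded slots with the dtype minimum before the softmax sends their
weights toward zero, and the second masked_fill removes whatever residual mass
the softmax still assigns them, so padding never leaks into spk_embedding.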