From 3cd3473bf7a3b41484baa86d9092248d78e7af39 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Fri, 21 Apr 2023 17:17:37 +0800
Subject: [PATCH] docs

---
 funasr/models/predictor/cif.py |   16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/funasr/models/predictor/cif.py b/funasr/models/predictor/cif.py
index 74f3e68..a5273f8 100644
--- a/funasr/models/predictor/cif.py
+++ b/funasr/models/predictor/cif.py
@@ -200,6 +200,7 @@
         return acoustic_embeds, token_num, alphas, cif_peak
 
     def forward_chunk(self, hidden, cache=None):
+        b, t, d = hidden.size()
         h = hidden
         context = h.transpose(1, 2)
         queries = self.pad(context)
@@ -220,6 +221,8 @@
             alphas = alphas * mask_chunk_predictor
       
         if cache is not None:
+            if cache["is_final"]:
+                alphas[:, cache["stride"] + cache["pad_left"] - 1] += 0.45
             if cache["cif_hidden"] is not None:
                 hidden = torch.cat((cache["cif_hidden"], hidden), 1)
             if cache["cif_alphas"] is not None:
@@ -231,6 +234,7 @@
         last_fire_place = len_time - 1
         last_fire_remainds = 0.0
         pre_alphas_length = 0
+        last_fire = False
  
         mask_chunk_peak_predictor = None
         if cache is not None:
@@ -241,7 +245,6 @@
                 mask_chunk_peak_predictor[:, :pre_alphas_length] = 1.0
             mask_chunk_peak_predictor[:, pre_alphas_length + cache["pad_left"]:pre_alphas_length + cache["stride"] + cache["pad_left"]] = 1.0
             
-
         if mask_chunk_peak_predictor is not None:
             cif_peak = cif_peak * mask_chunk_peak_predictor.squeeze(-1)
         
@@ -249,10 +252,15 @@
             if cif_peak[0][len_time - 1 - i] > self.threshold or cif_peak[0][len_time - 1 - i] == self.threshold:
                 last_fire_place = len_time - 1 - i
                 last_fire_remainds = cif_peak[0][len_time - 1 - i] - self.threshold
+                last_fire = True
                 break
-        last_fire_remainds = torch.tensor([last_fire_remainds], dtype=alphas.dtype).to(alphas.device)
-        cache["cif_hidden"] = hidden[:, last_fire_place:, :]
-        cache["cif_alphas"] = torch.cat((last_fire_remainds.unsqueeze(0), alphas[:, last_fire_place+1:]), -1)
+        if last_fire:
+           last_fire_remainds = torch.tensor([last_fire_remainds], dtype=alphas.dtype).to(alphas.device)
+           cache["cif_hidden"] = hidden[:, last_fire_place:, :]
+           cache["cif_alphas"] = torch.cat((last_fire_remainds.unsqueeze(0), alphas[:, last_fire_place+1:]), -1)
+        else:
+           cache["cif_hidden"] = hidden
+           cache["cif_alphas"] = alphas
         token_num_int = token_num.floor().type(torch.int32).item()
         return acoustic_embeds[:, 0:token_num_int, :], token_num, alphas, cif_peak
 

--
Gitblit v1.9.1