From 8dab6d184a034ca86eafa644ea0d2100aadfe27d Mon Sep 17 00:00:00 2001
From: jmwang66 <wangjiaming.wjm@alibaba-inc.com>
Date: Tue, 09 May 2023 10:58:33 +0800
Subject: [PATCH] Merge pull request #473 from alibaba-damo-academy/dev_smohan
---
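Notes: this change disables the experimental ideal-attention loss in
ContextualParaformer and removes the pick_hwlist_group helper. The decoder's
attention output is no longer unpacked, the loss block is commented out with a
triple-quoted string, and loss_ideal is now always None. pick_hwlist_group
handled hotword lists larger than 2000 entries by splitting the hotword
embeddings into fixed-size groups, scoring each group by the peak
cross-attention of the bias decoder, and biasing with only the best-scoring
group; after this change the full hotword list is always passed to the decoder.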
funasr/models/e2e_asr_contextual_paraformer.py | 31 ++++---------------------------
1 file changed, 4 insertions(+), 27 deletions(-)
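For reference, the removed pick_hwlist_group boiled down to the
chunk-and-select pattern sketched below. This is a minimal standalone sketch,
not the original method: score_fn is a hypothetical stand-in for running
self.decoder with the candidate group as contextual_info and reading the peak
non-blank score from self.decoder.bias_decoder.src_attn.attn.

    import torch

    def pick_best_group(hw_embed, score_fn, group_size=2000):
        # hw_embed: (num_hotwords + 1, dim); the last row is the <no-bias>
        # entry, which every candidate group must keep.
        best_score, best_group = 0.0, None
        no_bias = hw_embed[-1]
        for group in hw_embed[:-1].split(group_size):
            group = torch.cat([group, no_bias.unsqueeze(0)], dim=0)
            score = score_fn(group)  # peak cross-attention over the non-blank rows
            if score > best_score:
                best_score, best_group = score, group
        return best_group

As in the removed code, the function returns None when no group scores above
zero, in which case the caller keeps the unchunked hotword list.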
diff --git a/funasr/models/e2e_asr_contextual_paraformer.py b/funasr/models/e2e_asr_contextual_paraformer.py
index 493b345..dc820db 100644
--- a/funasr/models/e2e_asr_contextual_paraformer.py
+++ b/funasr/models/e2e_asr_contextual_paraformer.py
@@ -280,8 +280,8 @@
         decoder_outs = self.decoder(
             encoder_out, encoder_out_lens, sematic_embeds, ys_pad_lens, contextual_info=contextual_info
         )
-        decoder_out, _, attn = decoder_outs[0], decoder_outs[1], decoder_outs[2]
-
+        decoder_out, _ = decoder_outs[0], decoder_outs[1]
+        '''
         if self.crit_attn_weight > 0 and attn.shape[-1] > 1:
             ideal_attn = ideal_attn + self.crit_attn_smooth / (self.crit_attn_smooth + 1.0)
             attn_non_blank = attn[:,:,:,:-1]
@@ -289,6 +289,8 @@
             loss_ideal = self.attn_loss(attn_non_blank.max(1)[0], ideal_attn_non_blank.to(attn.device))
         else:
             loss_ideal = None
+        '''
+        loss_ideal = None
 
         if decoder_out_1st is None:
             decoder_out_1st = decoder_out
@@ -360,11 +362,6 @@
             hw_embed = torch.nn.utils.rnn.pack_padded_sequence(hw_embed, hw_lengths, batch_first=True,
                                                                enforce_sorted=False)
             _, (h_n, _) = self.bias_encoder(hw_embed)
-            # hw_embed, _ = torch.nn.utils.rnn.pad_packed_sequence(hw_embed, batch_first=True)
-            if h_n.shape[1] > 2000: # large hotword list
-                _h_n = self.pick_hwlist_group(h_n.squeeze(0), encoder_out, encoder_out_lens, sematic_embeds, ys_pad_lens)
-                if _h_n is not None:
-                    h_n = _h_n
             hw_embed = h_n.repeat(encoder_out.shape[0], 1, 1)
 
         decoder_outs = self.decoder(
@@ -373,23 +370,3 @@
         decoder_out = decoder_outs[0]
         decoder_out = torch.log_softmax(decoder_out, dim=-1)
         return decoder_out, ys_pad_lens
-
-    def pick_hwlist_group(self, hw_embed, encoder_out, encoder_out_lens, sematic_embeds, ys_pad_lens):
-        max_attn_score = 0.0
-        # max_attn_index = 0
-        argmax_g = None
-        non_blank = hw_embed[-1]
-        hw_embed_groups = hw_embed[:-1].split(2000)
-        for i, g in enumerate(hw_embed_groups):
-            g = torch.cat([g, non_blank.unsqueeze(0)], dim=0)
-            _ = self.decoder(
-                encoder_out, encoder_out_lens, sematic_embeds, ys_pad_lens, contextual_info=g.unsqueeze(0)
-            )
-            attn = self.decoder.bias_decoder.src_attn.attn[0]
-            _max_attn_score = attn.max(0)[0][:,:-1].max()
-            if _max_attn_score > max_attn_score:
-                max_attn_score = _max_attn_score
-                # max_attn_index = i
-                argmax_g = g
-            # import pdb; pdb.set_trace()
-        return argmax_g
\ No newline at end of file
--
Gitblit v1.9.1