From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] 提交

---
 funasr/models/transducer/rnn_decoder.py |   53 ++++++++++++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/funasr/models/transducer/rnn_decoder.py b/funasr/models/transducer/rnn_decoder.py
index 1743f99..4a0ffa3 100644
--- a/funasr/models/transducer/rnn_decoder.py
+++ b/funasr/models/transducer/rnn_decoder.py
@@ -1,14 +1,18 @@
-import random
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
 
-import numpy as np
 import torch
+import random
+import numpy as np
+import torch.nn as nn
 import torch.nn.functional as F
 
+from funasr.register import tables
 from funasr.models.transformer.utils.nets_utils import make_pad_mask
 from funasr.models.transformer.utils.nets_utils import to_device
 from funasr.models.language_model.rnn.attentions import initial_att
-from funasr.models.decoder.abs_decoder import AbsDecoder
-from funasr.utils.get_default_kwargs import get_default_kwargs
 
 
 def build_attention_list(
@@ -74,13 +78,12 @@
                 )
                 att_list.append(att)
     else:
-        raise ValueError(
-            "Number of encoders needs to be more than one. {}".format(num_encs)
-        )
+        raise ValueError("Number of encoders needs to be more than one. {}".format(num_encs))
     return att_list
 
 
-class RNNDecoder(AbsDecoder):
+@tables.register("decoder_classes", "rnn_decoder")
+class RNNDecoder(nn.Module):
     def __init__(
         self,
         vocab_size: int,
@@ -93,7 +96,7 @@
         context_residual: bool = False,
         replace_sos: bool = False,
         num_encs: int = 1,
-        att_conf: dict = get_default_kwargs(build_attention_list),
+        att_conf: dict = None,
     ):
         # FIXME(kamo): The parts of num_spk should be refactored more more more
         if rnn_type not in {"lstm", "gru"}:
@@ -121,16 +124,20 @@
         self.decoder = torch.nn.ModuleList()
         self.dropout_dec = torch.nn.ModuleList()
         self.decoder += [
-            torch.nn.LSTMCell(hidden_size + eprojs, hidden_size)
-            if self.dtype == "lstm"
-            else torch.nn.GRUCell(hidden_size + eprojs, hidden_size)
+            (
+                torch.nn.LSTMCell(hidden_size + eprojs, hidden_size)
+                if self.dtype == "lstm"
+                else torch.nn.GRUCell(hidden_size + eprojs, hidden_size)
+            )
         ]
         self.dropout_dec += [torch.nn.Dropout(p=dropout)]
         for _ in range(1, self.dlayers):
             self.decoder += [
-                torch.nn.LSTMCell(hidden_size, hidden_size)
-                if self.dtype == "lstm"
-                else torch.nn.GRUCell(hidden_size, hidden_size)
+                (
+                    torch.nn.LSTMCell(hidden_size, hidden_size)
+                    if self.dtype == "lstm"
+                    else torch.nn.GRUCell(hidden_size, hidden_size)
+                )
             ]
             self.dropout_dec += [torch.nn.Dropout(p=dropout)]
             # NOTE: dropout is applied only for the vertical connections
@@ -141,9 +148,7 @@
         else:
             self.output = torch.nn.Linear(hidden_size, vocab_size)
 
-        self.att_list = build_attention_list(
-            eprojs=eprojs, dunits=hidden_size, **att_conf
-        )
+        self.att_list = build_attention_list(eprojs=eprojs, dunits=hidden_size, **att_conf)
 
     def zero_state(self, hs_pad):
         return hs_pad.new_zeros(hs_pad.size(0), self.dunits)
@@ -159,9 +164,7 @@
         else:
             z_list[0] = self.decoder[0](ey, z_prev[0])
             for i in range(1, self.dlayers):
-                z_list[i] = self.decoder[i](
-                    self.dropout_dec[i - 1](z_list[i - 1]), z_prev[i]
-                )
+                z_list[i] = self.decoder[i](self.dropout_dec[i - 1](z_list[i - 1]), z_prev[i])
         return z_list, c_list
 
     def forward(self, hs_pad, hlens, ys_in_pad, ys_in_lens, strm_idx=0):
@@ -311,13 +314,9 @@
                 state["a_prev"][self.num_encs],
             )
         ey = torch.cat((ey, att_c), dim=1)  # utt(1) x (zdim + hdim)
-        z_list, c_list = self.rnn_forward(
-            ey, z_list, c_list, state["z_prev"], state["c_prev"]
-        )
+        z_list, c_list = self.rnn_forward(ey, z_list, c_list, state["z_prev"], state["c_prev"])
         if self.context_residual:
-            logits = self.output(
-                torch.cat((self.dropout_dec[-1](z_list[-1]), att_c), dim=-1)
-            )
+            logits = self.output(torch.cat((self.dropout_dec[-1](z_list[-1]), att_c), dim=-1))
         else:
             logits = self.output(self.dropout_dec[-1](z_list[-1]))
         logp = F.log_softmax(logits, dim=1).squeeze(0)

--
Gitblit v1.9.1