From 4ace5a95b052d338947fc88809a440ccd55cf6b4 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 16 十一月 2023 16:39:52 +0800
Subject: [PATCH] funasr pages

---
 funasr/modules/nets_utils.py |   50 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/funasr/modules/nets_utils.py b/funasr/modules/nets_utils.py
index 397a5c4..b1879fa 100644
--- a/funasr/modules/nets_utils.py
+++ b/funasr/modules/nets_utils.py
@@ -61,6 +61,48 @@
     return pad
 
 
+def pad_list_all_dim(xs, pad_value):
+    """Perform padding for the list of tensors.
+
+    Args:
+        xs (List): List of Tensors [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)].
+        pad_value (float): Value for padding.
+
+    Returns:
+        Tensor: Padded tensor (B, Tmax, `*`).
+
+    Examples:
+        >>> x = [torch.ones(4), torch.ones(2), torch.ones(1)]
+        >>> x
+        [tensor([1., 1., 1., 1.]), tensor([1., 1.]), tensor([1.])]
+        >>> pad_list(x, 0)
+        tensor([[1., 1., 1., 1.],
+                [1., 1., 0., 0.],
+                [1., 0., 0., 0.]])
+
+    """
+    n_batch = len(xs)
+    num_dim = len(xs[0].shape)
+    max_len_all_dim = []
+    for i in range(num_dim):
+        max_len_all_dim.append(max(x.size(i) for x in xs))
+    pad = xs[0].new(n_batch, *max_len_all_dim).fill_(pad_value)
+
+    for i in range(n_batch):
+        if num_dim == 1:
+            pad[i, : xs[i].size(0)] = xs[i]
+        elif num_dim == 2:
+            pad[i, : xs[i].size(0), : xs[i].size(1)] = xs[i]
+        elif num_dim == 3:
+            pad[i, : xs[i].size(0), : xs[i].size(1), : xs[i].size(2)] = xs[i]
+        else:
+            raise ValueError(
+                "pad_list_all_dim only support 1-D, 2-D and 3-D tensors, not {}-D.".format(num_dim)
+            )
+
+    return pad
+
+
 def make_pad_mask(lengths, xs=None, length_dim=-1, maxlen=None):
     """Make mask tensor containing indices of padded part.
 
@@ -407,7 +449,7 @@
 
     elif mode == "mt" and arch == "rnn":
         # +1 means input (+1) and layers outputs (train_args.elayer)
-        subsample = np.ones(train_args.elayers + 1, dtype=np.int)
+        subsample = np.ones(train_args.elayers + 1, dtype=np.int32)
         logging.warning("Subsampling is not performed for machine translation.")
         logging.info("subsample: " + " ".join([str(x) for x in subsample]))
         return subsample
@@ -417,7 +459,7 @@
             or (mode == "mt" and arch == "rnn")
             or (mode == "st" and arch == "rnn")
     ):
-        subsample = np.ones(train_args.elayers + 1, dtype=np.int)
+        subsample = np.ones(train_args.elayers + 1, dtype=np.int32)
         if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"):
             ss = train_args.subsample.split("_")
             for j in range(min(train_args.elayers + 1, len(ss))):
@@ -432,7 +474,7 @@
 
     elif mode == "asr" and arch == "rnn_mix":
         subsample = np.ones(
-            train_args.elayers_sd + train_args.elayers + 1, dtype=np.int
+            train_args.elayers_sd + train_args.elayers + 1, dtype=np.int32
         )
         if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"):
             ss = train_args.subsample.split("_")
@@ -451,7 +493,7 @@
     elif mode == "asr" and arch == "rnn_mulenc":
         subsample_list = []
         for idx in range(train_args.num_encs):
-            subsample = np.ones(train_args.elayers[idx] + 1, dtype=np.int)
+            subsample = np.ones(train_args.elayers[idx] + 1, dtype=np.int32)
             if train_args.etype[idx].endswith("p") and not train_args.etype[
                 idx
             ].startswith("vgg"):

--
Gitblit v1.9.1