    elif mode == "mt" and arch == "rnn":
        # +1 means input (+1) and layer outputs (train_args.elayers)
        subsample = np.ones(train_args.elayers + 1, dtype=np.int32)
        logging.warning("Subsampling is not performed for machine translation.")
        logging.info("subsample: " + " ".join([str(x) for x in subsample]))
        return subsample
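        # Illustration (not from the source): with train_args.elayers == 3,
        # this branch returns array([1, 1, 1, 1], dtype=int32) -- a factor
        # of 1 for the input plus each encoder layer, i.e. no subsampling.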

    elif (
        (mode == "asr" and arch in ("rnn", "rnn-t"))
        or (mode == "mt" and arch == "rnn")
        or (mode == "st" and arch == "rnn")
    ):
        subsample = np.ones(train_args.elayers + 1, dtype=np.int32)
        if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"):
            ss = train_args.subsample.split("_")
            for j in range(min(train_args.elayers + 1, len(ss))):
                subsample[j] = int(ss[j])
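
        # Worked example (illustrative, not from the source): with
        # train_args.elayers == 4 and train_args.subsample == "1_2_2_1_1",
        # the parsing above yields per-position factors [1, 2, 2, 1, 1]:
        #
        #     ss = "1_2_2_1_1".split("_")     # ["1", "2", "2", "1", "1"]
        #     subsample = np.ones(5, dtype=np.int32)
        #     for j in range(min(5, len(ss))):
        #         subsample[j] = int(ss[j])   # -> array([1, 2, 2, 1, 1])
        #
        # Index 0 applies to the input frames; indices 1..elayers apply to
        # each encoder layer's output, so here frames are halved after the
        # first and second layers.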

    elif mode == "asr" and arch == "rnn_mix":
        subsample = np.ones(
            train_args.elayers_sd + train_args.elayers + 1, dtype=np.int32
        )
        if train_args.etype.endswith("p") and not train_args.etype.startswith("vgg"):
            ss = train_args.subsample.split("_")
            for j in range(
                min(train_args.elayers_sd + train_args.elayers + 1, len(ss))
            ):
                subsample[j] = int(ss[j])

    elif mode == "asr" and arch == "rnn_mulenc":
        subsample_list = []
        for idx in range(train_args.num_encs):
            subsample = np.ones(train_args.elayers[idx] + 1, dtype=np.int32)
            if train_args.etype[idx].endswith("p") and not train_args.etype[
                idx
            ].startswith("vgg"):
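                # Likely continuation (a sketch, assuming this branch mirrors
                # the single-encoder parsing above; exact attribute names are
                # an assumption):
                #
                #     ss = train_args.subsample.split("_")
                #     for j in range(min(train_args.elayers[idx] + 1, len(ss))):
                #         subsample[j] = int(ss[j])
                #
                # followed by subsample_list.append(subsample), so callers
                # receive one np.int32 array per encoder.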

        new_k = k.replace(old_prefix, new_prefix)
        state_dict[new_k] = v
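        # Illustration (assumed context: a loop over state-dict keys that
        # start with old_prefix). With old_prefix = "encoder." and
        # new_prefix = "enc.", the key "encoder.layer0.weight" is re-inserted
        # as "enc.layer0.weight"; the tensor value is unchanged.
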
class Swish(torch.nn.Module):
    """Swish activation definition.

    Swish(x) = (beta * x) * sigmoid(x)
        where beta = 1 defines standard Swish activation.

    References:
        https://arxiv.org/abs/2108.12943 / https://arxiv.org/abs/1710.05941v1.
        E-swish variant: https://arxiv.org/abs/1801.07145.

    Args:
        beta: Beta parameter for E-Swish.
            (beta >= 1. If beta < 1, use standard Swish).
        use_builtin: Whether to use PyTorch function if available.

    """

    def __init__(self, beta: float = 1.0, use_builtin: bool = False) -> None:
        super().__init__()

        self.beta = beta

        if beta > 1:
            self.swish = lambda x: (self.beta * x) * torch.sigmoid(x)
        else:
            if use_builtin:
                self.swish = torch.nn.SiLU()
            else:
                self.swish = lambda x: x * torch.sigmoid(x)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward computation."""
        return self.swish(x)
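
# Usage sketch (illustrative, not from the source):
#
#     x = torch.tensor([-1.0, 0.0, 1.0])
#     Swish()(x)                  # standard Swish: x * sigmoid(x)
#     Swish(beta=1.25)(x)         # E-Swish: (1.25 * x) * sigmoid(x)
#     Swish(use_builtin=True)(x)  # delegates to torch.nn.SiLU()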

def get_activation(act):
    """Return activation function."""