| | |
| | | mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) |
| | | min_value = torch.finfo(scores.dtype).min |
| | | scores = scores.masked_fill(mask, min_value) |
| | | self.attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | mask, 0.0 |
| | | ) # (batch, head, time1, time2) |
| | | else: |
| | | self.attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | |
| | | p_attn = self.dropout(self.attn) |
| | | p_attn = self.dropout(attn) |
| | | x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) |
| | | x = ( |
| | | x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k) |
| | | ) # (batch, time1, d_model) |
| | | |
| | | return self.linear_out(x), self.attn # (batch, time1, d_model) |
| | | return self.linear_out(x), attn # (batch, time1, d_model) |
| | | |
| | | def forward(self, query, key, value, mask): |
| | | """Compute scaled dot product attention. |
| | |
| | | def forward_attention(self, value, scores, mask): |
| | | scores = scores + mask |
| | | |
| | | self.attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(self.attn, value) # (batch, head, time1, d_k) |
| | | attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(attn, value) # (batch, head, time1, d_k) |
| | | |
| | | context_layer = context_layer.permute(0, 2, 1, 3).contiguous() |
| | | new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) |
| | |
| | | def forward_attention(self, value, scores, mask): |
| | | scores = scores + mask |
| | | |
| | | self.attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(self.attn, value) # (batch, head, time1, d_k) |
| | | attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(attn, value) # (batch, head, time1, d_k) |
| | | |
| | | context_layer = context_layer.permute(0, 2, 1, 3).contiguous() |
| | | new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) |
| | |
| | | def forward_attention(self, value, scores, mask): |
| | | scores = scores + mask |
| | | |
| | | self.attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(self.attn, value) # (batch, head, time1, d_k) |
| | | attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(attn, value) # (batch, head, time1, d_k) |
| | | |
| | | context_layer = context_layer.permute(0, 2, 1, 3).contiguous() |
| | | new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) |
| | |
| | | def forward_attention(self, value, scores, mask): |
| | | scores = scores + mask |
| | | |
| | | self.attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(self.attn, value) # (batch, head, time1, d_k) |
| | | attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(attn, value) # (batch, head, time1, d_k) |
| | | |
| | | context_layer = context_layer.permute(0, 2, 1, 3).contiguous() |
| | | new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) |
| | |
| | | "inf" |
| | | ) # float(numpy.finfo(torch.tensor(0, dtype=scores.dtype).numpy().dtype).min) |
| | | scores = scores.masked_fill(mask, min_value) |
| | | self.attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | mask, 0.0 |
| | | ) # (batch, head, time1, time2) |
| | | else: |
| | | self.attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | |
| | | p_attn = self.dropout(self.attn) |
| | | p_attn = self.dropout(attn) |
| | | x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) |
| | | x = ( |
| | | x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k) |
| | |
| | | self.embed = torch.nn.Embedding( |
| | | 7 + len(self.lid_dict) + len(self.textnorm_dict), input_size |
| | | ) |
| | | self.emo_dict = {"unk": 25009, "happy": 25001, "sad": 25002, "angry": 25003, "neutral": 25004} |
| | | self.emo_dict = { |
| | | "unk": 25009, |
| | | "happy": 25001, |
| | | "sad": 25002, |
| | | "angry": 25003, |
| | | "neutral": 25004, |
| | | } |
| | | |
| | | self.criterion_att = LabelSmoothingLoss( |
| | | size=self.vocab_size, |
| | |
| | | ctc_logits = self.ctc.log_softmax(encoder_out) |
| | | if kwargs.get("ban_emo_unk", False): |
| | | ctc_logits[:, :, self.emo_dict["unk"]] = -float("inf") |
| | | |
| | | |
| | | results = [] |
| | | b, n, d = encoder_out.size() |
| | | if isinstance(key[0], (list, tuple)): |
| | |
| | | mask = mask.unsqueeze(1).eq(0) # (batch, 1, *, time2) |
| | | min_value = float(numpy.finfo(torch.tensor(0, dtype=scores.dtype).numpy().dtype).min) |
| | | scores = scores.masked_fill(mask, min_value) |
| | | self.attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | mask, 0.0 |
| | | ) # (batch, head, time1, time2) |
| | | else: |
| | | self.attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | |
| | | p_attn = self.dropout(self.attn) |
| | | p_attn = self.dropout(attn) |
| | | x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) |
| | | x = ( |
| | | x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k) |
| | |
| | | |
| | | min_value = float(numpy.finfo(torch.tensor(0, dtype=scores.dtype).numpy().dtype).min) |
| | | scores = scores.masked_fill(mask, min_value) |
| | | self.attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | mask, 0.0 |
| | | ) # (batch, head, time1, time2) |
| | | else: |
| | | self.attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | |
| | | p_attn = self.dropout(self.attn) |
| | | p_attn = self.dropout(attn) |
| | | x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) |
| | | x = ( |
| | | x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k) |
| | |
| | | "inf" |
| | | ) # min_value = float(np.finfo(torch.tensor(0, dtype=qk.dtype).numpy().dtype).min) |
| | | scores = scores.masked_fill(mask, min_value) |
| | | self.attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | attn = torch.softmax(scores, dim=-1).masked_fill( |
| | | mask, 0.0 |
| | | ) # (batch, head, time1, time2) |
| | | else: |
| | | self.attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | attn = torch.softmax(scores, dim=-1) # (batch, head, time1, time2) |
| | | |
| | | p_attn = self.dropout(self.attn) |
| | | p_attn = self.dropout(attn) |
| | | x = torch.matmul(p_attn, value) # (batch, head, time1, d_k) |
| | | x = ( |
| | | x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.d_k) |
| | |
| | | def forward_attention(self, value, scores, mask): |
| | | scores = scores + mask |
| | | |
| | | self.attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(self.attn, value) # (batch, head, time1, d_k) |
| | | attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(attn, value) # (batch, head, time1, d_k) |
| | | |
| | | context_layer = context_layer.permute(0, 2, 1, 3).contiguous() |
| | | new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) |
| | |
| | | def forward_attention(self, value, scores, mask): |
| | | scores = scores + mask |
| | | |
| | | self.attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(self.attn, value) # (batch, head, time1, d_k) |
| | | attn = torch.softmax(scores, dim=-1) |
| | | context_layer = torch.matmul(attn, value) # (batch, head, time1, d_k) |
| | | |
| | | context_layer = context_layer.permute(0, 2, 1, 3).contiguous() |
| | | new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) |
| | |
| | | if chunk_mask is not None: |
| | | mask = chunk_mask.unsqueeze(0).unsqueeze(1) | mask |
| | | scores = scores.masked_fill(mask, float("-inf")) |
| | | self.attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) |
| | | attn = torch.softmax(scores, dim=-1).masked_fill(mask, 0.0) |
| | | |
| | | attn_output = self.dropout(self.attn) |
| | | attn_output = self.dropout(attn) |
| | | attn_output = torch.matmul(attn_output, value) |
| | | |
| | | attn_output = self.linear_out( |