liugz18
2024-07-18 d80ac2fd2df4e7fb8a28acfa512bb11472b5cc99
funasr/models/emotion2vec/audio.py
@@ -22,7 +22,6 @@
class AudioEncoder(ModalitySpecificEncoder):
    def __init__(
        self,
        modality_cfg,
@@ -91,9 +90,7 @@
        )
        decoder = (
            Decoder1d(modality_cfg.decoder, embed_dim)
            if modality_cfg.decoder is not None
            else None
            Decoder1d(modality_cfg.decoder, embed_dim) if modality_cfg.decoder is not None else None
        )
        alibi_bias_fn = partial(get_alibi_bias, alibi_biases=alibi_biases)
@@ -144,13 +141,9 @@
                        output_lengths - 1,
                    )
                ] = 1
                padding_mask = (
                    1 - padding_mask.flip([-1]).cumsum(-1).flip([-1])
                ).bool()
                padding_mask = (1 - padding_mask.flip([-1]).cumsum(-1).flip([-1])).bool()
            else:
                padding_mask = torch.zeros(
                    x.shape[:2], dtype=torch.bool, device=x.device
                )
                padding_mask = torch.zeros(x.shape[:2], dtype=torch.bool, device=x.device)
        return padding_mask