游雁
2024-06-12 2ac79cd3f312e485f3fc4f0e63313cc8a3e0bfc6
funasr/models/emotion2vec/audio.py
@@ -22,7 +22,6 @@
class AudioEncoder(ModalitySpecificEncoder):
    def __init__(
        self,
        modality_cfg,
@@ -91,9 +90,7 @@
        )
        decoder = (
            Decoder1d(modality_cfg.decoder, embed_dim)
            if modality_cfg.decoder is not None
            else None
            Decoder1d(modality_cfg.decoder, embed_dim) if modality_cfg.decoder is not None else None
        )
        alibi_bias_fn = partial(get_alibi_bias, alibi_biases=alibi_biases)
@@ -144,13 +141,9 @@
                        output_lengths - 1,
                    )
                ] = 1
                padding_mask = (
                    1 - padding_mask.flip([-1]).cumsum(-1).flip([-1])
                ).bool()
                padding_mask = (1 - padding_mask.flip([-1]).cumsum(-1).flip([-1])).bool()
            else:
                padding_mask = torch.zeros(
                    x.shape[:2], dtype=torch.bool, device=x.device
                )
                padding_mask = torch.zeros(x.shape[:2], dtype=torch.bool, device=x.device)
        return padding_mask