嘉渊
2023-07-06 91425c670b21fa244f739885d34b88742272747c
update eend_ola
5个文件已修改
22 ■■■■■ 已修改文件
egs/callhome/eend_ola/conf/train_diar_eend_ola_callhome_chunk2000.yaml 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/eend_ola/conf/train_diar_eend_ola_simu_2spkr.yaml 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/eend_ola/conf/train_diar_eend_ola_simu_allspkr.yaml 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/eend_ola/conf/train_diar_eend_ola_simu_allspkr_chunk2000.yaml 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/models/e2e_diar_eend_ola.py 14 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/callhome/eend_ola/conf/train_diar_eend_ola_callhome_chunk2000.yaml
@@ -12,7 +12,7 @@
    n_units: 256
# model related
-model: eend_ola_similar_eend
+model: eend_ola
model_conf:
    attractor_loss_weight:  0.01
    max_n_speaker: 8
egs/callhome/eend_ola/conf/train_diar_eend_ola_simu_2spkr.yaml
@@ -12,7 +12,7 @@
    n_units: 256
# model related
-model: eend_ola_similar_eend
+model: eend_ola
model_conf:
    max_n_speaker: 8
egs/callhome/eend_ola/conf/train_diar_eend_ola_simu_allspkr.yaml
@@ -12,7 +12,7 @@
    n_units: 256
# model related
-model: eend_ola_similar_eend
+model: eend_ola
model_conf:
    max_n_speaker: 8
egs/callhome/eend_ola/conf/train_diar_eend_ola_simu_allspkr_chunk2000.yaml
@@ -12,7 +12,7 @@
    n_units: 256
# model related
-model: eend_ola_similar_eend
+model: eend_ola
model_conf:
    max_n_speaker: 8
funasr/models/e2e_diar_eend_ola.py
@@ -12,7 +12,7 @@
from funasr.models.frontend.wav_frontend import WavFrontendMel23
from funasr.modules.eend_ola.encoder import EENDOLATransformerEncoder
from funasr.modules.eend_ola.encoder_decoder_attractor import EncoderDecoderAttractor
-from funasr.modules.eend_ola.utils.losses import fast_batch_pit_n_speaker_loss, standard_loss, cal_power_loss
+from funasr.modules.eend_ola.utils.losses import standard_loss, cal_power_loss, fast_batch_pit_n_speaker_loss
from funasr.modules.eend_ola.utils.power import create_powerlabel
from funasr.modules.eend_ola.utils.power import generate_mapping_dict
from funasr.torch_utils.device_funcs import force_gatherable
@@ -109,23 +109,17 @@
    def forward(
            self,
            speech: List[torch.Tensor],
            speech_lengths: torch.Tensor,  # num_frames of each sample
            speaker_labels: List[torch.Tensor],
            speaker_labels_lengths: torch.Tensor,  # num_speakers of each sample
            orders: torch.Tensor,
    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]:
        # Check that batch_size is unified
-        assert (
-                len(speech)
-                == len(speech_lengths)
-                == len(speaker_labels)
-                == len(speaker_labels_lengths)
-        ), (len(speech), len(speech_lengths), len(speaker_labels), len(speaker_labels_lengths))
+        assert (len(speech) == len(speaker_labels)), (len(speech), len(speaker_labels))
+        speech_lengths = torch.tensor([len(sph) for sph in speech]).to(torch.int64)
+        speaker_labels_lengths = torch.tensor([spk.shape[-1] for spk in speaker_labels]).to(torch.int64)
        batch_size = len(speech)
        # Encoder
        speech = [s[:s_len] for s, s_len in zip(speech, speech_lengths)]
        encoder_out = self.forward_encoder(speech, speech_lengths)
        # Encoder-decoder attractor