| | |
| | | mask_chunk_predictor = self.encoder.overlap_chunk_cls.get_mask_chunk_predictor( |
| | | None, device=encoder_out.device, batch_size=encoder_out.size(0) |
| | | ) |
| | | mask_shift_chunk = self.encoder.overlap_chunk_cls.get_mask_shift_chunk( |
| | | mask_shfit_chunk = self.encoder.overlap_chunk_cls.get_mask_shfit_chunk( |
| | | None, device=encoder_out.device, batch_size=encoder_out.size(0) |
| | | ) |
| | | encoder_out = encoder_out * mask_shift_chunk |
| | | encoder_out = encoder_out * mask_shfit_chunk |
| | | pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor( |
| | | encoder_out, |
| | | ys_out_pad, |
| | |
| | | mask_chunk_predictor = self.encoder2.overlap_chunk_cls.get_mask_chunk_predictor( |
| | | None, device=encoder_out.device, batch_size=encoder_out.size(0) |
| | | ) |
| | | mask_shift_chunk = self.encoder2.overlap_chunk_cls.get_mask_shift_chunk( |
| | | mask_shfit_chunk = self.encoder2.overlap_chunk_cls.get_mask_shfit_chunk( |
| | | None, device=encoder_out.device, batch_size=encoder_out.size(0) |
| | | ) |
| | | encoder_out = encoder_out * mask_shift_chunk |
| | | encoder_out = encoder_out * mask_shfit_chunk |
| | | pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor2( |
| | | encoder_out, |
| | | ys_out_pad, |
| | |
| | | mask_chunk_predictor = self.encoder.overlap_chunk_cls.get_mask_chunk_predictor( |
| | | None, device=encoder_out.device, batch_size=encoder_out.size(0) |
| | | ) |
| | | mask_shift_chunk = self.encoder.overlap_chunk_cls.get_mask_shift_chunk( |
| | | mask_shfit_chunk = self.encoder.overlap_chunk_cls.get_mask_shfit_chunk( |
| | | None, device=encoder_out.device, batch_size=encoder_out.size(0) |
| | | ) |
| | | encoder_out = encoder_out * mask_shift_chunk |
| | | encoder_out = encoder_out * mask_shfit_chunk |
| | | pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor( |
| | | encoder_out, |
| | | ys_out_pad, |
| | |
| | | mask_chunk_predictor = self.encoder2.overlap_chunk_cls.get_mask_chunk_predictor( |
| | | None, device=encoder_out.device, batch_size=encoder_out.size(0) |
| | | ) |
| | | mask_shift_chunk = self.encoder2.overlap_chunk_cls.get_mask_shift_chunk( |
| | | mask_shfit_chunk = self.encoder2.overlap_chunk_cls.get_mask_shfit_chunk( |
| | | None, device=encoder_out.device, batch_size=encoder_out.size(0) |
| | | ) |
| | | encoder_out = encoder_out * mask_shift_chunk |
| | | encoder_out = encoder_out * mask_shfit_chunk |
| | | pre_acoustic_embeds, pre_token_length, pre_alphas, _ = self.predictor2( |
| | | encoder_out, |
| | | ys_out_pad, |