def __init__(
    self,
    # token_list: Union[Tuple[str, ...], List[str]],
    specaug: Optional[str] = None,
    specaug_conf: Optional[Dict] = None,
    normalize: Optional[str] = None,

    self.use_1st_decoder_loss = use_1st_decoder_loss
    self.length_normalized_loss = length_normalized_loss
    self.beam_search = None  # constructed later, at decoding time
    self.error_calculator = None  # constructed later, when CER/WER is computed
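    # Sketch (assumption, not shown in this excerpt): the string-valued
    # specaug/specaug_conf arguments are typically resolved into a module
    # instance through a name-to-class registry, roughly:
    #
    #     if specaug is not None:
    #         specaug = specaug_registry[specaug](**(specaug_conf or {}))
    #
    # where specaug_registry is a hypothetical mapping, not an API confirmed
    # by this codebase.
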
def forward(
    self,

    self.nbest = kwargs.get("nbest", 1)

    meta_data = {}
    # Use precomputed fbank features directly; raw waveforms take the
    # frontend path instead.
    if isinstance(data_in, torch.Tensor) and kwargs.get("data_type", "sound") == "fbank":  # fbank
        speech, speech_lengths = data_in, data_lengths
        if len(speech.shape) < 3:
            speech = speech[None, :, :]  # add batch dimension
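
    # Usage sketch (hypothetical names, not from this excerpt): precomputed
    # fbank features must be flagged explicitly, e.g.
    #
    #     res = model(feats, feats_len, data_type="fbank", key=["utt1"])
    #
    # whereas the default data_type="sound" routes a raw-waveform tensor
    # through the frontend first.
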
    if tokenizer is not None:
        # Change integer ids to tokens
        token = tokenizer.ids2tokens(token_int)
        text = tokenizer.tokens2text(token)

        text_postprocessed = tokenizer.tokens2text(token)
        if not hasattr(tokenizer, "bpemodel"):
            text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)

| | | result_i = {"key": key[i], "text": text_postprocessed} |
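
        # Toy illustration (assumed values): the ids -> tokens -> text flow
        # above, with a hypothetical BPE tokenizer:
        #
        #     token_int = [12, 7, 93]
        #     token = tokenizer.ids2tokens(token_int)  # e.g. ["▁he", "ll", "o"]
        #     text = tokenizer.tokens2text(token)      # "hello"
        #
        # Tokenizers without a `bpemodel` attribute go through
        # postprocess_utils.sentence_postprocess instead, as in the branch
        # above.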