| | |
| | | from funasr.modules.e2e_asr_common import ErrorCalculator |
| | | from funasr.modules.nets_utils import th_accuracy |
| | | from funasr.torch_utils.device_funcs import force_gatherable |
| | | from funasr.train.abs_espnet_model import AbsESPnetModel |
| | | from funasr.models.base_model import FunASRModel |
| | | |
| | | if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): |
| | | from torch.cuda.amp import autocast |
| | |
| | | yield |
| | | |
| | | |
| | | class ESPnetASRModel(AbsESPnetModel): |
| | | class ESPnetASRModel(FunASRModel): |
| | | """CTC-attention hybrid Encoder-Decoder model""" |
| | | |
| | | def __init__( |
| | |
| | | text_lengths: torch.Tensor, |
| | | ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor], torch.Tensor]: |
| | | """Frontend + Encoder + Decoder + Calc loss |
| | | |
| | | Args: |
| | | speech: (Batch, Length, ...) |
| | | speech_lengths: (Batch, ) |
| | |
| | | self, speech: torch.Tensor, speech_lengths: torch.Tensor |
| | | ) -> Tuple[torch.Tensor, torch.Tensor]: |
| | | """Frontend + Encoder. Note that this method is used by asr_inference.py |
| | | |
| | | Args: |
| | | speech: (Batch, Length, ...) |
| | | speech_lengths: (Batch, ) |
| | |
| | | ys_pad_lens: torch.Tensor, |
| | | ) -> torch.Tensor: |
| | | """Compute negative log likelihood(nll) from transformer-decoder |
| | | |
| | | Normally, this function is called in batchify_nll. |
| | | |
| | | Args: |
| | | encoder_out: (Batch, Length, Dim) |
| | | encoder_out_lens: (Batch,) |
| | |
| | | batch_size: int = 100, |
| | | ): |
| | | """Compute negative log likelihood(nll) from transformer-decoder |
| | | |
| | | To avoid OOM, this fuction seperate the input into batches. |
| | | Then call nll for each batch and combine and return results. |
| | | Args: |