| File was renamed from funasr/models/frontend/wav_frontend.py |
| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | import torch.nn as nn |
| | | import torchaudio.compliance.kaldi as kaldi |
| | | from torch.nn.utils.rnn import pad_sequence |
| | | |
| | | import funasr.models.frontend.eend_ola_feature as eend_ola_feature |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | | import funasr.frontends.eend_ola_feature as eend_ola_feature |
| | | from funasr.utils.register import register_class |
| | | |
| | | |
| | | |
| | | def load_cmvn(cmvn_file): |
| | |
| | | LFR_outputs = torch.vstack(LFR_inputs) |
| | | return LFR_outputs.type(torch.float32) |
| | | |
| | | |
| | | class WavFrontend(AbsFrontend): |
| | | @register_class("frontend_classes", "WavFrontend") |
| | | class WavFrontend(nn.Module): |
| | | """Conventional frontend structure for ASR. |
| | | """ |
| | | |
| | |
| | | dither: float = 1.0, |
| | | snip_edges: bool = True, |
| | | upsacle_samples: bool = True, |
| | | **kwargs, |
| | | ): |
| | | super().__init__() |
| | | self.fs = fs |
| | |
| | | return feats_pad, feats_lens |
| | | |
| | | |
| | | class WavFrontendOnline(AbsFrontend): |
| | | @register_class("frontend_classes", "WavFrontendOnline") |
| | | class WavFrontendOnline(nn.Module): |
| | | """Conventional frontend structure for streaming ASR/VAD. |
| | | """ |
| | | |
| | |
| | | dither: float = 1.0, |
| | | snip_edges: bool = True, |
| | | upsacle_samples: bool = True, |
| | | **kwargs, |
| | | ): |
| | | super().__init__() |
| | | self.fs = fs |
| | |
| | | self.lfr_splice_cache = [] |
| | | |
| | | |
| | | class WavFrontendMel23(AbsFrontend): |
| | | class WavFrontendMel23(nn.Module): |
| | | """Conventional frontend structure for ASR. |
| | | """ |
| | | |
| | |
| | | frame_shift: int = 10, |
| | | lfr_m: int = 1, |
| | | lfr_n: int = 1, |
| | | **kwargs, |
| | | ): |
| | | super().__init__() |
| | | self.fs = fs |