嘉渊
2023-04-24 df5f263e5fe3d7961b1aeb3589012400a9905a8f
funasr/models/frontend/wav_frontend.py
@@ -9,10 +9,7 @@
from typeguard import check_argument_types
import funasr.models.frontend.eend_ola_feature as eend_ola_feature
from funasr.models.frontend.abs_frontend import AbsFrontend
from modelscope.utils.logger import get_logger
logger = get_logger()
def load_cmvn(cmvn_file):
    with open(cmvn_file, 'r', encoding='utf-8') as f:
@@ -78,7 +75,7 @@
    return LFR_outputs.type(torch.float32)
class WavFrontend(AbsFrontend):
class WavFrontend(torch.nn.Module):
    """Conventional frontend structure for ASR.
    """
@@ -209,7 +206,7 @@
        return feats_pad, feats_lens
class WavFrontendOnline(AbsFrontend):
class WavFrontendOnline(torch.nn.Module):
    """Conventional frontend structure for streaming ASR/VAD.
    """
@@ -425,10 +422,8 @@
                    reserve_frame_idx = lfr_splice_frame_idxs[0] - minus_frame
                    # print('reserve_frame_idx:  ' + str(reserve_frame_idx))
                    # print('frame_frame:  ' + str(frame_from_waveforms))
                    self.reserve_waveforms = self.waveforms[:,
                                             reserve_frame_idx * self.frame_shift_sample_length:frame_from_waveforms * self.frame_shift_sample_length]
                    sample_length = (
                                                frame_from_waveforms - 1) * self.frame_shift_sample_length + self.frame_sample_length
                    self.reserve_waveforms = self.waveforms[:, reserve_frame_idx * self.frame_shift_sample_length:frame_from_waveforms * self.frame_shift_sample_length]
                    sample_length = (frame_from_waveforms - 1) * self.frame_shift_sample_length + self.frame_sample_length
                    self.waveforms = self.waveforms[:, :sample_length]
            else:
                # update self.reserve_waveforms and self.lfr_splice_cache
@@ -456,7 +451,7 @@
        self.lfr_splice_cache = []
class WavFrontendMel23(AbsFrontend):
class WavFrontendMel23(torch.nn.Module):
    """Conventional frontend structure for ASR.
    """
@@ -487,9 +482,6 @@
        batch_size = input.size(0)
        feats = []
        feats_lens = []
        logger.info("batch_size: {}".format(batch_size))
        logger.info("input: {}".format(input))
        logger.info("input_lengths: {}".format(input_lengths))
        for i in range(batch_size):
            waveform_length = input_lengths[i]
            waveform = input[i][:waveform_length]