| | |
| | | # Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) |
| | | |
| | | import io |
| | | from typing import Union |
| | | |
| | | import librosa as sf |
| | | import numpy as np |
| | | import torch |
| | | import torch.nn.functional as F |
| | | import torchaudio.compliance.kaldi as Kaldi |
| | | from torch import nn |
| | | |
| | | import contextlib |
| | | import os |
| | | import torch |
| | | import requests |
| | | import tempfile |
| | | from abc import ABCMeta, abstractmethod |
| | | import contextlib |
| | | import numpy as np |
| | | import librosa as sf |
| | | from typing import Union |
| | | from pathlib import Path |
| | | from typing import Generator, Union |
| | | |
| | | import requests |
| | | from abc import ABCMeta, abstractmethod |
| | | import torchaudio.compliance.kaldi as Kaldi |
| | | from funasr.models.transformer.utils.nets_utils import pad_list |
| | | |
| | | |
| | | def check_audio_list(audio: list): |
| | |
| | | |
| | | def extract_feature(audio): |
| | | features = [] |
| | | feature_times = [] |
| | | feature_lengths = [] |
| | | for au in audio: |
| | | feature = Kaldi.fbank( |
| | | au.unsqueeze(0), num_mel_bins=80) |
| | | feature = feature - feature.mean(dim=0, keepdim=True) |
| | | features.append(feature.unsqueeze(0)) |
| | | feature_lengths.append(au.shape[0]) |
| | | features = torch.cat(features) |
| | | return features, feature_lengths |
| | | features.append(feature) |
| | | feature_times.append(au.shape[0]) |
| | | feature_lengths.append(feature.shape[0]) |
| | | # padding for batch inference |
| | | features_padded = pad_list(features, pad_value=0) |
| | | # features = torch.cat(features) |
| | | return features_padded, feature_lengths, feature_times |
| | | |
| | | |
| | | def postprocess(segments: list, vad_segments: list, |
| | |
| | | def distribute_spk(sentence_list, sd_time_list): |
| | | sd_sentence_list = [] |
| | | for d in sentence_list: |
| | | sentence_start = d['ts_list'][0][0] |
| | | sentence_end = d['ts_list'][-1][1] |
| | | sentence_start = d['start'] |
| | | sentence_end = d['end'] |
| | | sentence_spk = 0 |
| | | max_overlap = 0 |
| | | for sd_time in sd_time_list: |
| | |
| | | d['spk'] = sentence_spk |
| | | sd_sentence_list.append(d) |
| | | return sd_sentence_list |
| | | |
| | | |
| | | |
| | | |
| | | class Storage(metaclass=ABCMeta): |