| | |
| | | # Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved. |
| | | # Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) |
| | | #!/usr/bin/env python3 |
| | | # -*- encoding: utf-8 -*- |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | # Modified from 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker) |
| | | |
| | | import io |
| | | import os |
| | |
| | | from typing import Generator, Union |
| | | from abc import ABCMeta, abstractmethod |
| | | import torchaudio.compliance.kaldi as Kaldi |
| | | |
| | | from funasr.models.transformer.utils.nets_utils import pad_list |
| | | |
| | | |
| | |
| | | return res |
| | | |
| | | def smooth(res, mindur=1): |
| | | # fewer than two segments (empty or a single one): nothing to smooth, return as-is |
| | | if len(res) < 2: |
| | | return res |
| | | # short segments are assigned to nearest speakers. |
| | | for i in range(len(res)): |
| | | res[i][0] = round(res[i][0], 2) |
| | |
| | | if overlap > max_overlap: |
| | | max_overlap = overlap |
| | | sentence_spk = spk |
| | | d['spk'] = sentence_spk |
| | | d['spk'] = int(sentence_spk) |
| | | sd_sentence_list.append(d) |
| | | return sd_sentence_list |
| | | |