游雁
2024-03-27 9b4e9cc8a0311e5243d69b73ed073e7ea441982e
funasr/models/campplus/utils.py
@@ -1,5 +1,8 @@
# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
#  MIT License  (https://opensource.org/licenses/MIT)
# Modified from 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker)
import io
import os
@@ -14,6 +17,7 @@
from typing import Generator, Union
from abc import ABCMeta, abstractmethod
import torchaudio.compliance.kaldi as Kaldi
from funasr.models.transformer.utils.nets_utils import pad_list
@@ -173,6 +177,9 @@
    return res
def smooth(res, mindur=1):
    # if there are fewer than two segments (none or just one), there is nothing to smooth; return directly
    if len(res) < 2:
        return res
    # short segments are assigned to nearest speakers.
    for i in range(len(res)):
        res[i][0] = round(res[i][0], 2)
@@ -208,7 +215,7 @@
            if overlap > max_overlap:
                max_overlap = overlap
                sentence_spk = spk
        d['spk'] = sentence_spk
        d['spk'] = int(sentence_spk)
        sd_sentence_list.append(d)
    return sd_sentence_list