zhifu gao
2024-04-24 861147c7308b91068ffa02724fdf74ee623a909e
funasr/datasets/large_datasets/utils/hotword_utils.py
@@ -1,13 +1,16 @@
import random
def sample_hotword(length,
def sample_hotword(
    length,
                   hotword_min_length, 
                   hotword_max_length,
                   sample_rate,
                   double_rate,
                   pre_prob,
                   pre_index=None,
                   pre_hwlist=None):
    pre_hwlist=None,
):
        if length < hotword_min_length:
            return [-1]
        if random.random() < sample_rate:
@@ -23,11 +26,17 @@
                end1 = random.randint(start1 + hotword_min_length - 1, start1 + _max_hw_length - 1)
                # second hotword
                start2 = random.randint(end1 + 1, length - hotword_min_length)
                end2 = random.randint(min(length-1, start2+hotword_min_length-1), min(length-1, start2+hotword_max_length-1))
            end2 = random.randint(
                min(length - 1, start2 + hotword_min_length - 1),
                min(length - 1, start2 + hotword_max_length - 1),
            )
                return [start1, end1, start2, end2]
            else:  # single hotword
                start = random.randint(0, length - hotword_min_length)
                end = random.randint(min(length-1, start+hotword_min_length-1), min(length-1, start+hotword_max_length-1))
            end = random.randint(
                min(length - 1, start + hotword_min_length - 1),
                min(length - 1, start + hotword_max_length - 1),
            )
                return [start, end]
        else:
            return [-1]