From c880db53646ab9fd26417f4baf004ab44cc24e1a Mon Sep 17 00:00:00 2001
From: lingji-yidong <75744976+lingji-yidong@users.noreply.github.com>
Date: Fri, 28 Jun 2024 01:28:24 +0800
Subject: [PATCH] Fix: Return tuple ('', []) when char_list is empty to prevent ValueError (#1857)
---
funasr/datasets/large_datasets/utils/hotword_utils.py | 42 ++++++++++++++++++++++++++++++++++++++++++
1 files changed, 42 insertions(+), 0 deletions(-)
diff --git a/funasr/datasets/large_datasets/utils/hotword_utils.py b/funasr/datasets/large_datasets/utils/hotword_utils.py
new file mode 100644
index 0000000..66c131e
--- /dev/null
+++ b/funasr/datasets/large_datasets/utils/hotword_utils.py
@@ -0,0 +1,68 @@
+import random
+
+
+def sample_hotword(
+    length,
+    hotword_min_length,
+    hotword_max_length,
+    sample_rate,
+    double_rate,
+    pre_prob,
+    pre_index=None,
+    pre_hwlist=None,
+):
+    """Randomly choose hotword span(s) among ``length`` positions.
+
+    All returned indices are inclusive positions in ``range(length)``.
+
+    Args:
+        length: Number of positions available to sample from.
+        hotword_min_length: Minimum number of positions in one span.
+        hotword_max_length: Maximum number of positions in one span;
+            assumed to be >= ``hotword_min_length``.
+        sample_rate: Probability of sampling any hotword at all.
+        double_rate: Probability of trying to sample two spans, not one.
+        pre_prob: Probability of reusing ``pre_index`` when it is given.
+        pre_index: Previously chosen indices to reuse, or ``None``.
+        pre_hwlist: Unused; kept for call compatibility.
+
+    Returns:
+        ``[-1]`` when no hotword is sampled, ``pre_index`` (the same object)
+        when it is reused, ``[start, end]`` for one span, or
+        ``[start1, end1, start2, end2]`` for two non-overlapping spans.
+    """
+    if length < hotword_min_length:
+        return [-1]
+    if random.random() >= sample_rate:
+        return [-1]
+    if pre_prob > 0 and random.random() < pre_prob and pre_index is not None:
+        return pre_index
+    if length == hotword_min_length:
+        return [0, length - 1]
+
+    last = length - 1
+    if random.random() < double_rate and length > hotword_max_length + hotword_min_length + 2:
+        # Sample two hotwords in a sentence.
+        max_hw_length = min(hotword_max_length, length // 2)
+        # The first span must leave at least hotword_min_length positions
+        # after it; without these caps the start2 randint() below could get
+        # an empty range and raise ValueError.
+        start1 = random.randint(0, min(length // 3, length - 2 * hotword_min_length))
+        end1 = random.randint(
+            start1 + hotword_min_length - 1,
+            min(start1 + max_hw_length - 1, last - hotword_min_length),
+        )
+        start2 = random.randint(end1 + 1, length - hotword_min_length)
+        end2 = random.randint(
+            min(last, start2 + hotword_min_length - 1),
+            min(last, start2 + hotword_max_length - 1),
+        )
+        return [start1, end1, start2, end2]
+
+    # Single hotword; the end is clipped to the last valid index.
+    start = random.randint(0, length - hotword_min_length)
+    end = random.randint(
+        min(last, start + hotword_min_length - 1),
+        min(last, start + hotword_max_length - 1),
+    )
+    return [start, end]
--
Gitblit v1.9.1