From 1596f6f414f6f41da66506debb1dff19fffeb3ec Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期一, 24 六月 2024 11:55:17 +0800
Subject: [PATCH] fixbug hotwords

---
 funasr/datasets/large_datasets/utils/hotword_utils.py |   71 ++++++++++++++++++++---------------
 1 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/funasr/datasets/large_datasets/utils/hotword_utils.py b/funasr/datasets/large_datasets/utils/hotword_utils.py
index 73f8bdd..66c131e 100644
--- a/funasr/datasets/large_datasets/utils/hotword_utils.py
+++ b/funasr/datasets/large_datasets/utils/hotword_utils.py
@@ -1,33 +1,42 @@
 import random
 
-def sample_hotword(length, 
-                   hotword_min_length, 
-                   hotword_max_length,
-                   sample_rate,
-                   double_rate,
-                   pre_prob,
-                   pre_index=None,
-                   pre_hwlist=None):
-        if length < hotword_min_length:
-            return [-1]
-        if random.random() < sample_rate:
-            if pre_prob > 0 and random.random() < pre_prob and pre_index is not None:
-                return pre_index
-            if length == hotword_min_length:
-                return [0, length-1]
-            elif random.random() < double_rate and length > hotword_max_length + hotword_min_length + 2:
-                # sample two hotwords in a sentence
-                _max_hw_length = min(hotword_max_length, length // 2)
-                # first hotword
-                start1 = random.randint(0, length // 3)
-                end1 = random.randint(start1 + hotword_min_length - 1, start1 + _max_hw_length - 1)
-                # second hotword
-                start2 = random.randint(end1 + 1, length - hotword_min_length)
-                end2 = random.randint(min(length-1, start2+hotword_min_length-1), min(length-1, start2+hotword_max_length-1))
-                return [start1, end1, start2, end2]
-            else:  # single hotword
-                start = random.randint(0, length - hotword_min_length)
-                end = random.randint(min(length-1, start+hotword_min_length-1), min(length-1, start+hotword_max_length-1))
-                return [start, end]
-        else:
-            return [-1]
\ No newline at end of file
+
+def sample_hotword(  # randomly pick hotword span(s); returns inclusive index pairs, or [-1] for "none"
+    length,  # sentence length; every returned index is at most length - 1
+    hotword_min_length,  # shortest span that may be sampled
+    hotword_max_length,  # longest span that may be sampled
+    sample_rate,  # probability of sampling at all; otherwise [-1] is returned
+    double_rate,  # probability of trying for two spans (long sentences only)
+    pre_prob,  # probability of returning pre_index as-is (when it is not None)
+    pre_index=None,  # pre-selected result, handed back unchanged when chosen
+    pre_hwlist=None,  # accepted but never read inside this function
+):
+    if length < hotword_min_length:  # too short to hold even the shortest span
+        return [-1]
+    if random.random() < sample_rate:
+        if pre_prob > 0 and random.random() < pre_prob and pre_index is not None:
+            return pre_index  # the caller's own object, not a copy
+        if length == hotword_min_length:
+            return [0, length - 1]  # only one span fits: the whole sentence
+        elif random.random() < double_rate and length > hotword_max_length + hotword_min_length + 2:
+            # Two hotwords in one sentence; result is [start1, end1, start2, end2].
+            _max_hw_length = min(hotword_max_length, length // 2)
+            # First hotword: starts at index <= length // 3, at most _max_hw_length long.
+            start1 = random.randint(0, length // 3)
+            end1 = random.randint(start1 + hotword_min_length - 1, start1 + _max_hw_length - 1)
+            # Second hotword starts after end1. NOTE(review): randint raises ValueError when end1 + 1 > length - hotword_min_length (possible with min=5, max=10, length=18).
+            start2 = random.randint(end1 + 1, length - hotword_min_length)
+            end2 = random.randint(
+                min(length - 1, start2 + hotword_min_length - 1),  # both bounds are clamped to the last index
+                min(length - 1, start2 + hotword_max_length - 1),
+            )
+            return [start1, end1, start2, end2]
+        else:  # single hotword; result is [start, end]
+            start = random.randint(0, length - hotword_min_length)
+            end = random.randint(
+                min(length - 1, start + hotword_min_length - 1),  # both bounds are clamped to the last index
+                min(length - 1, start + hotword_max_length - 1),
+            )
+            return [start, end]
+    else:
+        return [-1]

--
Gitblit v1.9.1