From 6fe10a8dbfbab2bdcc28a411f9c5da85a4a8d002 Mon Sep 17 00:00:00 2001
From: hohaiuhsx <hohaiuhsx@gmail.com>
Date: Mon, 10 Mar 2025 23:16:22 +0800
Subject: [PATCH] 修复 当选用SenseVoice模型处理长音频(同时开启vad和output_timestamp)时的异常 (#2413)

---
 funasr/tokenizer/korean_cleaner.py |    7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/funasr/tokenizer/korean_cleaner.py b/funasr/tokenizer/korean_cleaner.py
index ee556d4..eceebe2 100644
--- a/funasr/tokenizer/korean_cleaner.py
+++ b/funasr/tokenizer/korean_cleaner.py
@@ -19,8 +19,7 @@
             "9": "구",
         }
         new_text = "".join(
-            number_to_kor[char] if char in number_to_kor.keys() else char
-            for char in text
+            number_to_kor[char] if char in number_to_kor.keys() else char for char in text
         )
         return new_text
 
@@ -56,9 +55,7 @@
         }
         new_text = re.sub("[a-z]+", lambda x: str.upper(x.group()), text)
         new_text = "".join(
-            upper_alphabet_to_kor[char]
-            if char in upper_alphabet_to_kor.keys()
-            else char
+            upper_alphabet_to_kor[char] if char in upper_alphabet_to_kor.keys() else char
             for char in new_text
         )
 

--
Gitblit v1.9.1