From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] Rename TextPreprocessSegDict to TextPreprocessRemovePunctuation and reformat

---
 funasr/datasets/llm_datasets/preprocessor.py |   33 +++++++++++++++------------------
 1 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/funasr/datasets/llm_datasets/preprocessor.py b/funasr/datasets/llm_datasets/preprocessor.py
index 9f20672..b99255e 100644
--- a/funasr/datasets/llm_datasets/preprocessor.py
+++ b/funasr/datasets/llm_datasets/preprocessor.py
@@ -16,22 +16,19 @@
 from funasr.register import tables
 
 
-
 @tables.register("preprocessor_classes", "TextPreprocessRemovePunctuation")
-class TextPreprocessSegDict(nn.Module):
-	def __init__(self,
-	             **kwargs):
-		super().__init__()
-		
-	
-	def forward(self, text, **kwargs):
-		# 瀹氫箟鑻辨枃鏍囩偣绗﹀彿
-		en_punct = string.punctuation
-		# 瀹氫箟涓枃鏍囩偣绗﹀彿锛堥儴鍒嗗父鐢ㄧ殑锛�
-		cn_punct = '銆傦紵锛侊紝銆侊紱锛氣�溾�濃�樷�欙紙锛夈�娿�嬨�愩�戔�︹�旓綖路'
-		# 鍚堝苟鑻辨枃鍜屼腑鏂囨爣鐐圭鍙�
-		all_punct = en_punct + cn_punct
-		# 鍒涘缓姝e垯琛ㄨ揪寮忔ā寮忥紝鍖归厤浠讳綍鍦╝ll_punct涓殑瀛楃
-		punct_pattern = re.compile('[{}]'.format(re.escape(all_punct)))
-		# 浣跨敤姝e垯琛ㄨ揪寮忕殑sub鏂规硶鏇挎崲鎺夎繖浜涘瓧绗�
-		return punct_pattern.sub('', text)
+class TextPreprocessRemovePunctuation(nn.Module):
+    """Text preprocessor that deletes English and common Chinese punctuation."""
+
+    def __init__(self, **kwargs):
+        super().__init__()  # extra kwargs are accepted but not used
+
+    def forward(self, text, **kwargs):
+        """Return ``text`` with ASCII and common Chinese punctuation removed."""
+        # Common Chinese punctuation, spelled as \u escapes so the set stays
+        # intact even if this file is later decoded with the wrong charset.
+        cn_punct = (
+            "\u3002\uff1f\uff01\uff0c\u3001\uff1b\uff1a\u201c\u201d\u2018\u2019"
+            "\uff08\uff09\u300a\u300b\u3010\u3011\u2026\u2014\uff5e\u00b7"
+        )
+        return re.sub("[{}]".format(re.escape(string.punctuation + cn_punct)), "", text)

--
Gitblit v1.9.1