From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] tokenizer: remove duplicate/unused imports and **kwargs pass-through in build_tokenizer

---
 funasr/tokenizer/build_tokenizer.py |   21 +++------------------
 1 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/funasr/tokenizer/build_tokenizer.py b/funasr/tokenizer/build_tokenizer.py
index 1dc17da..d6080f7 100644
--- a/funasr/tokenizer/build_tokenizer.py
+++ b/funasr/tokenizer/build_tokenizer.py
@@ -1,17 +1,7 @@
 from pathlib import Path
 from typing import Iterable
 from typing import Union
-from abc import ABC
-from abc import abstractmethod
-from typing import Iterable
-from typing import List
-from pathlib import Path
-from typing import Dict
-from typing import Iterable
-from typing import List
-from typing import Union
 
-import numpy as np
 
 from funasr.tokenizer.abs_tokenizer import AbsTokenizer
 from funasr.tokenizer.char_tokenizer import CharTokenizer
@@ -28,7 +18,6 @@
     space_symbol: str = "<space>",
     delimiter: str = None,
     g2p_type: str = None,
-    **kwargs,
 ) -> AbsTokenizer:
     """A helper function to instantiate Tokenizer"""
     if token_type == "bpe":
@@ -39,7 +28,7 @@
             raise RuntimeError(
                 "remove_non_linguistic_symbols is not implemented for token_type=bpe"
             )
-        return SentencepiecesTokenizer(bpemodel, **kwargs)
+        return SentencepiecesTokenizer(bpemodel)
 
     elif token_type == "word":
         if remove_non_linguistic_symbols and non_linguistic_symbols is not None:
@@ -49,14 +38,13 @@
                 remove_non_linguistic_symbols=True,
             )
         else:
-            return WordTokenizer(delimiter=delimiter, **kwargs)
+            return WordTokenizer(delimiter=delimiter)
 
     elif token_type == "char":
         return CharTokenizer(
             non_linguistic_symbols=non_linguistic_symbols,
             space_symbol=space_symbol,
             remove_non_linguistic_symbols=remove_non_linguistic_symbols,
-            **kwargs
         )
 
     elif token_type == "phn":
@@ -65,10 +53,7 @@
             non_linguistic_symbols=non_linguistic_symbols,
             space_symbol=space_symbol,
             remove_non_linguistic_symbols=remove_non_linguistic_symbols,
-            **kwargs
         )
 
     else:
-        raise ValueError(
-            f"token_mode must be one of bpe, word, char or phn: " f"{token_type}"
-        )
+        raise ValueError(f"token_mode must be one of bpe, word, char or phn: " f"{token_type}")

--
Gitblit v1.9.1