funasr/tokenizer/char_tokenizer.py
@@ -6,15 +6,17 @@ from funasr.tokenizer.abs_tokenizer import AbsTokenizer from funasr.tokenizer.abs_tokenizer import BaseTokenizer class CharTokenizer(AbsTokenizer): class CharTokenizer(BaseTokenizer): def __init__( self, non_linguistic_symbols: Union[Path, str, Iterable[str]] = None, space_symbol: str = "<space>", remove_non_linguistic_symbols: bool = False, **kwargs, ): super().__init__(**kwargs) self.space_symbol = space_symbol if non_linguistic_symbols is None: self.non_linguistic_symbols = set()