From f920ca62984a6b73b8d755b906c8bbda18d8e275 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Thu, 21 Dec 2023 13:30:27 +0800
Subject: [PATCH] Merge branch 'dev_gzf_funasr2' of github.com:alibaba-damo-academy/FunASR into dev_gzf_funasr2 add
---
funasr/tokenizer/char_tokenizer.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/funasr/tokenizer/char_tokenizer.py b/funasr/tokenizer/char_tokenizer.py
index 6c9a5a5..23ff743 100644
--- a/funasr/tokenizer/char_tokenizer.py
+++ b/funasr/tokenizer/char_tokenizer.py
@@ -4,17 +4,19 @@
from typing import Union
import warnings
+from funasr.tokenizer.abs_tokenizer import BaseTokenizer
+from funasr.utils.register import register_class
-from funasr.tokenizer.abs_tokenizer import AbsTokenizer
-
-
-class CharTokenizer(AbsTokenizer):
+@register_class("tokenizer_classes", "CharTokenizer")
+class CharTokenizer(BaseTokenizer):
def __init__(
self,
non_linguistic_symbols: Union[Path, str, Iterable[str]] = None,
space_symbol: str = "<space>",
remove_non_linguistic_symbols: bool = False,
+ **kwargs,
):
+ super().__init__(**kwargs)
self.space_symbol = space_symbol
if non_linguistic_symbols is None:
self.non_linguistic_symbols = set()
--
Gitblit v1.9.1