From e30a17cf4e715b3d139fa1e0ba01cda1bcf0f884 Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: Wed, 10 Jan 2024 11:23:41 +0800
Subject: [PATCH] update funasr-onnx
---
funasr/tokenizer/build_tokenizer.py | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/funasr/tokenizer/build_tokenizer.py b/funasr/tokenizer/build_tokenizer.py
index 9d1cdc3..05db6a6 100644
--- a/funasr/tokenizer/build_tokenizer.py
+++ b/funasr/tokenizer/build_tokenizer.py
@@ -1,7 +1,17 @@
from pathlib import Path
from typing import Iterable
from typing import Union
+from abc import ABC
+from abc import abstractmethod
+from typing import Iterable
+from typing import List
+from pathlib import Path
+from typing import Dict
+from typing import Iterable
+from typing import List
+from typing import Union
+import numpy as np
from funasr.tokenizer.abs_tokenizer import AbsTokenizer
from funasr.tokenizer.char_tokenizer import CharTokenizer
@@ -18,7 +28,8 @@
space_symbol: str = "<space>",
delimiter: str = None,
g2p_type: str = None,
-) -> AbsTokenizer:
+ **kwargs,
+):
"""A helper function to instantiate Tokenizer"""
if token_type == "bpe":
if bpemodel is None:
@@ -28,7 +39,7 @@
raise RuntimeError(
"remove_non_linguistic_symbols is not implemented for token_type=bpe"
)
- return SentencepiecesTokenizer(bpemodel)
+ return SentencepiecesTokenizer(bpemodel, **kwargs)
elif token_type == "word":
if remove_non_linguistic_symbols and non_linguistic_symbols is not None:
@@ -38,13 +49,14 @@
remove_non_linguistic_symbols=True,
)
else:
- return WordTokenizer(delimiter=delimiter)
+ return WordTokenizer(delimiter=delimiter, **kwargs)
elif token_type == "char":
return CharTokenizer(
non_linguistic_symbols=non_linguistic_symbols,
space_symbol=space_symbol,
remove_non_linguistic_symbols=remove_non_linguistic_symbols,
+ **kwargs
)
elif token_type == "phn":
@@ -53,6 +65,7 @@
non_linguistic_symbols=non_linguistic_symbols,
space_symbol=space_symbol,
remove_non_linguistic_symbols=remove_non_linguistic_symbols,
+ **kwargs
)
else:
--
Gitblit v1.9.1