| | |
| | | |
| | | |
| | | class BaseTokenizer(ABC): |
| | | def __init__(self, token_list: Union[Path, str, Iterable[str]], |
| | | def __init__(self, token_list: Union[Path, str, Iterable[str]]=None, |
| | | unk_symbol: str = "<unk>", |
| | | **kwargs, |
| | | ): |
| | | |
| | | if token_list is not None: |
| | | if isinstance(token_list, (Path, str)): |
| | | token_list = Path(token_list) |
| | | self.token_list_repr = str(token_list) |
| | |
| | | delimiter: str = None, |
| | | g2p_type: str = None, |
| | | **kwargs, |
| | | ) -> AbsTokenizer: |
| | | ): |
| | | """A helper function to instantiate Tokenizer""" |
| | | if token_type == "bpe": |
| | | if bpemodel is None: |