| | |
| | | |
| | | return tokenizer |
| | | |
| | | |
| | | @tables.register("tokenizer_classes", "SenseVoiceTokenizer") |
| | | def SenseVoiceTokenizer(**kwargs): |
| | | try: |
| | | from funasr.models.sense_voice.whisper_lib.tokenizer import get_tokenizer |
| | | except: |
| | | print("Notice: If you want to use whisper, please `pip install -U openai-whisper`") |
| | | |
| | | language = kwargs.get("language", None) |
| | | task = kwargs.get("task", None) |
| | | is_multilingual = kwargs.get("is_multilingual", True) |
| | | num_languages = kwargs.get("num_languages", 8749) |
| | | vocab_path = kwargs.get("vocab_path", None) |
| | | tokenizer = get_tokenizer( |
| | | multilingual=is_multilingual, |
| | | num_languages=num_languages, |
| | | language=language, |
| | | task=task, |
| | | vocab_path=vocab_path, |
| | | ) |
| | | |
| | | return tokenizer |