kongdeqiang
2026-03-13 28ccfbfc51068a663a80764e14074df5edf2b5ba
funasr/tokenizer/whisper_tokenizer.py
@@ -1,46 +1,45 @@
from funasr.register import tables
@tables.register("tokenizer_classes", "WhisperTokenizer")
def WhisperTokenizer(**kwargs):
    """Build a tokenizer through ``whisper.tokenizer.get_tokenizer``.

    The ``whisper`` package is imported lazily so that this module can be
    imported even when the package is not installed.

    Keyword Args:
        language: Forwarded as ``language``. Defaults to ``None``.
        task: Forwarded as ``task``. Defaults to ``"transcribe"``.
        is_multilingual: Forwarded as ``multilingual``. Defaults to ``True``.
        num_languages: Forwarded as ``num_languages``. Defaults to ``99``.

    Returns:
        The object returned by ``get_tokenizer``.

    Raises:
        ImportError: If ``whisper.tokenizer`` cannot be imported.
    """
    try:
        from whisper.tokenizer import get_tokenizer
    except ImportError as err:
        # Fail here with the install hint instead of printing it and then
        # crashing below on an unbound ``get_tokenizer``.
        raise ImportError(
            "Notice: If you want to use whisper, please `pip install -U openai-whisper`"
        ) from err

    return get_tokenizer(
        multilingual=kwargs.get("is_multilingual", True),
        num_languages=kwargs.get("num_languages", 99),
        language=kwargs.get("language", None),
        task=kwargs.get("task", "transcribe"),
    )
@tables.register("tokenizer_classes", "SenseVoiceTokenizer")
def SenseVoiceTokenizer(**kwargs):
    """Build a tokenizer through the SenseVoice ``whisper_lib`` ``get_tokenizer``.

    The tokenizer module is imported lazily from
    ``funasr.models.sense_voice.whisper_lib.tokenizer``.

    Keyword Args:
        language: Forwarded as ``language``. Defaults to ``None``.
        task: Forwarded as ``task``. Defaults to ``None``.
        is_multilingual: Forwarded as ``multilingual``. Defaults to ``True``.
        num_languages: Forwarded as ``num_languages``. Defaults to ``8749``.
        vocab_path: Forwarded as ``vocab_path``. Defaults to ``None``.

    Returns:
        The object returned by ``get_tokenizer``.

    Raises:
        ImportError: If the SenseVoice tokenizer module cannot be imported.
    """
    try:
        from funasr.models.sense_voice.whisper_lib.tokenizer import get_tokenizer
    except ImportError as err:
        # Fail here with the install hint instead of printing it and then
        # crashing below on an unbound ``get_tokenizer``.
        raise ImportError(
            "Notice: If you want to use whisper, please `pip install -U openai-whisper`"
        ) from err

    return get_tokenizer(
        multilingual=kwargs.get("is_multilingual", True),
        num_languages=kwargs.get("num_languages", 8749),
        language=kwargs.get("language", None),
        task=kwargs.get("task", None),
        vocab_path=kwargs.get("vocab_path", None),
    )