游雁
2024-03-27 9b4e9cc8a0311e5243d69b73ed073e7ea441982e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
 
 
from funasr.register import tables
 
@tables.register("tokenizer_classes", "WhisperTokenizer")
def WhisperTokenizer(**kwargs):
    """Build a Whisper tokenizer via ``whisper.tokenizer.get_tokenizer``.

    Registered in ``tables`` under ``"tokenizer_classes"`` with the name
    ``"WhisperTokenizer"``.

    Keyword Args:
        language: Forwarded as ``language``. Defaults to ``None``.
        task: Forwarded as ``task``. Defaults to ``"transcribe"``.
        is_multilingual: Forwarded as ``multilingual``. Defaults to ``True``.
        num_languages: Forwarded as ``num_languages``. Defaults to ``99``.

        Any other keyword arguments are accepted and ignored.

    Returns:
        The object returned by ``whisper.tokenizer.get_tokenizer``.

    Raises:
        ImportError: If the ``whisper`` package cannot be imported.
    """
    # The import lives inside the function so that whisper is only needed
    # when this tokenizer is actually requested.
    try:
        from whisper.tokenizer import get_tokenizer
    except ImportError as err:
        # Fail here with an actionable message; merely printing and carrying
        # on would crash a few lines later with an unbound `get_tokenizer`.
        raise ImportError(
            "Notice: If you want to use whisper, please `pip install -U openai-whisper`"
        ) from err

    return get_tokenizer(
        multilingual=kwargs.get("is_multilingual", True),
        num_languages=kwargs.get("num_languages", 99),
        language=kwargs.get("language", None),
        task=kwargs.get("task", "transcribe"),
    )