# Two-turn chat demo: run Qwen-Audio-Chat through FunASR's AutoModel on one
# remote audio clip, then ask a follow-up question about the same clip.

from funasr import AutoModel

# Build the model exactly once, using the namespaced model id. The earlier
# un-namespaced construction was immediately overwritten and has been dropped.
model = AutoModel(model="Qwen/Qwen-Audio-Chat")

audio_in = "https://github.com/QwenLM/Qwen-Audio/raw/main/assets/audio/1272-128104-0000.flac"

# 1st dialogue turn: pass the audio together with the question.
prompt = "what does the person say?"
# The same dict object is handed to both generate() calls below.
# NOTE(review): presumably generate() stores the conversation state under
# "history" so the second turn can refer back to the first — confirm.
cache = {"history": None}
res = model.generate(input=audio_in, prompt=prompt, cache=cache)
print(res)

# 2nd dialogue turn: no new audio is supplied (input=None); only the prompt
# changes and the cache from the first turn is reused.
prompt = 'Find the start time and end time of the word "middle classes"'
res = model.generate(input=None, prompt=prompt, cache=cache)
print(res)