add
游雁
2024-04-11 70027f0eadaa89f1641d86199cbe5d8664f10bdf
add
2个文件已修改
19 ■■■■■ 已修改文件
examples/industrial_data_pretraining/whisper/demo.py 11 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/models/whisper/model.py 8 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/industrial_data_pretraining/whisper/demo.py
@@ -12,9 +12,16 @@
                  vad_kwargs={"max_single_segment_time": 30000},
                  )
# Options forwarded to whisper.DecodingOptions via **-unpacking; the companion
# patch to funasr/models/whisper/model.py in this commit reads this dict with
# kwargs.get("DecodingOptions", {}).
DecodingOptions = {
    "task": "transcribe",  # presumably "transcribe" vs "translate" -- per Whisper's API
    "language": None,  # None -> language not pinned; model-side detection decides
    "beam_size": None,  # None -> no beam search configured
    "fp16": True,
    "without_timestamps": False,
    "prompt": None,  # no priming text supplied
    }
# NOTE(review): `language` and `task` are passed both as direct kwargs and
# inside DecodingOptions -- which one wins is decided in model.py; confirm.
res = model.generate(
    language=None,
    task="transcribe",
    DecodingOptions=DecodingOptions,
    batch_size_s=0,
    input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
funasr/models/whisper/model.py
@@ -90,12 +90,12 @@
        speech = speech.to(device=kwargs["device"])[0, :, :]
        speech_lengths = speech_lengths.to(device=kwargs["device"])
        # detect the spoken language
        _, probs = self.model.detect_language(speech)
        print(f"Detected language: {max(probs, key=probs.get)}")
        # # detect the spoken language
        # _, probs = self.model.detect_language(speech)
        # print(f"Detected language: {max(probs, key=probs.get)}")
        # decode the audio
        options = whisper.DecodingOptions(language=kwargs.get("language", None), fp16=False)
        options = whisper.DecodingOptions(**kwargs.get("DecodingOptions", {}))
        result = whisper.decode(self.model, speech, options)
        results = []