zhifu gao
2024-03-07 341182c3bfc62831aa02781d0e6bbe2a479f3fb2
funasr/models/qwen_audio/model.py
@@ -54,9 +54,8 @@
        meta_data = {}
        # meta_data["batch_data_time"] = -1
        sp_prompt = "<|startoftranscription|><|en|><|transcribe|><|en|><|notimestamps|><|wo_itn|>"
        query = f"<audio>{data_in[0]}</audio>{sp_prompt}"
        prompt = kwargs.get("prompt", "<|startoftranscription|><|en|><|transcribe|><|en|><|notimestamps|><|wo_itn|>")
        query = f"<audio>{data_in[0]}</audio>{prompt}"
        audio_info = self.tokenizer.process_audio(query)
        inputs = self.tokenizer(query, return_tensors='pt', audio_info=audio_info)
        inputs = inputs.to(self.model.device)