zhaomingwork
2024-03-19 b32b21d9c8b0ea659711760e7c80fb572d6b7a41
funasr/models/qwen_audio/model.py
@@ -60,7 +60,7 @@
         inputs = self.tokenizer(query, return_tensors='pt', audio_info=audio_info)
         inputs = inputs.to(self.model.device)
         pred = self.model.generate(**inputs, audio_info=audio_info)
-        response = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False, audio_info=audio_info)
+        response = self.tokenizer.decode(pred.cpu()[0], skip_special_tokens=False, audio_info=audio_info)
         results = []
         result_i = {"key": key[0], "text": response}