zhifu gao
2025-04-22 2c2fb5e1eb1185a081e3507c2aa5c3aafaa2bb6d
funasr/models/sense_voice/model.py
@@ -925,6 +925,11 @@
                    if tok_ls: token_ids.extend(tok_ls)
                    else: token_ids.append(124)
                if len(token_ids) == 0:
                    result_i = {"key": key[i], "text": text}
                    results.append(result_i)
                    continue
                logits_speech = self.ctc.softmax(encoder_out)[i, 4 : encoder_out_lens[i].item(), :]
                pred = logits_speech.argmax(-1).cpu()
                logits_speech[pred == self.blank_id, self.blank_id] = 0