王梦迪
2025-05-22 561bdbdfc0f8fd6234c5130fdc8631bf8e294bd8
funasr/models/sense_voice/model.py
@@ -925,6 +925,11 @@
                    if tok_ls: token_ids.extend(tok_ls)
                    else: token_ids.append(124)
                if len(token_ids) == 0:
                    result_i = {"key": key[i], "text": text}
                    results.append(result_i)
                    continue
                logits_speech = self.ctc.softmax(encoder_out)[i, 4 : encoder_out_lens[i].item(), :]
                pred = logits_speech.argmax(-1).cpu()
                logits_speech[pred == self.blank_id, self.blank_id] = 0