游雁
2024-07-22 f3d3362fd33a58aab515d9b8458a1788b646fa17
runtime/python/onnxruntime/funasr_onnx/sensevoice_bin.py
@@ -107,8 +107,8 @@
            ctc_logits, encoder_out_lens = self.infer(
                feats,
                feats_len,
                np.array(language, dtype=np.int32),
                np.array(textnorm, dtype=np.int32),
                np.array([language], dtype=np.int32),
                np.array([textnorm], dtype=np.int32),
            )
            # back to torch.Tensor
            ctc_logits = torch.from_numpy(ctc_logits).float()
@@ -120,7 +120,7 @@
            mask = yseq != self.blank_id
            token_int = yseq[mask].tolist()
            asr_res.append(self.tokenizer.encode(token_int))
            asr_res.append(self.tokenizer.decode(token_int))
        return asr_res