hnluo
2022-12-28 899c7056e8eba1deb561729b5f85ea450dc540d5
funasr/bin/asr_inference_paraformer.py
@@ -92,8 +92,8 @@
         if asr_model.frontend is None and frontend_conf is not None:
             frontend = WavFrontend(**frontend_conf)
             asr_model.frontend = frontend
-        logging.info("asr_model: {}".format(asr_model))
-        logging.info("asr_train_args: {}".format(asr_train_args))
+        # logging.info("asr_model: {}".format(asr_model))
+        # logging.info("asr_train_args: {}".format(asr_train_args))
         asr_model.to(dtype=getattr(torch, dtype)).eval()

         ctc = CTCPrefixScorer(ctc=asr_model.ctc, eos=asr_model.eos)
@@ -141,8 +141,8 @@
         for scorer in scorers.values():
             if isinstance(scorer, torch.nn.Module):
                 scorer.to(device=device, dtype=getattr(torch, dtype)).eval()
-        logging.info(f"Beam_search: {beam_search}")
-        logging.info(f"Decoding device={device}, dtype={dtype}")
+        # logging.info(f"Beam_search: {beam_search}")
+        # logging.info(f"Decoding device={device}, dtype={dtype}")

         # 5. [Optional] Build Text converter: e.g. bpe-sym -> Text
         if token_type is None:
@@ -160,7 +160,7 @@
         else:
             tokenizer = build_tokenizer(token_type=token_type)
         converter = TokenIDConverter(token_list=token_list)
-        logging.info(f"Text tokenizer: {tokenizer}")
+        # logging.info(f"Text tokenizer: {tokenizer}")

         self.asr_model = asr_model
         self.asr_train_args = asr_train_args
@@ -426,7 +426,7 @@

         assert len(keys) == _bs, f"{len(keys)} != {_bs}"
         # batch = {k: v for k, v in batch.items() if not k.endswith("_lengths")}
-        logging.info("decoding, utt_id: {}".format(keys))
+        # logging.info("decoding, utt_id: {}".format(keys))

         # N-best list of (text, token, token_int, hyp_object)
         time_beg = time.time()
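
The hunks above silence the setup and per-utterance logging.info calls by commenting them out. A minimal alternative sketch, not part of FunASR: demote the per-utterance message to DEBUG on a module logger, so output stays quiet by default but the message remains recoverable for troubleshooting. The logger name and the report_decode helper below are hypothetical, chosen only for illustration:

    import logging

    logger = logging.getLogger("asr_inference_paraformer")  # hypothetical logger name
    logger.setLevel(logging.INFO)  # records below INFO are dropped

    def report_decode(keys):
        # was: logging.info("decoding, utt_id: {}".format(keys))
        logger.debug("decoding, utt_id: %s", keys)  # silent at the INFO default

With lazy %s formatting, the message string is only built when a record is actually emitted, so the per-utterance cost inside the decode loop stays negligible even with logging left in place.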