From adcee8828ef5d78b575043954deb662a35e318f7 Mon Sep 17 00:00:00 2001
From: huangmingming <huangmingming@deepscience.cn>
Date: Mon, 30 Jan 2023 16:02:54 +0800
Subject: [PATCH] update the minimum size of audio
---
funasr/bin/asr_inference_paraformer_vad_punc.py | 23 +++++++++++++++++------
1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/funasr/bin/asr_inference_paraformer_vad_punc.py b/funasr/bin/asr_inference_paraformer_vad_punc.py
index 4b5b316..7a539e4 100644
--- a/funasr/bin/asr_inference_paraformer_vad_punc.py
+++ b/funasr/bin/asr_inference_paraformer_vad_punc.py
@@ -3,6 +3,7 @@
import logging
import sys
import time
+import json
from pathlib import Path
from typing import Optional
from typing import Sequence
@@ -100,10 +101,13 @@
# logging.info("asr_train_args: {}".format(asr_train_args))
asr_model.to(dtype=getattr(torch, dtype)).eval()
- ctc = CTCPrefixScorer(ctc=asr_model.ctc, eos=asr_model.eos)
+ if asr_model.ctc != None:
+ ctc = CTCPrefixScorer(ctc=asr_model.ctc, eos=asr_model.eos)
+ scorers.update(
+ ctc=ctc
+ )
token_list = asr_model.token_list
scorers.update(
- ctc=ctc,
length_bonus=LengthBonus(len(token_list)),
)
@@ -171,7 +175,7 @@
self.converter = converter
self.tokenizer = tokenizer
is_use_lm = lm_weight != 0.0 and lm_file is not None
- if ctc_weight == 0.0 and not is_use_lm:
+ if (ctc_weight == 0.0 or asr_model.ctc == None) and not is_use_lm:
beam_search = None
self.beam_search = beam_search
logging.info(f"Beam_search: {self.beam_search}")
@@ -632,9 +636,16 @@
text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
postprocessed_result[1], \
postprocessed_result[2]
- text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
- text_postprocessed_punc_time_stamp = "predictions: {} time_stamp: {}".format(
- text_postprocessed_punc, time_stamp_postprocessed)
+ if len(word_lists) > 0:
+ text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+ text_postprocessed_punc_time_stamp = json.dumps({"predictions": text_postprocessed_punc,
+ "time_stamp": time_stamp_postprocessed},
+ ensure_ascii=False)
+ else:
+ text_postprocessed_punc = ""
+ punc_id_list = []
+ text_postprocessed_punc_time_stamp = ""
+
else:
text_postprocessed = ""
time_stamp_postprocessed = ""
--
Gitblit v1.9.1