嘉渊
2023-05-11 81a5b29804800a4edd76c8dda2727d6fdf4b5643
egs_modelscope/vad/speech_fsmn_vad_zh-cn-8k-common/infer_online.py
@@ -1,7 +1,10 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
from modelscope.utils.logger import get_logger
import logging
# Obtain modelscope's logger with CRITICAL requested as its level, then set
# the level on the returned object explicitly as well, so the threshold is
# CRITICAL regardless of how get_logger applied the argument.
# NOTE(review): presumably intended to quiet modelscope's output while the
# demo runs -- confirm that get_logger returns the logger the pipeline uses.
logger = get_logger(log_level=logging.CRITICAL)
logger.setLevel(logging.CRITICAL)
import soundfile
if __name__ == '__main__':
    output_dir = None
@@ -9,7 +12,7 @@
        task=Tasks.voice_activity_detection,
        model="damo/speech_fsmn_vad_zh-cn-8k-common",
        model_revision='v1.2.0',
        output_dir=None,
        output_dir=output_dir,
        batch_size=1,
        mode='online',
    )
@@ -19,7 +22,7 @@
    sample_offset = 0
    
    step = 80 * 10
    param_dict = {'in_cache': dict()}
    param_dict = {'in_cache': dict(), 'max_end_sil': 800}
    for sample_offset in range(0, speech_length, min(step, speech_length - sample_offset)):
        if sample_offset + step >= speech_length - 1:
            step = speech_length - sample_offset