From f2e605cd287aae2dcd2d269c540b2395c67aadb7 Mon Sep 17 00:00:00 2001 From: shixian.shi <shixian.shi@alibaba-inc.com> Date: 星期一, 13 三月 2023 15:25:57 +0800 Subject: [PATCH] add tp inference in egs_modelscope --- egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/infer.py | 12 ++++++++++++ egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/README.md | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 0 deletions(-) diff --git a/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/README.md b/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/README.md new file mode 100644 index 0000000..6d9cd30 --- /dev/null +++ b/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/README.md @@ -0,0 +1,24 @@ +# ModelScope Model + +## How to finetune and infer using a pretrained ModelScope Model + +### Inference + +Or you can use the finetuned model for inference directly. + +- Set parameters in `infer.py` + - <strong>audio_in:</strong> # supports wav, url, bytes, and parsed audio formats. + - <strong>output_dir:</strong> # If the input format is wav.scp, it needs to be set. + +- Then you can run the pipeline to infer with: +```python + python infer.py +``` + + +Modify inference-related parameters in vad.yaml. 
+ +- max_end_silence_time: The end-point silence duration used to judge the end of a sentence; the parameter range is 500ms~6000ms, and the default value is 800ms +- speech_noise_thres: The balance between speech and silence scores; the parameter range is (-1,1) + - The closer the value is to -1, the greater the probability of noise being judged as speech + - The closer the value is to 1, the greater the probability of speech being judged as noise diff --git a/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/infer.py b/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/infer.py new file mode 100644 index 0000000..ff42e68 --- /dev/null +++ b/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/infer.py @@ -0,0 +1,12 @@ +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks + +inference_pipline = pipeline( + task=Tasks.speech_timestamp, + model='damo/speech_timestamp_prediction-v1-16k-offline', + output_dir='./tmp') + +rec_result = inference_pipline( + audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav', + text_in='一 个 东 太 平 洋 国 家 为 什 么 跑 到 西 太 平 洋 来 了 呢') +print(rec_result) \ No newline at end of file -- Gitblit v1.9.1