From f2e605cd287aae2dcd2d269c540b2395c67aadb7 Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: 星期一, 13 三月 2023 15:25:57 +0800
Subject: [PATCH] add tp inference in egs_modelscope

---
 egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/infer.py  |   12 ++++++++++++
 egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/README.md |   24 ++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/README.md b/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/README.md
new file mode 100644
index 0000000..6d9cd30
--- /dev/null
+++ b/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/README.md
@@ -0,0 +1,24 @@
+# ModelScope Model
+
+## How to finetune and infer using a pretrained ModelScope Model
+
+### Inference
+
+You can use the finetuned model for inference directly.
+
+- Setting parameters in `infer.py`
+    - <strong>audio_in:</strong> # supports wav, url, bytes, and parsed audio formats.
+    - <strong>output_dir:</strong> # must be set if the input format is wav.scp.
+
+- Then you can run the pipeline to infer with:
+```shell
+    python infer.py
+```
+
+
+Modify inference-related parameters in `vad.yaml`.
+
+- max_end_silence_time: The end-point silence duration used to judge the end of a sentence; the parameter range is 500ms~6000ms, and the default value is 800ms.
+- speech_noise_thres: The balance between speech and silence scores; the parameter range is (-1, 1).
+    - The closer the value is to -1, the greater the probability of noise being judged as speech.
+    - The closer the value is to 1, the greater the probability of speech being judged as noise.
diff --git a/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/infer.py b/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/infer.py
new file mode 100644
index 0000000..ff42e68
--- /dev/null
+++ b/egs_modelscope/tp/speech_timestamp_prediction-v1-16k-offline/infer.py
@@ -0,0 +1,12 @@
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+inference_pipline = pipeline(  # build the speech-timestamp pipeline once, up front
+    task=Tasks.speech_timestamp,
+    model='damo/speech_timestamp_prediction-v1-16k-offline',
+    output_dir='./tmp')
+
+rec_result = inference_pipline(  # run on the sample audio together with its transcript
+    audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',
+    text_in='一 个 东 太 平 洋 国 家 为 什 么 跑 到 西 太 平 洋 来 了 呢')  # space-separated characters; was GBK-decoded mojibake
+print(rec_result)
\ No newline at end of file

--
Gitblit v1.9.1