From 86fb406bb7f99cbd420cff35a4937ed34da8fa47 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 21 三月 2024 17:04:03 +0800
Subject: [PATCH] tutorial

---
 docs/tutorial/README_zh.md                                   |   12 ++++++++++++
 examples/industrial_data_pretraining/paraformer/README_zh.md |   12 ++++++++++++
 examples/README_zh.md                                        |   12 ++++++++++++
 3 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/docs/tutorial/README_zh.md b/docs/tutorial/README_zh.md
index cc1f8c8..fad5893 100644
--- a/docs/tutorial/README_zh.md
+++ b/docs/tutorial/README_zh.md
@@ -137,6 +137,9 @@
 
 #### 准备数据
 
+`jsonl`格式可以参考（[例子](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)）。
+可以用指令 `scp2jsonl` 从wav.scp与text.txt生成。wav.scp与text.txt准备过程如下：
+
 `train_text.txt`
 
 左边为数据唯一ID，需与`train_wav.scp`中的`ID`一一对应
@@ -160,6 +163,15 @@
 ID0012W0015 https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_cn_en.wav
 ```
 
+`生成指令`
+
+```shell
+# generate train.jsonl from train_wav.scp and train_text.txt (repeat with the validation lists to produce val.jsonl)
+scp2jsonl \
+++scp_file_list='["../../../data/list/train_wav.scp", "../../../data/list/train_text.txt"]' \
+++data_type_list='["source", "target"]' \
+++jsonl_file_out="../../../data/list/train.jsonl"
+```
 
 #### 查看训练日志
 
diff --git a/examples/README_zh.md b/examples/README_zh.md
index cc1f8c8..fad5893 100644
--- a/examples/README_zh.md
+++ b/examples/README_zh.md
@@ -137,6 +137,9 @@
 
 #### 准备数据
 
+`jsonl`格式可以参考（[例子](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)）。
+可以用指令 `scp2jsonl` 从wav.scp与text.txt生成。wav.scp与text.txt准备过程如下：
+
 `train_text.txt`
 
 左边为数据唯一ID，需与`train_wav.scp`中的`ID`一一对应
@@ -160,6 +163,15 @@
 ID0012W0015 https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_cn_en.wav
 ```
 
+`生成指令`
+
+```shell
+# generate train.jsonl from train_wav.scp and train_text.txt (repeat with the validation lists to produce val.jsonl)
+scp2jsonl \
+++scp_file_list='["../../../data/list/train_wav.scp", "../../../data/list/train_text.txt"]' \
+++data_type_list='["source", "target"]' \
+++jsonl_file_out="../../../data/list/train.jsonl"
+```
 
 #### 查看训练日志
 
diff --git a/examples/industrial_data_pretraining/paraformer/README_zh.md b/examples/industrial_data_pretraining/paraformer/README_zh.md
index cc1f8c8..fad5893 100644
--- a/examples/industrial_data_pretraining/paraformer/README_zh.md
+++ b/examples/industrial_data_pretraining/paraformer/README_zh.md
@@ -137,6 +137,9 @@
 
 #### 准备数据
 
+`jsonl`格式可以参考（[例子](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)）。
+可以用指令 `scp2jsonl` 从wav.scp与text.txt生成。wav.scp与text.txt准备过程如下：
+
 `train_text.txt`
 
 左边为数据唯一ID，需与`train_wav.scp`中的`ID`一一对应
@@ -160,6 +163,15 @@
 ID0012W0015 https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_cn_en.wav
 ```
 
+`生成指令`
+
+```shell
+# generate train.jsonl from train_wav.scp and train_text.txt (repeat with the validation lists to produce val.jsonl)
+scp2jsonl \
+++scp_file_list='["../../../data/list/train_wav.scp", "../../../data/list/train_text.txt"]' \
+++data_type_list='["source", "target"]' \
+++jsonl_file_out="../../../data/list/train.jsonl"
+```
 
 #### 查看训练日志
 

--
Gitblit v1.9.1