From 86fb406bb7f99cbd420cff35a4937ed34da8fa47 Mon Sep 17 00:00:00 2001 From: 游雁 <zhifu.gzf@alibaba-inc.com> Date: 星期四, 21 三月 2024 17:04:03 +0800 Subject: [PATCH] tutorial --- docs/tutorial/README_zh.md | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/docs/tutorial/README_zh.md b/docs/tutorial/README_zh.md index cc1f8c8..fad5893 100644 --- a/docs/tutorial/README_zh.md +++ b/docs/tutorial/README_zh.md @@ -137,6 +137,9 @@ #### 准备数据 +`jsonl`格式可以参考（[例子](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)）。 +可以用指令 `scp2jsonl` 从wav.scp与text.txt生成。wav.scp与text.txt准备过程如下： + `train_text.txt` 左边为数据唯一ID，需与`train_wav.scp`中的`ID`一一对应 @@ -160,6 +163,15 @@ ID0012W0015 https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_cn_en.wav ``` +`生成指令` + +```shell +# generate train.jsonl and val.jsonl from wav.scp and text.txt +scp2jsonl \ +++scp_file_list='["../../../data/list/train_wav.scp", "../../../data/list/train_text.txt"]' \ +++data_type_list='["source", "target"]' \ +++jsonl_file_out="../../../data/list/train.jsonl" +``` #### 查看训练日志 -- Gitblit v1.9.1