From 497198ebf9b14d79b1920f8a1ead08d581103b80 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 21 三月 2024 17:41:27 +0800
Subject: [PATCH] tutorial

---
 examples/industrial_data_pretraining/paraformer/README_zh.md |   26 +++++++++++++++++++++++++-
 1 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/examples/industrial_data_pretraining/paraformer/README_zh.md b/examples/industrial_data_pretraining/paraformer/README_zh.md
index cc1f8c8..572eef1 100644
--- a/examples/industrial_data_pretraining/paraformer/README_zh.md
+++ b/examples/industrial_data_pretraining/paraformer/README_zh.md
@@ -17,16 +17,18 @@
 ```
 
 ### 璇︾粏鐢ㄦ硶浠嬬粛
+
+#### AutoModel 瀹氫箟
 ```python
 model = AutoModel(model=[str], device=[str], ncpu=[int], output_dir=[str], batch_size=[int], **kwargs)
 ```
-#### AutoModel 瀹氫箟
 - `model`(str): [妯″瀷浠撳簱](https://github.com/alibaba-damo-academy/FunASR/tree/main/model_zoo) 涓殑妯″瀷鍚嶇О锛屾垨鏈湴纾佺洏涓殑妯″瀷璺緞
 - `device`(str): `cuda:0`锛堥粯璁pu0锛夛紝浣跨敤 GPU 杩涜鎺ㄧ悊锛屾寚瀹氥�傚鏋滀负`cpu`锛屽垯浣跨敤 CPU 杩涜鎺ㄧ悊
 - `ncpu`(int): `4` 锛堥粯璁わ級锛岃缃敤浜� CPU 鍐呴儴鎿嶄綔骞惰鎬х殑绾跨▼鏁�
 - `output_dir`(str): `None` 锛堥粯璁わ級锛屽鏋滆缃紝杈撳嚭缁撴灉鐨勮緭鍑鸿矾寰�
 - `batch_size`(int): `1` 锛堥粯璁わ級锛岃В鐮佹椂鐨勬壒澶勭悊澶у皬
 - `**kwargs`(dict): 鎵�鏈夊湪`config.yaml`涓弬鏁帮紝鍧囧彲浠ョ洿鎺ュ湪姝ゅ鎸囧畾锛屼緥濡傦紝vad妯″瀷涓渶澶у垏鍓查暱搴� `max_single_segment_time=6000` 锛堟绉掞級銆�
+
 #### AutoModel 鎺ㄧ悊
 ```python
 res = model.generate(input=[str], output_dir=[str])
@@ -137,6 +139,9 @@
 
 #### 鍑嗗鏁版嵁
 
+`jsonl`鏍煎紡鍙互鍙傝�冿紙[渚嬪瓙](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)锛夈��
+鍙互鐢ㄦ寚浠� `scp2jsonl` 浠巜av.scp涓巘ext.txt鐢熸垚銆倃av.scp涓巘ext.txt鍑嗗杩囩▼濡備笅锛�
+
 `train_text.txt`
 
 宸﹁竟涓烘暟鎹敮涓�ID锛岄渶涓巂train_wav.scp`涓殑`ID`涓�涓�瀵瑰簲
@@ -160,6 +165,25 @@
 ID0012W0015 https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_cn_en.wav
 ```
 
+`鐢熸垚鎸囦护`
+
+```shell
+# generate train.jsonl and val.jsonl from wav.scp and text.txt
+scp2jsonl \
+++scp_file_list='["../../../data/list/train_wav.scp", "../../../data/list/train_text.txt"]' \
+++data_type_list='["source", "target"]' \
+++jsonl_file_out="../../../data/list/train.jsonl"
+```
+
+锛堝彲閫夛紝闈炲繀闇�锛夊鏋滈渶瑕佷粠jsonl瑙f瀽鎴恮av.scp涓巘ext.txt锛屽彲浠ヤ娇鐢ㄦ寚浠わ細
+
+```shell
+# generate wav.scp and text.txt from train.jsonl and val.jsonl
+jsonl2scp \
+++scp_file_list='["../../../data/list/train_wav.scp", "../../../data/list/train_text.txt"]' \
+++data_type_list='["source", "target"]' \
+++jsonl_file_in="../../../data/list/train.jsonl"
+```
 
 #### 鏌ョ湅璁粌鏃ュ織
 

--
Gitblit v1.9.1