From bf341eed2bf671a52bda48232f052015504fe554 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 26 三月 2024 00:09:15 +0800
Subject: [PATCH] train
---
docs/tutorial/README_zh.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/docs/tutorial/README_zh.md b/docs/tutorial/README_zh.md
index 563fe7c..9742282 100644
--- a/docs/tutorial/README_zh.md
+++ b/docs/tutorial/README_zh.md
@@ -214,7 +214,6 @@
```shell
funasr/bin/train.py \
++model="${model_name_or_model_dir}" \
-++model_revision="${model_revision}" \
++train_data_set_list="${train_data}" \
++valid_data_set_list="${val_data}" \
++dataset_conf.batch_size=20000 \
@@ -232,7 +231,6 @@
```
- `model`（str）：模型名字（模型仓库中的ID），此时脚本会自动下载模型到本地；或者本地已经下载好的模型路径。
-- `model_revision`（str）：当 `model` 为模型名字时，下载指定版本的模型。
- `train_data_set_list`（str）：训练数据路径，默认为jsonl格式，具体参考（[例子](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)）。
- `valid_data_set_list`（str）：验证数据路径，默认为jsonl格式，具体参考（[例子](https://github.com/alibaba-damo-academy/FunASR/blob/main/data/list)）。
- `dataset_conf.batch_type`（str）：`example`（默认），batch的类型。`example`表示按照固定数目batch_size个样本组batch；`length` or `token` 表示动态组batch，batch总长度或者token数为batch_size。
@@ -266,7 +264,7 @@
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 2 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
+torchrun --nnodes 2 --node_rank 0 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
../../../funasr/bin/train.py ${train_args}
```
在从节点上（假设IP为192.168.1.2），你需要确保MASTER_ADDR和MASTER_PORT环境变量与主节点设置的一致，并运行同样的命令：
@@ -274,11 +272,11 @@
export CUDA_VISIBLE_DEVICES="0,1"
gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
-torchrun --nnodes 2 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
+torchrun --nnodes 2 --node_rank 1 --nproc_per_node ${gpu_num} --master_addr=192.168.1.1 --master_port=12345 \
../../../funasr/bin/train.py ${train_args}
```
---nnodes 表示参与的节点总数，--nproc_per_node 表示每个节点上运行的进程数
+--nnodes 表示参与的节点总数，--node_rank 表示当前节点id，--nproc_per_node 表示每个节点上运行的进程数（通常为gpu个数）
#### 准备数据
@@ -352,6 +350,55 @@
```
浏览器中打开：http://localhost:6006/
+### 训练后模型测试
+
+
+#### 有configuration.json
+
+假定，训练模型路径为：./model_dir，如果该目录下有生成configuration.json，只需要将 [上述模型推理方法](https://github.com/alibaba-damo-academy/FunASR/blob/main/examples/README_zh.md#%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86) 中模型名字修改为模型路径即可
+
+例如：
+
+从shell推理
+```shell
+python -m funasr.bin.inference ++model="./model_dir" ++input="${input}" ++output_dir="${output_dir}"
+```
+从python推理
+
+```python
+from funasr import AutoModel
+
+model = AutoModel(model="./model_dir")
+
+res = model.generate(input=wav_file)
+print(res)
+```
+
+#### 无configuration.json时
+
+如果模型路径中无configuration.json时，需要手动指定具体配置文件路径与模型路径
+
+```shell
+python -m funasr.bin.inference \
+--config-path "${local_path}" \
+--config-name "${config}" \
+++init_param="${init_param}" \
+++tokenizer_conf.token_list="${tokens}" \
+++frontend_conf.cmvn_file="${cmvn_file}" \
+++input="${input}" \
+++output_dir="${output_dir}" \
+++device="${device}"
+```
+
+参数介绍
+- `config-path`：为实验中保存的 `config.yaml`，可以从实验输出目录中查找。
+- `config-name`：配置文件名，一般为 `config.yaml`，支持yaml格式与json格式，例如 `config.json`
+- `init_param`：需要测试的模型参数，一般为`model.pt`，可以自己选择具体的模型文件
+- `tokenizer_conf.token_list`：词表文件路径，一般在 `config.yaml` 有指定，无需再手动指定，当 `config.yaml` 中路径不正确时，需要在此处手动指定。
+- `frontend_conf.cmvn_file`：wav提取fbank中用到的cmvn文件，一般在 `config.yaml` 有指定，无需再手动指定，当 `config.yaml` 中路径不正确时，需要在此处手动指定。
+
+其他参数同上，完整 [示例](https://github.com/alibaba-damo-academy/FunASR/blob/main/examples/industrial_data_pretraining/paraformer/infer_from_local.sh)
+
<a name="模型导出与测试"></a>
## 模型导出与测试
--
Gitblit v1.9.1