From 81a4774008d2e3da532343fe58ce7e2c4dc3bcb5 Mon Sep 17 00:00:00 2001
From: jmwang66 <wangjiaming.wjm@alibaba-inc.com>
Date: Wed, 31 May 2023 14:15:58 +0800
Subject: [PATCH] update asr recipe github.io (#566)

---
 egs/wenetspeech/conformer/run.sh   |    3 +--
 docs/academic_recipe/asr_recipe.md |   23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/docs/academic_recipe/asr_recipe.md b/docs/academic_recipe/asr_recipe.md
index e5a7f02..611976f 100644
--- a/docs/academic_recipe/asr_recipe.md
+++ b/docs/academic_recipe/asr_recipe.md
@@ -171,6 +171,7 @@
 ```
 
 * Training Steps
+
 FunASR supports two parameters to specify the training steps, namely `max_epoch` and `max_update`. `max_epoch` indicates the total training epochs while `max_update` indicates the total training steps. If these two parameters are specified at the same time, once the training reaches any one of these two parameters, the training will be stopped.
 
 * Change the configuration of the model
@@ -198,6 +199,28 @@
 ```
 Users can change the encoder configuration by modify these values. For example, if users want to use an encoder with 16 conformer blocks and each block has 8 attention heads, users just need to change `num_blocks` from 12 to 16 and change `attention_heads` from 4 to 8. Besides, the batch_size, learning rate and other training hyper-parameters are also set in this config file. To change these hyper-parameters, users just need to directly change the corresponding values in this file. For example, the default learning rate is `0.0005`. If users want to change the learning rate to 0.0002, set the value of lr as `lr: 0.0002`.
 
+* Use different input data type
+
+FunASR supports different input data types, including `sound`, `kaldi_ark`, `npy`, `text` and `text_int`. Users can specify any number and any type of input, which is achieved by `data_file_names` (in `run.sh`), `data_names` and `data_types` (in config file). For example, ASR task usually requires speech and the corresponding transcripts as input. If speech is saved as raw audio (such as wav format) and transcripts are saved as text format, users need to set `data_file_names=wav.scp,text` (any name is allowed, denoting wav list and text list), set `data_names=speech,text` and set `data_types=sound,text`. When the input type changes to FBank, users just need to modify `data_types=kaldi_ark,text`.
+
+* How to start from pre-trained models
+
+Users can start training from a pre-trained model by specifying the `init_param` parameter. Here `init_param` indicates the path of the pre-trained model. In addition to directly loading all the parameters from one pre-trained model, loading part of the parameters from different pre-trained models is supported. For example, to load encoder parameters from the pre-trained model A and decoder parameters from the pre-trained model B, users can set `init_param` twice as follows:
+```sh
+train.py ... --init_param ${model_A_path}:encoder  --init_param ${model_B_path}:decoder ...
+```
+
+* How to freeze part model parameters
+
+In certain situations, users may want to fix part of the model parameters and update only the rest of the model parameters. FunASR employs `freeze_param` to achieve this. For example, to fix all parameters like `encoder.*`, users need to set `freeze_param` as follows:
+```sh
+train.py ... --freeze_param encoder ...
+```
+
+* ModelScope Usage
+
+Users can use ModelScope for inference and fine-tuning based on a trained academic model. To achieve this, users need to run the stage 6 in the script. In this stage, relevant files required by ModelScope will be generated automatically. Users can then use the corresponding ModelScope interface by replacing the model name with the local trained model path. For the detailed usage of the ModelScope interface, please refer to [ModelScope Usage](https://alibaba-damo-academy.github.io/FunASR/en/modelscope_pipeline/quick_start.html).
+
 * Decoding by CPU or GPU
 
 We support CPU and GPU decoding. For CPU decoding, set `gpu_inference=false` and `njob` to specific the total number of CPU jobs. For GPU decoding, first set `gpu_inference=true`. Then set `gpuid_list` to specific which GPUs for decoding and `njob` to specific the number of decoding jobs on each GPU.
\ No newline at end of file
diff --git a/egs/wenetspeech/conformer/run.sh b/egs/wenetspeech/conformer/run.sh
index 28a2824..db18361 100644
--- a/egs/wenetspeech/conformer/run.sh
+++ b/egs/wenetspeech/conformer/run.sh
@@ -158,8 +158,7 @@
         inference_tag="$(basename "${inference_config}" .yaml)"
         _dir="${asr_exp}/${inference_tag}/${inference_asr_model}/${dset}"
         _logdir="${_dir}/logdir"
-        if [ -d ${_dir} ]; then
-            echo "${_dir} is already exists. if you want to decode again, please delete this dir first."
+        if [ -d ${_dir} ]; then echo "${_dir} is already exists. if you want to decode again, please delete this dir first."
             exit 0
         fi
         mkdir -p "${_logdir}"

--
Gitblit v1.9.1