From d16529d59128935e30804dd55bdb7abefbe63238 Mon Sep 17 00:00:00 2001
From: speech_asr <wangjiaming.wjm@alibaba-inc.com>
Date: 星期三, 15 二月 2023 15:09:46 +0800
Subject: [PATCH] update docs

---
 docs/index.rst        |    1 
 docs_cn/build_task.md |    2 
 docs/build_task.md    |  106 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 108 insertions(+), 1 deletions(-)

diff --git a/docs/build_task.md b/docs/build_task.md
new file mode 100644
index 0000000..56ed860
--- /dev/null
+++ b/docs/build_task.md
@@ -0,0 +1,106 @@
+# Build custom tasks
+FunASR is similar to ESPNet, which applies `Task` as the general interface to achieve the training and inference of models. Each `Task` is a class inherited from `AbsTask` and its corresponding code can be seen in `funasr/tasks/abs_task.py`. The main functions of `AbsTask` are shown as follows:
+```python
+class AbsTask(ABC):
+    @classmethod
+    def add_task_arguments(cls, parser: argparse.ArgumentParser):
+        pass
+    
+    @classmethod
+    def build_preprocess_fn(cls, args, train):
+        (...)
+    
+    @classmethod
+    def build_collate_fn(cls, args: argparse.Namespace):
+        (...)
+
+    @classmethod
+    def build_model(cls, args):
+        (...)
+    
+    @classmethod
+    def main(cls, args):
+        (...)
+```
+- add_task_arguments: add parameters required by a specified `Task`
+- build_preprocess_fn: define how to preprocess samples
+- build_collate_fn: define how to combine multiple samples into a `batch`
+- build_model: define the model
+- main: training interface, starting training through `Task.main()`
+
+Next, we take the speech recognition as an example to introduce how to define a new `Task`. For the corresponding code, please see `ASRTask` in `funasr/tasks/asr.py`. The procedure of defining a new `Task` is actually the procedure of redefining the above functions according to the requirements of the specified `Task`.
+
+- add_task_arguments
+```python
+@classmethod
+def add_task_arguments(cls, parser: argparse.ArgumentParser):
+    group = parser.add_argument_group(description="Task related")
+    group.add_argument(
+        "--token_list",
+        type=str_or_none,
+        default=None,
+        help="A text mapping int-id to token",
+    )
+    (...)
+```
+For speech recognition tasks, specific parameters required include `token_list`, etc. According to the specific requirements of different tasks, users can define corresponding parameters in this function.
+
+- build_preprocess_fn
+```python
+@classmethod
+def build_preprocess_fn(cls, args, train):
+    if args.use_preprocessor:
+        retval = CommonPreprocessor(
+                    train=train,
+                    token_type=args.token_type,
+                    token_list=args.token_list,
+                    bpemodel=args.bpemodel,
+                    non_linguistic_symbols=args.non_linguistic_symbols,
+                    text_cleaner=args.cleaner,
+                    ...
+                )
+    else:
+        retval = None
+    return retval
+```
+This function defines how to preprocess samples. Specifically, the input of speech recognition tasks includes speech and text. For speech, functions such as (optional) adding noise and reverberation to the speech are supported. For text, functions such as (optional) processing text according to BPE and mapping text to `tokenid` are supported. Users can choose the preprocessing operation that needs to be performed on the sample. For the detailed implementation, please refer to `CommonPreprocessor`.
+
+- build_collate_fn
+```python
+@classmethod
+def build_collate_fn(cls, args, train):
+    return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1)
+```
+This function defines how to combine multiple samples into a `batch`. For speech recognition tasks, `padding` is employed to obtain equal-length data from different speech and text. Specifically, we set `0.0` as the default padding value for speech and `-1` as the default padding value for text. Users can define different `batch` operations here. For the detailed implementation, please refer to `CommonCollateFn`.
+
+- build_model
+```python
+@classmethod
+def build_model(cls, args, train):
+    with open(args.token_list, encoding="utf-8") as f:
+        token_list = [line.rstrip() for line in f]
+        vocab_size = len(token_list)
+        frontend = frontend_class(**args.frontend_conf)
+        specaug = specaug_class(**args.specaug_conf)
+        normalize = normalize_class(**args.normalize_conf)
+        preencoder = preencoder_class(**args.preencoder_conf)
+        encoder = encoder_class(input_size=input_size, **args.encoder_conf)
+        postencoder = postencoder_class(input_size=encoder_output_size, **args.postencoder_conf)
+        decoder = decoder_class(vocab_size=vocab_size, encoder_output_size=encoder_output_size,  **args.decoder_conf)
+        ctc = CTC(odim=vocab_size, encoder_output_size=encoder_output_size, **args.ctc_conf)
+        model = model_class(
+            vocab_size=vocab_size,
+            frontend=frontend,
+            specaug=specaug,
+            normalize=normalize,
+            preencoder=preencoder,
+            encoder=encoder,
+            postencoder=postencoder,
+            decoder=decoder,
+            ctc=ctc,
+            token_list=token_list,
+            **args.model_conf,
+        )
+    return model
+```
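+This function defines the specific model. Different speech recognition models can usually share the same speech recognition `Task`, and the specific model is then defined in this function. For example, a speech recognition model with a standard encoder-decoder structure is shown here. Specifically, each module of the model, including the encoder, the decoder, etc., is defined first, and these modules are then combined to obtain a complete model. In FunASR, the model needs to inherit from `AbsESPnetModel`, whose code can be found in `funasr/train/abs_espnet_model.py`. The main function that needs to be implemented is the `forward` function.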
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index ce44488..d29b500 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -16,6 +16,7 @@
    ./installation.md
    ./papers.md
    ./get_started.md
+   ./build_task.md
 
 .. toctree::
    :maxdepth: 1
diff --git a/docs_cn/build_task.md b/docs_cn/build_task.md
index 7507a17..221d451 100644
--- a/docs_cn/build_task.md
+++ b/docs_cn/build_task.md
@@ -70,7 +70,7 @@
 def build_collate_fn(cls, args, train):
     return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1)
 ```
-该函数定义了如何将多个样本组成一个`batch`。对于语音识别任务，在此实现的是将不同的音频和抄本，通过`padding`的方式来得到等长的数据。具体地，自此我们默认用`0.0`来作为音频的填充值，用`-1`作为抄本的默认填充值。用户可以在此定义不同的组`batch`操作，实现方法可以参考`CommonCollateFn`。
+该函数定义了如何将多个样本组成一个`batch`。对于语音识别任务，在此实现的是将不同的音频和抄本，通过`padding`的方式来得到等长的数据。具体地，我们默认用`0.0`来作为音频的填充值，用`-1`作为抄本的默认填充值。用户可以在此定义不同的组`batch`操作，实现方法可以参考`CommonCollateFn`。
 
 - build_model
 ```python

--
Gitblit v1.9.1