From 90bd326269cfd594be859f8094135a33aa71a5a1 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期六, 22 四月 2023 21:06:47 +0800
Subject: [PATCH] onnx docs

---
 funasr/runtime/python/onnxruntime/README.md |   65 ++++++++++++++++++++++++--------
 egs_modelscope/vad/TEMPLATE/README.md       |   10 ++--
 egs_modelscope/asr/TEMPLATE/README.md       |   10 ++--
 3 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/egs_modelscope/asr/TEMPLATE/README.md b/egs_modelscope/asr/TEMPLATE/README.md
index 9448c1d..c645033 100644
--- a/egs_modelscope/asr/TEMPLATE/README.md
+++ b/egs_modelscope/asr/TEMPLATE/README.md
@@ -62,10 +62,10 @@
 ##### Define pipeline
 - `task`: `Tasks.auto_speech_recognition`
 - `model`: model name in [model zoo](https://alibaba-damo-academy.github.io/FunASR/en/modelscope_models.html#pretrained-models-on-modelscope), or model path in local disk
-- `ngpu`: `1` (Defalut), decoding on GPU. If ngpu=0, decoding on CPU
-- `ncpu`: `1` (Defalut), sets the number of threads used for intraop parallelism on CPU 
-- `output_dir`: `None` (Defalut), the output path of results if set
-- `batch_size`: `1` (Defalut), batch size when decoding
+- `ngpu`: `1` (Default), decoding on GPU. If ngpu=0, decoding on CPU
+- `ncpu`: `1` (Default), sets the number of threads used for intraop parallelism on CPU 
+- `output_dir`: `None` (Default), the output path of results if set
+- `batch_size`: `1` (Default), batch size when decoding
 ##### Infer pipeline
 - `audio_in`: the input to decode, which could be: 
   - wav_path, `e.g.`: asr_example.wav,
@@ -79,7 +79,7 @@
   ```
   In this case of `wav.scp` input, `output_dir` must be set to save the output results
 - `audio_fs`: audio sampling rate, only set when audio_in is pcm audio
-- `output_dir`: None (Defalut), the output path of results if set
+- `output_dir`: None (Default), the output path of results if set
 
 ### Inference with multi-thread CPUs or multi GPUs
 FunASR also offer recipes [infer.sh](https://github.com/alibaba-damo-academy/FunASR/blob/main/egs_modelscope/asr/TEMPLATE/infer.sh) to decode with multi-thread CPUs, or multi GPUs.
diff --git a/egs_modelscope/vad/TEMPLATE/README.md b/egs_modelscope/vad/TEMPLATE/README.md
index f4870aa..a4b5e79 100644
--- a/egs_modelscope/vad/TEMPLATE/README.md
+++ b/egs_modelscope/vad/TEMPLATE/README.md
@@ -47,10 +47,10 @@
 ##### Define pipeline
 - `task`: `Tasks.voice_activity_detection`
 - `model`: model name in [model zoo](https://alibaba-damo-academy.github.io/FunASR/en/modelscope_models.html#pretrained-models-on-modelscope), or model path in local disk
-- `ngpu`: `1` (Defalut), decoding on GPU. If ngpu=0, decoding on CPU
-- `ncpu`: `1` (Defalut), sets the number of threads used for intraop parallelism on CPU 
-- `output_dir`: `None` (Defalut), the output path of results if set
-- `batch_size`: `1` (Defalut), batch size when decoding
+- `ngpu`: `1` (Default), decoding on GPU. If ngpu=0, decoding on CPU
+- `ncpu`: `1` (Default), sets the number of threads used for intraop parallelism on CPU 
+- `output_dir`: `None` (Default), the output path of results if set
+- `batch_size`: `1` (Default), batch size when decoding
 ##### Infer pipeline
 - `audio_in`: the input to decode, which could be: 
   - wav_path, `e.g.`: asr_example.wav,
@@ -64,7 +64,7 @@
   ```
   In this case of `wav.scp` input, `output_dir` must be set to save the output results
 - `audio_fs`: audio sampling rate, only set when audio_in is pcm audio
-- `output_dir`: None (Defalut), the output path of results if set
+- `output_dir`: None (Default), the output path of results if set
 
 ### Inference with multi-thread CPUs or multi GPUs
 FunASR also offer recipes [infer.sh](https://github.com/alibaba-damo-academy/FunASR/blob/main/egs_modelscope/vad/TEMPLATE/infer.sh) to decode with multi-thread CPUs, or multi GPUs.
diff --git a/funasr/runtime/python/onnxruntime/README.md b/funasr/runtime/python/onnxruntime/README.md
index 1f7fcaa..ed3deb6 100644
--- a/funasr/runtime/python/onnxruntime/README.md
+++ b/funasr/runtime/python/onnxruntime/README.md
@@ -19,7 +19,7 @@
 ```
 
 
-## Install the `funasr_onnx`
+## Install `funasr_onnx`
 
 install from pip
 ```shell
@@ -46,16 +46,22 @@
  from funasr_onnx import Paraformer
 
  model_dir = "./export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
- model = Paraformer(model_dir, batch_size=1)
+ model = Paraformer(model_dir, batch_size=1, quantize=True)
 
  wav_path = ['./export/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/example/asr_example.wav']
 
  result = model(wav_path)
  print(result)
  ```
-- Model_dir: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
-- Input: wav formt file, support formats: `str, np.ndarray, List[str]`
-- Output: `List[str]`: recognition result
+- `model_dir`: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported formats: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
 
 #### Paraformer-online
 
@@ -71,9 +77,16 @@
 result = model(wav_path)
 print(result)
 ```
-- Model_dir: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
-- Input: wav formt file, support formats: `str, np.ndarray, List[str]`
-- Output: `List[str]`: recognition result
+- `model_dir`: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported formats: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
+
 
 #### FSMN-VAD-online
 ```python
@@ -105,9 +118,16 @@
     if segments_result:
         print(segments_result)
 ```
-- Model_dir: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
-- Input: wav formt file, support formats: `str, np.ndarray, List[str]`
-- Output: `List[str]`: recognition result
+- `model_dir`: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
+- `batch_size`: `1` (Default), the batch size during inference
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: wav format file, supported formats: `str, np.ndarray, List[str]`
+
+Output: `List[str]`: recognition result
+
 
 ### Punctuation Restoration
 #### CT-Transformer
@@ -121,9 +141,15 @@
 result = model(text_in)
 print(result[0])
 ```
-- Model_dir: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
-- Input: wav formt file, support formats: `str, np.ndarray, List[str]`
-- Output: `List[str]`: recognition result
+- `model_dir`: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: `str`, raw text of asr result
+
+Output: `List[str]`: recognition result
+
 
 #### CT-Transformer-online
 ```python
@@ -143,9 +169,14 @@
 
 print(rec_result_all)
 ```
-- Model_dir: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
-- Input: wav formt file, support formats: `str, np.ndarray, List[str]`
-- Output: `List[str]`: recognition result
+- `model_dir`: the model path, which contains `model.onnx`, `config.yaml`, `am.mvn`
+- `device_id`: `-1` (Default), infer on CPU. If you want to infer with GPU, set it to gpu_id (Please make sure that you have installed onnxruntime-gpu)
+- `quantize`: `False` (Default), load the model of `model.onnx` in `model_dir`. If set `True`, load the model of `model_quant.onnx` in `model_dir`
+- `intra_op_num_threads`: `4` (Default), sets the number of threads used for intraop parallelism on CPU
+
+Input: `str`, raw text of asr result
+
+Output: `List[str]`: recognition result
 
 ## Performance benchmark
 

--
Gitblit v1.9.1