From 98e2c546a08917f450d32d63968affd5b975ad2a Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 16 Oct 2024 15:22:05 +0800
Subject: [PATCH] funasr tables
---
docs/tutorial/README_zh.md | 21 ++++---
docs/tutorial/Tables_zh.md | 146 +++++++++++++++++++++++++-----------------------
2 files changed, 89 insertions(+), 78 deletions(-)
diff --git a/docs/tutorial/README_zh.md b/docs/tutorial/README_zh.md
index afef070..3242275 100644
--- a/docs/tutorial/README_zh.md
+++ b/docs/tutorial/README_zh.md
@@ -442,22 +442,21 @@
### 查看注册表
-```python
+```python
from funasr.register import tables
tables.print()
```
-支持查看指定类型的注册表：`tables.print("model")`
+支持查看指定类型的注册表：`tables.print("model")`
-
-### 注册新模型
+### 注册模型
```python
from funasr.register import tables
-@tables.register("model_classes", "MinMo_S2T")
-class MinMo_S2T(nn.Module):
+@tables.register("model_classes", "SenseVoiceSmall")
+class SenseVoiceSmall(nn.Module):
def __init__(*args, **kwargs):
...
@@ -479,10 +478,14 @@
```
-然后在config.yaml中指定新注册模型
+在需要注册的类名前加上 `@tables.register("model_classes","SenseVoiceSmall")`，即可完成注册，类需要实现有：`__init__`，`forward`，`inference`方法。
-```yaml
-model: MinMo_S2T
+完整代码：[https://github.com/modelscope/FunASR/blob/main/funasr/models/sense\_voice/model.py#L443](https://github.com/modelscope/FunASR/blob/main/funasr/models/sense_voice/model.py#L443)
+
+注册完成后，在config.yaml中指定新注册模型，即可实现对模型的定义
+
+```yaml
+model: SenseVoiceSmall
model_conf:
...
```
diff --git a/docs/tutorial/Tables_zh.md b/docs/tutorial/Tables_zh.md
index ec64baf..967c625 100644
--- a/docs/tutorial/Tables_zh.md
+++ b/docs/tutorial/Tables_zh.md
@@ -1,4 +1,4 @@
-# FunASR-1.x.x 注册模型教程
+# FunASR-1.x.x 注册模型教程
1.0版本的设计初衷是【**让模型集成更简单**】，核心feature为注册表与AutoModel：
@@ -11,7 +11,7 @@
* 统一学术与工业模型推理训练脚本；
-
+
# 快速上手
@@ -89,14 +89,14 @@
res = model.generate(input=[str], output_dir=[str])
```
-* wav文件路径, 例如: asr\_example.wav
-
-* pcm文件路径, 例如: asr\_example.pcm，此时需要指定音频采样率fs（默认为16000）
-
-* 音频字节数流，例如：麦克风的字节数数据
-
-* wav.scp，kaldi 样式的 wav 列表 (`wav_id \t wav_path`), 例如:
-
+* * wav文件路径, 例如: asr\_example.wav
+
+ * pcm文件路径, 例如: asr\_example.pcm，此时需要指定音频采样率fs（默认为16000）
+
+ * 音频字节数流，例如：麦克风的字节数数据
+
+ * wav.scp，kaldi 样式的 wav 列表 (`wav_id \t wav_path`), 例如:
+
```plaintext
asr_example1 ./audios/asr_example1.wav
@@ -121,76 +121,82 @@
## 模型资源目录
-
+
+
+**模型链接为：**[https://www.modelscope.cn/models/iic/SenseVoiceSmall/files](https://www.modelscope.cn/models/iic/SenseVoiceSmall/files)
**配置文件**：config.yaml
```yaml
-model: SenseVoiceLarge
-model_conf:
- lsm_weight: 0.1
- length_normalized_loss: true
- activation_checkpoint: true
- sos: <|startoftranscript|>
- eos: <|endoftext|>
- downsample_rate: 4
- use_padmask: true
-
-encoder: SenseVoiceEncoder
+encoder: SenseVoiceEncoderSmall
encoder_conf:
- input_size: 128
- attention_heads: 20
- linear_units: 1280
- num_blocks: 32
- dropout_rate: 0.1
- positional_dropout_rate: 0.1
- attention_dropout_rate: 0.1
- kernel_size: 31
- sanm_shfit: 0
- att_type: self_att_fsmn_sdpa
- downsample_rate: 4
- use_padmask: true
- max_position_embeddings: 2048
- rope_theta: 10000
-
-frontend: WhisperFrontend
-frontend_conf:
- fs: 16000
- n_mels: 128
- do_pad_trim: false
- filters_path: null
+ output_size: 512
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 50
+ tp_blocks: 20
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: pe
+ pos_enc_class: SinusoidalPositionEncoder
+ normalize_before: true
+ kernel_size: 11
+ sanm_shfit: 0
+ selfattention_layer_type: sanm
-tokenizer: SenseVoiceTokenizer
+
+model: SenseVoiceSmall
+model_conf:
+ length_normalized_loss: true
+ sos: 1
+ eos: 2
+ ignore_id: -1
+
+tokenizer: SentencepiecesTokenizer
tokenizer_conf:
- vocab_path: null
- is_multilingual: true
- num_languages: 8749
+ bpemodel: null
+ unk_symbol: <unk>
+ split_with_space: true
-dataset: SenseVoiceDataset
+frontend: WavFrontend
+frontend_conf:
+ fs: 16000
+ window: hamming
+ n_mels: 80
+ frame_length: 25
+ frame_shift: 10
+ lfr_m: 7
+ lfr_n: 6
+ cmvn_file: null
+
+
+dataset: SenseVoiceCTCDataset
dataset_conf:
index_ds: IndexDSJsonl
- batch_sampler: BatchSampler
+ batch_sampler: EspnetStyleBatchSampler
+ data_split_num: 32
batch_type: token
- batch_size: 12000
- sort_size: 64
+ batch_size: 14000
max_token_length: 2000
min_token_length: 60
max_source_length: 2000
min_source_length: 60
- max_target_length: 150
+ max_target_length: 200
min_target_length: 0
shuffle: true
num_workers: 4
sos: ${model_conf.sos}
eos: ${model_conf.eos}
IndexDSJsonl: IndexDSJsonl
+ retry: 20
train_conf:
accum_grad: 1
grad_clip: 5
- max_epoch: 5
- keep_nbest_models: 200
- avg_nbest_model: 200
+ max_epoch: 20
+ keep_nbest_models: 10
+ avg_nbest_model: 10
log_interval: 100
resume: true
validate_interval: 10000
@@ -198,11 +204,10 @@
optim: adamw
optim_conf:
- lr: 2.5e-05
-
+ lr: 0.00002
scheduler: warmuplr
scheduler_conf:
- warmup_steps: 20000
+ warmup_steps: 25000
```
@@ -222,8 +227,8 @@
"file_path_metas": {
"init_param":"model.pt",
"config":"config.yaml",
- "tokenizer_conf": {"vocab_path": "tokens.tiktoken"},
- "frontend_conf":{"filters_path": "mel_filters.npz"}}
+ "tokenizer_conf": {"bpemodel": "chn_jpn_yue_eng_ko_spectok.bpe.model"},
+ "frontend_conf":{"cmvn_file": "am.mvn"}}
}
```
@@ -231,22 +236,21 @@
### 查看注册表
-```python
+```python
from funasr.register import tables
tables.print()
```
-支持查看指定类型的注册表：`tables.print("model")`
+支持查看指定类型的注册表：`tables.print("model")`
-
-### 新注册
+### 注册模型
```python
from funasr.register import tables
-@tables.register("model_classes", "MinMo_S2T")
-class MinMo_S2T(nn.Module):
+@tables.register("model_classes", "SenseVoiceSmall")
+class SenseVoiceSmall(nn.Module):
def __init__(*args, **kwargs):
...
@@ -268,10 +272,14 @@
```
-在config.yaml中指定新注册模型
+在需要注册的类名前加上 `@tables.register("model_classes","SenseVoiceSmall")`，即可完成注册，类需要实现有：`__init__`，`forward`，`inference`方法。
-```yaml
-model: MinMo_S2T
+完整代码：[https://github.com/modelscope/FunASR/blob/main/funasr/models/sense\_voice/model.py#L443](https://github.com/modelscope/FunASR/blob/main/funasr/models/sense_voice/model.py#L443)
+
+注册完成后，在config.yaml中指定新注册模型，即可实现对模型的定义
+
+```yaml
+model: SenseVoiceSmall
model_conf:
...
```
--
Gitblit v1.9.1