| | |
| | | |
| | | |
| | | #### 非实时语音识别 |
| | | ##### SenseVoice |
| | | ```python |
| | | from funasr import AutoModel |
| | | from funasr.utils.postprocess_utils import rich_transcription_postprocess |
| | | |
| | | model_dir = "iic/SenseVoiceSmall" |
| | | |
| | | model = AutoModel( |
| | | model=model_dir, |
| | | vad_model="fsmn-vad", |
| | | vad_kwargs={"max_single_segment_time": 30000}, |
| | | device="cuda:0", |
| | | ) |
| | | |
| | | # en |
| | | res = model.generate( |
| | | input=f"{model.model_path}/example/en.mp3", |
| | | cache={}, |
| | | language="auto", # "zn", "en", "yue", "ja", "ko", "nospeech" |
| | | use_itn=True, |
| | | batch_size_s=60, |
| | | merge_vad=True, # |
| | | merge_length_s=15, |
| | | ) |
| | | text = rich_transcription_postprocess(res[0]["text"]) |
| | | print(text) |
| | | ``` |
| | | ##### Paraformer |
| | | ```python |
| | | from funasr import AutoModel |
| | | # paraformer-zh is a multi-functional asr model |
| | |
| | | export CUDA_VISIBLE_DEVICES="0,1" |
| | | gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') |
| | | |
| | | torchrun --nnodes 1 --nproc_per_node ${gpu_num} \ |
| | | torchrun --nnodes 1 --nproc_per_node ${gpu_num} --master_port 12345 \ |
| | | ../../../funasr/bin/train.py ${train_args} |
| | | ``` |
| | | --nnodes 表示参与的节点总数,--nproc_per_node 表示每个节点上运行的进程数 |
| | | --nnodes 表示参与的节点总数,--nproc_per_node 表示每个节点上运行的进程数,--master_port 表示端口号 |
| | | |
| | | ##### 多机多gpu训练 |
| | | |
| | |
| | | ../../../funasr/bin/train.py ${train_args} |
| | | ``` |
| | | |
| | | --nnodes 表示参与的节点总数,--node_rank 表示当前节点id,--nproc_per_node 表示每个节点上运行的进程数(通常为gpu个数) |
| | | --nnodes 表示参与的节点总数,--node_rank 表示当前节点id,--nproc_per_node 表示每个节点上运行的进程数(通常为gpu个数),--master_port 表示端口号 |
| | | |
| | | #### 准备数据 |
| | | |