
# MFCCA

- Model link: <https://www.modelscope.cn/models/NPU-ASLP/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/summary>
- Model size: 45M

# Environments

if __name__ == '__main__':
    # Fine-tune the MFCCA ASR model (AliMeeting, zh-CN, 16 kHz) via ModelScope.
    # NOTE(review): the original contained duplicate assignments from a merged
    # diff (an older "yufan6/..." model id and revision 'v2.0.0' that were
    # immediately overwritten); only the effective values are kept here —
    # runtime behavior is unchanged because the later assignment always won.
    params = modelscope_args(model="NPU-ASLP/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950")
    params.output_dir = "./checkpoint"    # directory where checkpoints are saved
    params.data_path = "./example_data/"  # path to the training data
    params.dataset_type = "small"         # use "small"; switch to "large" when data exceeds 1000 hours
    params.batch_bins = 1000              # batch size: fbank frames if dataset_type="small", milliseconds if "large"
    params.max_epoch = 10                 # maximum number of training epochs
    params.lr = 0.0001                    # learning rate
    params.model_revision = 'v1.0.0'
    modelscope_finetune(params)
| | |
# Pin this job to a single GPU before the pipeline allocates any device state.
os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
# Build the ModelScope ASR inference pipeline for the MFCCA model.
# BUGFIX(review): the merged diff passed `model=` and `model_revision=` twice,
# which is a SyntaxError (repeated keyword argument); the current
# (NPU-ASLP / v1.0.0) pair is kept.
inference_pipline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='NPU-ASLP/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950',
    model_revision='v1.0.0',
    output_dir=output_dir_job,  # per-job directory for recognition results
    batch_size=1,
)
| | |
| | | |
| | | if __name__ == '__main__': |
| | | params = {} |
| | | params["modelscope_model_name"] = "yufan6/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950" |
| | | params["modelscope_model_name"] = "NPU-ASLP/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950" |
| | | params["required_files"] = ["feats_stats.npz", "decoding.yaml", "configuration.json"] |
| | | params["output_dir"] = "./checkpoint" |
| | | params["data_dir"] = "./example_data/validation" |