嘉渊
2023-04-25 70f9a8f8908fa83aafdad2742d21a107323b5fed
update
2个文件已修改
8 ■■■■ 已修改文件
egs/aishell/paraformer/run.sh 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/utils/prepare_data.py 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs/aishell/paraformer/run.sh
@@ -169,12 +169,8 @@
                --token_list $token_list \
                --train_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${train_set}/${scp},speech,${type} \
                --train_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${train_set}/text,text,text \
                --train_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${train_set}/speech_shape \
                --train_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${train_set}/text_shape.char \
                --valid_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${valid_set}/${scp},speech,${type} \
                --valid_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${valid_set}/text,text,text \
                --valid_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${valid_set}/speech_shape \
                --valid_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${valid_set}/text_shape.char  \
                --resume true \
                --output_dir ${exp_dir}/exp/${model_dir} \
                --config $asr_config \
funasr/utils/prepare_data.py
@@ -162,6 +162,10 @@
    if args.dataset_type == "large" and args.train_data_file is not None:
        return
    distributed = distributed_option.distributed
    if not hasattr(args, "train_set"):
        args.train_set = "train"
    if not hasattr(args, "dev_set"):
        args.dev_set = "validation"
    if not distributed or distributed_option.dist_rank == 0:
        filter_wav_text(args.data_dir, args.train_set)
        filter_wav_text(args.data_dir, args.dev_set)