From 70f9a8f8908fa83aafdad2742d21a107323b5fed Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Tue, 25 Apr 2023 01:29:12 +0800
Subject: [PATCH] update: default train_set/dev_set in prepare_data.py; drop shape-file args from aishell paraformer run.sh
---
funasr/utils/prepare_data.py | 4 ++++
egs/aishell/paraformer/run.sh | 4 ----
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/egs/aishell/paraformer/run.sh b/egs/aishell/paraformer/run.sh
index b6ef733..68a3006 100755
--- a/egs/aishell/paraformer/run.sh
+++ b/egs/aishell/paraformer/run.sh
@@ -169,12 +169,8 @@
--token_list $token_list \
--train_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${train_set}/${scp},speech,${type} \
--train_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${train_set}/text,text,text \
- --train_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${train_set}/speech_shape \
- --train_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${train_set}/text_shape.char \
--valid_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${valid_set}/${scp},speech,${type} \
--valid_data_path_and_name_and_type ${feats_dir}/${dumpdir}/${valid_set}/text,text,text \
- --valid_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${valid_set}/speech_shape \
- --valid_shape_file ${feats_dir}/asr_stats_fbank_zh_char/${valid_set}/text_shape.char \
--resume true \
--output_dir ${exp_dir}/exp/${model_dir} \
--config $asr_config \
diff --git a/funasr/utils/prepare_data.py b/funasr/utils/prepare_data.py
index 8d02e04..8ed97d5 100644
--- a/funasr/utils/prepare_data.py
+++ b/funasr/utils/prepare_data.py
@@ -162,6 +162,10 @@
if args.dataset_type == "large" and args.train_data_file is not None:
return
distributed = distributed_option.distributed
+ if not hasattr(args, "train_set"):
+ args.train_set = "train"
+ if not hasattr(args, "dev_set"):
+ args.dev_set = "validation"
if not distributed or distributed_option.dist_rank == 0:
filter_wav_text(args.data_dir, args.train_set)
filter_wav_text(args.data_dir, args.dev_set)
--
Gitblit v1.9.1