python/FunASR-XL.git

parent: d748b861 | 补丁 | 提交 | ignore whitespace

嘉渊

2023-05-26 450ed733407f1fcb79853ae98ec4852bec349321

update repo

7个文件已修改

	egs/aishell/conformer/run.sh	6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	egs/aishell/data2vec_paraformer_finetune/run.sh	6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	egs/aishell/data2vec_transformer_finetune/run.sh	6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	egs/aishell/paraformer/run.sh	6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	egs/aishell/paraformerbert/run.sh	6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	egs/aishell/transformer/run.sh	6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	egs/wenetspeech/conformer/run.sh	15 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史

 egs/aishell/conformer/run.sh

@@ -88,11 +88,11 @@
    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
fi

token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
echo "dictionary: ${token_list}"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: Dictionary Preparation"
    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/

    echo "make a dictionary"
    echo "<blank>" > ${token_list}
@@ -130,7 +130,7 @@
                --task_name asr \
                --gpu_id $gpu_id \
                --use_preprocessor true \
                --token_type char \
                --token_type $token_type \
                --token_list $token_list \
                --data_dir ${feats_dir}/data \
                --train_set ${train_set} \

 egs/aishell/data2vec_paraformer_finetune/run.sh

@@ -91,11 +91,11 @@
    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
fi

token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
echo "dictionary: ${token_list}"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: Dictionary Preparation"
    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/

    echo "make a dictionary"
    echo "<blank>" > ${token_list}
@@ -134,7 +134,7 @@
                --task_name asr \
                --gpu_id $gpu_id \
                --use_preprocessor true \
                --token_type char \
                --token_type $token_type \
                --token_list $token_list \
                --data_dir ${feats_dir}/data \
                --train_set ${train_set} \

 egs/aishell/data2vec_transformer_finetune/run.sh

@@ -91,11 +91,11 @@
    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
fi

token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
echo "dictionary: ${token_list}"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: Dictionary Preparation"
    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/

    echo "make a dictionary"
    echo "<blank>" > ${token_list}
@@ -134,7 +134,7 @@
                --task_name asr \
                --gpu_id $gpu_id \
                --use_preprocessor true \
                --token_type char \
                --token_type $token_type \
                --token_list $token_list \
                --data_dir ${feats_dir}/data \
                --train_set ${train_set} \

 egs/aishell/paraformer/run.sh

@@ -88,11 +88,11 @@
    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
fi

token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
echo "dictionary: ${token_list}"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: Dictionary Preparation"
    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
   
    echo "make a dictionary"
    echo "<blank>" > ${token_list}
@@ -130,7 +130,7 @@
                --task_name asr \
                --gpu_id $gpu_id \
                --use_preprocessor true \
                --token_type char \
                --token_type $token_type \
                --token_list $token_list \
                --dataset_type small \
                --data_dir ${feats_dir}/data \

 egs/aishell/paraformerbert/run.sh

@@ -92,11 +92,11 @@
    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
fi

token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
echo "dictionary: ${token_list}"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: Dictionary Preparation"
    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/
   
    echo "make a dictionary"
    echo "<blank>" > ${token_list}
@@ -141,7 +141,7 @@
                --task_name asr \
                --gpu_id $gpu_id \
                --use_preprocessor true \
                --token_type char \
                --token_type $token_type \
                --token_list $token_list \
                --data_dir ${feats_dir}/data \
                --train_set ${train_set} \

 egs/aishell/transformer/run.sh

@@ -88,11 +88,11 @@
    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
fi

token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
echo "dictionary: ${token_list}"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: Dictionary Preparation"
    mkdir -p ${feats_dir}/data/${lang}_token_list/char/
    mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/

    echo "make a dictionary"
    echo "<blank>" > ${token_list}
@@ -130,7 +130,7 @@
                --task_name asr \
                --gpu_id $gpu_id \
                --use_preprocessor true \
                --token_type char \
                --token_type $token_type \
                --token_list $token_list \
                --data_dir ${feats_dir}/data \
                --train_set ${train_set} \

 egs/wenetspeech/conformer/run.sh

@@ -84,3 +84,18 @@
    echo "stage 1: Feature and CMVN Generation"
    utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 0.1
fi

# Stage 2: build the token dictionary from the training transcripts.
# Reads:  feats_dir, lang, token_type, train_set, stage, stop_stage
#         (expected to be defined before this point in the script).
# Writes: token_list (path of the dictionary file) and the file itself.
# All expansions are quoted so paths containing spaces or glob characters
# are passed through as single words.
token_list="${feats_dir}/data/${lang}_token_list/${token_type}/tokens.txt"
echo "dictionary: ${token_list}"
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    echo "stage 2: Dictionary Preparation"
    mkdir -p "${feats_dir}/data/${lang}_token_list/${token_type}/"

    echo "make a dictionary"
    # Resulting file order: <blank>, <s>, </s>, sorted unique tokens, <unk>.
    echo "<blank>" > "${token_list}"   # '>' truncates any previous dictionary
    echo "<s>" >> "${token_list}"
    echo "</s>" >> "${token_list}"
    # Keep fields 2.. of every text2token.py output line (field 1 is presumably
    # the utterance id -- confirm against utils/text2token.py), split into one
    # token per line, de-duplicate, and drop whitespace-only lines. grep -a
    # forces the input to be treated as text.
    utils/text2token.py -s 1 -n 1 --space "" "${feats_dir}/data/${train_set}/text" | cut -f 2- -d" " | tr " " "\n" \
        | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0}' >> "${token_list}"
    echo "<unk>" >> "${token_list}"
fi

			@@ -88,11 +88,11 @@
			utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
			fi

			token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
			token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
			echo "dictionary: ${token_list}"
			if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
			echo "stage 2: Dictionary Preparation"
			mkdir -p ${feats_dir}/data/${lang}_token_list/char/
			mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/

			echo "make a dictionary"
			echo "<blank>" > ${token_list}
			@@ -130,7 +130,7 @@
			--task_name asr \
			--gpu_id $gpu_id \
			--use_preprocessor true \
			--token_type char \
			--token_type $token_type \
			--token_list $token_list \
			--data_dir ${feats_dir}/data \
			--train_set ${train_set} \

			@@ -91,11 +91,11 @@
			utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
			fi

			token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
			token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
			echo "dictionary: ${token_list}"
			if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
			echo "stage 2: Dictionary Preparation"
			mkdir -p ${feats_dir}/data/${lang}_token_list/char/
			mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/

			echo "make a dictionary"
			echo "<blank>" > ${token_list}
			@@ -134,7 +134,7 @@
			--task_name asr \
			--gpu_id $gpu_id \
			--use_preprocessor true \
			--token_type char \
			--token_type $token_type \
			--token_list $token_list \
			--data_dir ${feats_dir}/data \
			--train_set ${train_set} \

			@@ -92,11 +92,11 @@
			utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 1.0
			fi

			token_list=${feats_dir}/data/${lang}_token_list/char/tokens.txt
			token_list=${feats_dir}/data/${lang}_token_list/$token_type/tokens.txt
			echo "dictionary: ${token_list}"
			if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
			echo "stage 2: Dictionary Preparation"
			mkdir -p ${feats_dir}/data/${lang}_token_list/char/
			mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/

			echo "make a dictionary"
			echo "<blank>" > ${token_list}
			@@ -141,7 +141,7 @@
			--task_name asr \
			--gpu_id $gpu_id \
			--use_preprocessor true \
			--token_type char \
			--token_type $token_type \
			--token_list $token_list \
			--data_dir ${feats_dir}/data \
			--train_set ${train_set} \

			@@ -84,3 +84,18 @@
			echo "stage 1: Feature and CMVN Generation"
			utils/compute_cmvn.sh --fbankdir ${feats_dir}/data/${train_set} --cmd "$train_cmd" --nj $nj --feats_dim ${feats_dim} --config_file "$asr_config" --scale 0.1
			fi

			# Stage 2: build the token dictionary from the training transcripts.
			# Reads:  feats_dir, lang, token_type, train_set, stage, stop_stage
			#         (expected to be defined before this point in the script).
			# Writes: token_list (path of the dictionary file) and the file itself.
			token_list="${feats_dir}/data/${lang}_token_list/${token_type}/tokens.txt"
			echo "dictionary: ${token_list}"
			if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
				echo "stage 2: Dictionary Preparation"
				mkdir -p "${feats_dir}/data/${lang}_token_list/${token_type}/"

				echo "make a dictionary"
				# Resulting file order: <blank>, <s>, </s>, sorted unique tokens, <unk>.
				echo "<blank>" > "${token_list}"   # '>' truncates any previous dictionary
				echo "<s>" >> "${token_list}"
				echo "</s>" >> "${token_list}"
				# The stages must be joined by real, unescaped pipes: a backslash-escaped
				# pipe is just a literal argument and nothing would be piped.
				# Keep fields 2.. of every text2token.py output line (field 1 is presumably
				# the utterance id -- confirm against utils/text2token.py), split into one
				# token per line, de-duplicate, and drop whitespace-only lines.
				utils/text2token.py -s 1 -n 1 --space "" "${feats_dir}/data/${train_set}/text" | cut -f 2- -d" " | tr " " "\n" \
					| sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0}' >> "${token_list}"
				echo "<unk>" >> "${token_list}"
			fi