| New file |
| | |
| | | |
| | | |
| | | python -m funasr.bin.inference \ |
| | | --config-path="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3" \ |
| | | --config-name="config.yaml" \ |
| | | ++init_param="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3/model.pt.ep38" \ |
| | | ++tokenizer_conf.token_list="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/zh_token_list/char/tokens.txt" \ |
| | | ++frontend_conf.cmvn_file="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/train/am.mvn" \ |
| | | ++input="/mnt/nfs/zhifu.gzf/data/AISHELL-1/data_aishell/wav/train/S0002/BAC009S0002W0122.wav" \ |
| | | ++output_dir="./outputs/debug" \ |
| | | ++device="cuda:0" \ |
| | | |
| | |
| | | model_dir="baseline_$(basename "${config}" .yaml)_${lang}_${token_type}_${tag}" |
| | | |
| | | |
| | | |
| | | if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then |
| | | echo "stage -1: Data Download" |
| | | mkdir -p ${raw_data} |
| | |
| | | if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then |
| | | echo "stage 2: Dictionary Preparation" |
| | | mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/ |
| | | |
| | | |
| | | echo "make a dictionary" |
| | | echo "<blank>" > ${token_list} |
| | | echo "<s>" >> ${token_list} |
| | |
| | | if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then |
| | | echo "stage 5: Inference" |
| | | |
| | | if ${inference_device} == "cuda"; then |
| | | if [ ${inference_device} == "cuda" ]; then |
| | | nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') |
| | | else |
| | | inference_batch_size=1 |
| | |
| | | |
| | | for dset in ${test_sets}; do |
| | | |
| | | inference_dir="${exp_dir}/exp/${model_dir}/${inference_checkpoint}/${dset}" |
| | | inference_dir="${exp_dir}/exp/${model_dir}/inference-${inference_checkpoint}/${dset}" |
| | | _logdir="${inference_dir}/logdir" |
| | | |
| | | mkdir -p "${_logdir}" |
| | |
| | | done |
| | | utils/split_scp.pl "${key_file}" ${split_scps} |
| | | |
| | | gpuid_list_array=(${gpuid_list//,/ }) |
| | | gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ }) |
| | | for JOB in $(seq ${nj}); do |
| | | { |
| | | id=$((JOB-1)) |
| | |
| | | ++input="${_logdir}/keys.${JOB}.scp" \ |
| | | ++output_dir="${inference_dir}/${JOB}" \ |
| | | ++device="${inference_device}" \ |
| | | ++batch_size="${inference_batch_size}" |
| | | ++batch_size="${inference_batch_size}" &> ${_logdir}/log.${JOB}.txt |
| | | }& |
| | | |
| | | done |
| | |
| | | done |
| | | |
| | | echo "Computing WER ..." |
| | | cp ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/postprocess_text_zh.py ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | python utils/postprocess_text_zh.py ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${inference_dir}/1best_recog/text.ref ${inference_dir}/1best_recog/text.proc ${inference_dir}/1best_recog/text.cer |
| | | tail -n 3 ${inference_dir}/1best_recog/text.cer |
| | | done |
| | | |
| | | fi |
| | | fi |
| | |
| | | |
| | | |
| | | python funasr/bin/inference.py \ |
| | | python -m funasr.bin.inference \ |
| | | --config-path="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3" \ |
| | | --config-name="config.yaml" \ |
| | | ++init_param="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3/model.pt.ep38" \ |
| | |
| | | ++frontend_conf.cmvn_file="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/train/am.mvn" \ |
| | | ++input="/mnt/nfs/zhifu.gzf/data/AISHELL-1/data_aishell/wav/train/S0002/BAC009S0002W0122.wav" \ |
| | | ++output_dir="./outputs/debug" \ |
| | | ++device="cpu" \ |
| | | ++device="cuda:0" \ |
| | | |
| | |
| | | # feature configuration |
| | | nj=32 |
| | | |
| | | inference_device="cuda" #"cpu" |
| | | inference_device="cuda" #"cpu", "cuda:0", "cuda:1" |
| | | inference_checkpoint="model.pt" |
| | | inference_scp="wav.scp" |
| | | inference_batch_size=32 |
| | | inference_batch_size=1 |
| | | |
| | | # data |
| | | raw_data=../raw_data |
| | |
| | | |
| | | for dset in ${test_sets}; do |
| | | |
| | | inference_dir="${exp_dir}/exp/${model_dir}/${inference_checkpoint}/${dset}" |
| | | inference_dir="${exp_dir}/exp/${model_dir}/inference-${inference_checkpoint}/${dset}" |
| | | _logdir="${inference_dir}/logdir" |
| | | |
| | | mkdir -p "${_logdir}" |
| | |
| | | done |
| | | utils/split_scp.pl "${key_file}" ${split_scps} |
| | | |
| | | gpuid_list_array=(${gpuid_list//,/ }) |
| | | gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ }) |
| | | for JOB in $(seq ${nj}); do |
| | | { |
| | | id=$((JOB-1)) |
| | |
| | | done |
| | | |
| | | echo "Computing WER ..." |
| | | cp ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/postprocess_text_zh.py ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | python utils/postprocess_text_zh.py ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${inference_dir}/1best_recog/text.ref ${inference_dir}/1best_recog/text.proc ${inference_dir}/1best_recog/text.cer |
| | | tail -n 3 ${inference_dir}/1best_recog/text.cer |
| | | done |
| New file |
| | |
| | | |
| | | |
| | | python -m funasr.bin.inference \ |
| | | --config-path="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3" \ |
| | | --config-name="config.yaml" \ |
| | | ++init_param="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3/model.pt.ep38" \ |
| | | ++tokenizer_conf.token_list="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/zh_token_list/char/tokens.txt" \ |
| | | ++frontend_conf.cmvn_file="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/train/am.mvn" \ |
| | | ++input="/mnt/nfs/zhifu.gzf/data/AISHELL-1/data_aishell/wav/train/S0002/BAC009S0002W0122.wav" \ |
| | | ++output_dir="./outputs/debug" \ |
| | | ++device="cuda:0" \ |
| | | |
| | |
| | | model_dir="baseline_$(basename "${config}" .yaml)_${lang}_${token_type}_${tag}" |
| | | |
| | | |
| | | |
| | | if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then |
| | | echo "stage -1: Data Download" |
| | | mkdir -p ${raw_data} |
| | |
| | | if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then |
| | | echo "stage 2: Dictionary Preparation" |
| | | mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/ |
| | | |
| | | |
| | | echo "make a dictionary" |
| | | echo "<blank>" > ${token_list} |
| | | echo "<s>" >> ${token_list} |
| | |
| | | if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then |
| | | echo "stage 5: Inference" |
| | | |
| | | if ${inference_device} == "cuda"; then |
| | | if [ ${inference_device} == "cuda" ]; then |
| | | nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') |
| | | else |
| | | inference_batch_size=1 |
| | |
| | | |
| | | for dset in ${test_sets}; do |
| | | |
| | | inference_dir="${exp_dir}/exp/${model_dir}/${inference_checkpoint}/${dset}" |
| | | inference_dir="${exp_dir}/exp/${model_dir}/inference-${inference_checkpoint}/${dset}" |
| | | _logdir="${inference_dir}/logdir" |
| | | |
| | | mkdir -p "${_logdir}" |
| | |
| | | done |
| | | utils/split_scp.pl "${key_file}" ${split_scps} |
| | | |
| | | gpuid_list_array=(${gpuid_list//,/ }) |
| | | gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ }) |
| | | for JOB in $(seq ${nj}); do |
| | | { |
| | | id=$((JOB-1)) |
| | |
| | | ++input="${_logdir}/keys.${JOB}.scp" \ |
| | | ++output_dir="${inference_dir}/${JOB}" \ |
| | | ++device="${inference_device}" \ |
| | | ++batch_size="${inference_batch_size}" |
| | | ++batch_size="${inference_batch_size}" &> ${_logdir}/log.${JOB}.txt |
| | | }& |
| | | |
| | | done |
| | |
| | | done |
| | | |
| | | echo "Computing WER ..." |
| | | cp ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/postprocess_text_zh.py ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | python utils/postprocess_text_zh.py ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${inference_dir}/1best_recog/text.ref ${inference_dir}/1best_recog/text.proc ${inference_dir}/1best_recog/text.cer |
| | | tail -n 3 ${inference_dir}/1best_recog/text.cer |
| | | done |
| | | |
| | | fi |
| | | fi |
| New file |
| | |
| | | |
| | | |
| | | python -m funasr.bin.inference \ |
| | | --config-path="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3" \ |
| | | --config-name="config.yaml" \ |
| | | ++init_param="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3/model.pt.ep38" \ |
| | | ++tokenizer_conf.token_list="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/zh_token_list/char/tokens.txt" \ |
| | | ++frontend_conf.cmvn_file="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/train/am.mvn" \ |
| | | ++input="/mnt/nfs/zhifu.gzf/data/AISHELL-1/data_aishell/wav/train/S0002/BAC009S0002W0122.wav" \ |
| | | ++output_dir="./outputs/debug" \ |
| | | ++device="cuda:0" \ |
| | | |
| | |
| | | if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then |
| | | echo "stage 5: Inference" |
| | | |
| | | if ${inference_device} == "cuda"; then |
| | | if [ ${inference_device} == "cuda" ]; then |
| | | nj=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') |
| | | else |
| | | inference_batch_size=1 |
| | |
| | | |
| | | for dset in ${test_sets}; do |
| | | |
| | | inference_dir="${exp_dir}/exp/${model_dir}/${inference_checkpoint}/${dset}" |
| | | inference_dir="${exp_dir}/exp/${model_dir}/inference-${inference_checkpoint}/${dset}" |
| | | _logdir="${inference_dir}/logdir" |
| | | |
| | | mkdir -p "${_logdir}" |
| | |
| | | done |
| | | utils/split_scp.pl "${key_file}" ${split_scps} |
| | | |
| | | gpuid_list_array=(${gpuid_list//,/ }) |
| | | gpuid_list_array=(${CUDA_VISIBLE_DEVICES//,/ }) |
| | | for JOB in $(seq ${nj}); do |
| | | { |
| | | id=$((JOB-1)) |
| | |
| | | ++input="${_logdir}/keys.${JOB}.scp" \ |
| | | ++output_dir="${inference_dir}/${JOB}" \ |
| | | ++device="${inference_device}" \ |
| | | ++batch_size="${inference_batch_size}" |
| | | ++batch_size="${inference_batch_size}" &> ${_logdir}/log.${JOB}.txt |
| | | }& |
| | | |
| | | done |
| | |
| | | done |
| | | |
| | | echo "Computing WER ..." |
| | | cp ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/postprocess_text_zh.py ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | python utils/postprocess_text_zh.py ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${inference_dir}/1best_recog/text.ref ${inference_dir}/1best_recog/text.proc ${inference_dir}/1best_recog/text.cer |
| | | tail -n 3 ${inference_dir}/1best_recog/text.cer |
| | | done |
| | | |
| | | fi |
| | | fi |
| New file |
| | |
| | | |
"""Rewrite an ASR transcript file as space-separated, lower-cased characters.

Usage:
    python postprocess_text_zh.py <input_text> <output_text>

Each input line is split on its first space into an id and a text part.
Marker substrings are removed from the text, the text is lower-cased, and
every remaining character is separated by a single space.  Lines with no
text part (including blank lines) are written with a single-space
placeholder as their text.
"""
import sys

# Substrings removed from the text, applied strictly in this order.  The
# order is kept as-is because removing one token can create or destroy a
# match for a later one.
_STRIP_TOKENS = ("</s>", "<s>", "@@", "@", "<unk>", " ")


def postprocess_line(line):
    """Convert one transcript line into its character-separated form.

    Args:
        line: A raw line from the input file, with or without a trailing
            newline.

    Returns:
        The string ``"<id> <c1> <c2> ...\\n"``.  When the line has no text
        part, the text is a single space, so the result is ``"<id>  \\n"``.
    """
    fields = line.strip().split(" ", 1)
    if len(fields) == 2:
        idx, text = fields
        for token in _STRIP_TOKENS:
            text = text.replace(token, "")
        text = text.lower()
    else:
        # No text after the id: keep a one-space placeholder as the text.
        idx = fields[0]
        text = " "

    # Joining a str iterates its characters, giving one space between each.
    return "{} {}\n".format(idx, " ".join(text))


def main(argv=None):
    """Read the input file, post-process every line, write the output file.

    Args:
        argv: Argument list ``[input_path, output_path, ...]``; defaults to
            ``sys.argv[1:]``.  Arguments beyond the first two are ignored.

    Returns:
        0 on success, 2 when fewer than two paths were supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.stderr.write(
            "Usage: python postprocess_text_zh.py <input_text> <output_text>\n"
        )
        return 2
    in_f, out_f = args[0], args[1]

    # The input is read completely before the output is opened, so passing
    # the same path for both does not truncate the data before it is read.
    with open(in_f, "r", encoding="utf-8") as f:
        lines = f.readlines()

    with open(out_f, "w", encoding="utf-8") as f:
        for line in lines:
            f.write(postprocess_line(line))
    return 0


if __name__ == "__main__":
    sys.exit(main())
| New file |
| | |
| | | |
| | | |
| | | python -m funasr.bin.inference \ |
| | | --config-path="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3" \ |
| | | --config-name="config.yaml" \ |
| | | ++init_param="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3/model.pt.ep38" \ |
| | | ++tokenizer_conf.token_list="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/zh_token_list/char/tokens.txt" \ |
| | | ++frontend_conf.cmvn_file="/mnt/nfs/zhifu.gzf/data/AISHELL-1-feats/DATA/data/train/am.mvn" \ |
| | | ++input="/mnt/nfs/zhifu.gzf/data/AISHELL-1/data_aishell/wav/train/S0002/BAC009S0002W0122.wav" \ |
| | | ++output_dir="./outputs/debug" \ |
| | | ++device="cuda:0" \ |
| | | |
| | |
| | | model_dir="baseline_$(basename "${config}" .yaml)_${lang}_${token_type}_${tag}" |
| | | |
| | | |
| | | |
| | | if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then |
| | | echo "stage -1: Data Download" |
| | | mkdir -p ${raw_data} |
| | |
| | | if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then |
| | | echo "stage 2: Dictionary Preparation" |
| | | mkdir -p ${feats_dir}/data/${lang}_token_list/$token_type/ |
| | | |
| | | |
| | | echo "make a dictionary" |
| | | echo "<blank>" > ${token_list} |
| | | echo "<s>" >> ${token_list} |
| | |
| | | |
| | | for dset in ${test_sets}; do |
| | | |
| | | inference_dir="${exp_dir}/exp/${model_dir}/infer-${inference_checkpoint}/${dset}" |
| | | inference_dir="${exp_dir}/exp/${model_dir}/inference-${inference_checkpoint}/${dset}" |
| | | _logdir="${inference_dir}/logdir" |
| | | |
| | | mkdir -p "${_logdir}" |
| | |
| | | ++input="${_logdir}/keys.${JOB}.scp" \ |
| | | ++output_dir="${inference_dir}/${JOB}" \ |
| | | ++device="${inference_device}" \ |
| | | ++batch_size="${inference_batch_size}" |
| | | ++batch_size="${inference_batch_size}" &> ${_logdir}/log.${JOB}.txt |
| | | }& |
| | | |
| | | done |
| | |
| | | done |
| | | |
| | | echo "Computing WER ..." |
| | | cp ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/postprocess_text_zh.py ${inference_dir}/1best_recog/text ${inference_dir}/1best_recog/text.proc |
| | | python utils/postprocess_text_zh.py ${data_dir}/text ${inference_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${inference_dir}/1best_recog/text.ref ${inference_dir}/1best_recog/text.proc ${inference_dir}/1best_recog/text.cer |
| | | tail -n 3 ${inference_dir}/1best_recog/text.cer |
| | | done |
| | | |
| | | fi |
| | | fi |
| | |
| | | |
| | | print(f'Checkpoint saved to {filename}') |
| | | latest = Path(os.path.join(self.output_dir, f'model.pt')) |
| | | try: |
| | | latest.unlink() |
| | | except: |
| | | pass |
| | | torch.save(state, latest) |
| | | |
| | | latest.symlink_to(filename) |
| | | |
| | | def _resume_checkpoint(self, resume_path): |
| | | """ |