From cf8e000a84e888495dcf30c4dbfecea1ee7ab4e2 Mon Sep 17 00:00:00 2001
From: jmwang66 <wangjiaming.wjm@alibaba-inc.com>
Date: Mon, 07 Aug 2023 16:13:37 +0800
Subject: [PATCH] Merge pull request #807 from alibaba-damo-academy/dev_wjm

---
 egs/callhome/eend_ola/run.sh |   34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/egs/callhome/eend_ola/run.sh b/egs/callhome/eend_ola/run.sh
index ff6b75b..aa441bf 100644
--- a/egs/callhome/eend_ola/run.sh
+++ b/egs/callhome/eend_ola/run.sh
@@ -99,10 +99,9 @@
               --output_dir $output_dir \
               --index JOB
         mkdir -p ${data_dir}/ark_data/dump/simu_data/data/$dataset
-        python local/gen_feats_scp.py \
-              --root_path ${data_dir}/ark_data/dump/simu_data/$dataset \
-              --out_path ${data_dir}/ark_data/dump/simu_data/data/$dataset \
-              --split_num $nj
+        cat ${data_dir}/ark_data/dump/simu_data/$dataset/feature.scp.* > ${data_dir}/ark_data/dump/simu_data/data/$dataset/feature.scp
+        cat ${data_dir}/ark_data/dump/simu_data/$dataset/label.scp.* > ${data_dir}/ark_data/dump/simu_data/data/$dataset/label.scp
+        paste -d" " ${data_dir}/ark_data/dump/simu_data/data/$dataset/feature.scp <(cut -f2 -d" " ${data_dir}/ark_data/dump/simu_data/data/$dataset/label.scp) > ${data_dir}/ark_data/dump/simu_data/data/$dataset/feats.scp
         grep "ns2" ${data_dir}/ark_data/dump/simu_data/data/$dataset/feats.scp > ${data_dir}/ark_data/dump/simu_data/data/$dataset/feats_2spkr.scp
         # for chunk_size=2000
         output_dir=${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset
@@ -114,28 +113,23 @@
               --index JOB \
               --num_frames 2000
         mkdir -p ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset
-        python local/gen_feats_scp.py \
-              --root_path ${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset \
-              --out_path ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset \
-              --split_num $nj
-        grep "ns2" ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feats.scp > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feats_2spkr.scp
+        cat ${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset/feature.scp.* > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feature.scp
+        cat ${data_dir}/ark_data/dump/simu_data_chunk2000/$dataset/label.scp.* > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/label.scp
+        paste -d" " ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feature.scp <(cut -f2 -d" " ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/label.scp) > ${data_dir}/ark_data/dump/simu_data_chunk2000/data/$dataset/feats.scp
     done
 
     # for callhome data
     for dset in callhome1_spkall callhome2_spkall; do
         find  $data_dir/eval/$dset  -maxdepth 1 -type f -exec cp {} {}.1 \;
-        output_dir=${data_dir}/ark_data/dump/callhome/$dset
+        output_dir=${data_dir}/ark_data/dump/callhome_chunk2000/$dset
         mkdir -p $output_dir
         python local/dump_feature.py \
               --data_dir $data_dir/eval/$dset \
               --output_dir $output_dir \
               --index 1 \
               --num_frames 2000
-        mkdir -p ${data_dir}/ark_data/dump/callhome/data/$dset
-        python local/gen_feats_scp.py \
-              --root_path ${data_dir}/ark_data/dump/callhome/$dset \
-              --out_path ${data_dir}/ark_data/dump/callhome/data/$dset \
-              --split_num 1
+        mkdir -p ${data_dir}/ark_data/dump/callhome_chunk2000/data/$dset
+        paste -d" " ${data_dir}/ark_data/dump/callhome_chunk2000/$dset/feature.scp.1 <(cut -f2 -d" " ${data_dir}/ark_data/dump/callhome_chunk2000/$dset/label.scp.1) > ${data_dir}/ark_data/dump/callhome_chunk2000/data/$dset/feats.scp
     done
 fi
 
@@ -228,10 +222,10 @@
     python local/model_averaging.py ${exp_dir}/exp/${simu_allspkr_model_dir}/$simu_allspkr_ave_id.pb $models
 fi
 
-# Training on simulated all-speaker data with chunk_size=2000
+# Training on simulated all-speaker data with chunk_size 2000
 world_size=$gpu_num
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    echo "stage 3: Training on simulated all-speaker data with chunk_size=2000"
+    echo "stage 3: Training on simulated all-speaker data with chunk_size 2000"
     mkdir -p ${exp_dir}/exp/${simu_allspkr_chunk2000_model_dir}
     mkdir -p ${exp_dir}/exp/${simu_allspkr_chunk2000_model_dir}/log
     INIT_FILE=${exp_dir}/exp/${simu_allspkr_chunk2000_model_dir}/ddp_init
@@ -269,11 +263,11 @@
         wait
 fi
 
-# Training on callhome all-speaker data with chunk_size=2000
+# Training on callhome all-speaker data with chunk_size 2000
 world_size=$gpu_num
 callhome_ave_id=avg${callhome_average_start}-${callhome_average_end}
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
-    echo "stage 4: Training on callhome all-speaker data with chunk_size=2000"
+    echo "stage 4: Training on callhome all-speaker data with chunk_size 2000"
     mkdir -p ${exp_dir}/exp/${callhome_model_dir}
     mkdir -p ${exp_dir}/exp/${callhome_model_dir}/log
     INIT_FILE=${exp_dir}/exp/${callhome_model_dir}/ddp_init
@@ -325,6 +319,6 @@
         --wav_scp_file $data_dir/eval/callhome2_spkall/wav.scp \
         1> ${exp_dir}/exp/${callhome_model_dir}/inference/log/infer.log 2>&1
     md-eval.pl -c 0.25 \
-          -r ${callhome_feats_dir_chunk2000}/${callhome_valid_dataset}/rttm \
+          -r ${data_dir}/eval/${callhome_valid_dataset}/rttm \
           -s ${exp_dir}/exp/${callhome_model_dir}/inference/rttm > ${exp_dir}/exp/${callhome_model_dir}/inference/result_med11_collar0.25 2>/dev/null || exit
 fi
\ No newline at end of file

--
Gitblit v1.9.1