From 4d1bac3cd19205ca6bb48b2b1ea68269a04634a9 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Mon, 15 May 2023 15:39:21 +0800
Subject: [PATCH] update repo: simplify data/model paths in paraformerbert extract_embeds.sh

---
 egs/aishell/paraformerbert/local/extract_embeds.sh |   22 ++++++++--------------
 1 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/egs/aishell/paraformerbert/local/extract_embeds.sh b/egs/aishell/paraformerbert/local/extract_embeds.sh
index 6d99390..049d38c 100755
--- a/egs/aishell/paraformerbert/local/extract_embeds.sh
+++ b/egs/aishell/paraformerbert/local/extract_embeds.sh
@@ -3,20 +3,17 @@
 stage=1
 stop_stage=3
 
-bert_model_root="../../huggingface_models"
 bert_model_name="bert-base-chinese"
-#bert_model_name="chinese-roberta-wwm-ext"
-#bert_model_name="mengzi-bert-base"
-raw_dataset_path=~/Funasr_data/aishell-1
-model_path=${bert_model_root}/${bert_model_name}
+raw_dataset_path="../DATA"
+model_path=${bert_model_name}
 
 . utils/parse_options.sh || exit 1;
 
 nj=32
 
 for data_set in train dev test;do
-    scp=$raw_dataset_path/dump/fbank/${data_set}/text
-    local_scp_dir_raw=$raw_dataset_path/embeds/$bert_model_name/${data_set}
+    scp=$raw_dataset_path/data/${data_set}/text
+    local_scp_dir_raw=${raw_dataset_path}/data/embeds/${data_set}
     local_scp_dir=$local_scp_dir_raw/split$nj
     local_records_dir=$local_scp_dir_raw/ark
 
@@ -39,11 +36,10 @@
                 JOB=`expr $tmp + $idx`
                 echo "proces jobid=$JOB"
                 {
-
-                beg=0
-                gpu=`expr $beg + $idx`
-                echo ${local_scp_dir}/log.${JOB}
-                python utils/extract_embeds.py $local_scp_dir/data.$JOB.text ${local_records_dir}/embeds.${JOB}.ark ${local_records_dir}/embeds.${JOB}.scp ${local_records_dir}/embeds.${JOB}.shape ${gpu} ${model_path} &> ${local_scp_dir}/log.${JOB}
+                    beg=0
+                    gpu=`expr $beg + $idx`
+                    echo ${local_scp_dir}/log.${JOB}
+                    python utils/extract_embeds.py $local_scp_dir/data.$JOB.text ${local_records_dir}/embeds.${JOB}.ark ${local_records_dir}/embeds.${JOB}.scp ${local_records_dir}/embeds.${JOB}.shape ${gpu} ${model_path} &> ${local_scp_dir}/log.${JOB}
             } &
             done
             wait
@@ -54,8 +50,6 @@
         for JOB in $(seq ${nj}); do
             cat ${local_records_dir}/embeds.${JOB}.scp || exit 1;
         done > ${local_scp_dir_raw}/embeds.scp
-
-        sed 's#nfs#data\/volume1#g' ${local_scp_dir_raw}/embeds.scp > ${local_scp_dir_raw}/embeds.scp.pai
 
         for JOB in $(seq ${nj}); do
             cat ${local_records_dir}/embeds.${JOB}.shape || exit 1;

--
Gitblit v1.9.1