From d2dc3af1a69ee4075bcfc0c83dc0fb8e3fc1db4e Mon Sep 17 00:00:00 2001
From: yhliang <68215459+yhliang-aslp@users.noreply.github.com>
Date: Thu, 11 May 2023 16:31:40 +0800
Subject: [PATCH] Merge pull request #492 from alibaba-damo-academy/dev_smohan

---
 egs/alimeeting/sa-asr/asr_local.sh |   16 +++++++++++-----
 egs/alimeeting/sa-asr/README.md    |   18 +++++++++---------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/egs/alimeeting/sa-asr/README.md b/egs/alimeeting/sa-asr/README.md
index bc6d04c..951670b 100644
--- a/egs/alimeeting/sa-asr/README.md
+++ b/egs/alimeeting/sa-asr/README.md
@@ -1,7 +1,7 @@
 # Get Started
 Speaker Attributed Automatic Speech Recognition (SA-ASR) is a task proposed to solve "who spoke what". Specifically, the goal of SA-ASR is not only to obtain multi-speaker transcriptions, but also to identify the corresponding speaker for each utterance. The method used in this example is referenced in the paper: [End-to-End Speaker-Attributed ASR with Transformer](https://www.isca-speech.org/archive/pdfs/interspeech_2021/kanda21b_interspeech.pdf).  
 To run this receipe, first you need to install FunASR and ModelScope. ([installation](https://alibaba-damo-academy.github.io/FunASR/en/installation.html))  
-There are two startup scripts, `run.sh` for training and evaluating on the old eval and test sets, and `run_m2met_2023_infer.sh` for inference on the new test set of the Multi-Channel Multi-Party Meeting Transcription 2.0 ([M2MET2.0](https://alibaba-damo-academy.github.io/FunASR/m2met2/index.html)) Challenge.  
+There are two startup scripts, `run.sh` for training and evaluating on the old eval and test sets, and `run_m2met_2023_infer.sh` for inference on the new test set of the Multi-Channel Multi-Party Meeting Transcription 2.0 ([M2MeT2.0](https://alibaba-damo-academy.github.io/FunASR/m2met2/index.html)) Challenge.  
 Before running `run.sh`, you must manually download and unpack the [AliMeeting](http://www.openslr.org/119/) corpus and place it in the `./dataset` directory:
 ```shell
 dataset
@@ -65,17 +65,17 @@
 	</tr>
     <tr>
 	    <td>oracle profile</td>
-        <td>31.93</td>
-        <td>32.75</td>
-	    <td>48.56</td>
-        <td>53.33</td>
+        <td>32.05</td>
+        <td>32.70</td>
+	    <td>47.40</td>
+        <td>52.57</td>
 	</tr>
     <tr>
 	    <td>cluster profile</td>
-        <td>31.94</td>
-        <td>32.77</td>
-	    <td>55.49</td>
-        <td>58.17</td>
+        <td>32.05</td>
+        <td>32.70</td>
+	    <td>53.76</td>
+        <td>55.95</td>
 	</tr>
 </table>
 
diff --git a/egs/alimeeting/sa-asr/asr_local.sh b/egs/alimeeting/sa-asr/asr_local.sh
index 543352e..30401b9 100755
--- a/egs/alimeeting/sa-asr/asr_local.sh
+++ b/egs/alimeeting/sa-asr/asr_local.sh
@@ -1226,9 +1226,9 @@
 
 if ${infer_with_pretrained_model}; then
     log "Use ${download_sa_asr_model} for decoding and evaluation"
-
     sa_asr_exp="${expdir}/${download_sa_asr_model}"
     mkdir -p "${sa_asr_exp}"
+
 
     python local/download_pretrained_model_from_modelscope.py $download_sa_asr_model ${expdir}
     inference_sa_asr_model="model.pb"
@@ -1335,8 +1335,11 @@
             _data="${data_feats}/${dset}"
             _dir="${sa_asr_exp}/${sa_asr_inference_tag}.oracle/${dset}"
 
-            python utils/proce_text.py ${_data}/text ${_data}/text.proc
-            python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
+            sed 's/\$//g' ${_data}/text > ${_data}/text_nosrc
+            sed 's/\$//g' ${_dir}/text > ${_dir}/text_nosrc
+
+            python utils/proce_text.py ${_data}/text_nosrc ${_data}/text.proc
+            python utils/proce_text.py ${_dir}/text_nosrc ${_dir}/text.proc
 
             python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
             tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt
@@ -1451,8 +1454,11 @@
             _data="${data_feats}/${dset}"
             _dir="${sa_asr_exp}/${sa_asr_inference_tag}.cluster/${dset}"
 
-            python utils/proce_text.py ${_data}/text ${_data}/text.proc
-            python utils/proce_text.py ${_dir}/text ${_dir}/text.proc
+            sed 's/\$//g' ${_data}/text > ${_data}/text_nosrc
+            sed 's/\$//g' ${_dir}/text > ${_dir}/text_nosrc
+
+            python utils/proce_text.py ${_data}/text_nosrc ${_data}/text.proc
+            python utils/proce_text.py ${_dir}/text_nosrc ${_dir}/text.proc
 
             python utils/compute_wer.py ${_data}/text.proc ${_dir}/text.proc ${_dir}/text.cer
             tail -n 3 ${_dir}/text.cer > ${_dir}/text.cer.txt

--
Gitblit v1.9.1