From d9e60d9ddc92ab5746842b5a2b6f7a423de2a795 Mon Sep 17 00:00:00 2001
From: 语帆 <yf352572@alibaba-inc.com>
Date: 星期一, 04 三月 2024 16:44:31 +0800
Subject: [PATCH] atsr
---
examples/industrial_data_pretraining/lcbnet/README.md | 73 ++++++++++++++++++++++++++++++++++++
examples/industrial_data_pretraining/lcbnet/demo.py | 26 +------------
2 files changed, 75 insertions(+), 24 deletions(-)
diff --git a/examples/industrial_data_pretraining/lcbnet/README.md b/examples/industrial_data_pretraining/lcbnet/README.md
index c0d4b1e..ff75b40 100644
--- a/examples/industrial_data_pretraining/lcbnet/README.md
+++ b/examples/industrial_data_pretraining/lcbnet/README.md
@@ -91,6 +91,79 @@
+## 基于ModelScope进行推理
+
+- 推理支持音频格式如下：
+ - wav文件路径，例如：data/test/asr_example.wav
+ - pcm文件路径，例如：data/test/asr_example.pcm
+ - ark文件路径，例如：data/test/data.ark
+ - wav文件url，例如：https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/asr_example.wav
+ - wav二进制数据，格式bytes，例如：用户直接从文件里读出bytes数据或者是麦克风录出bytes数据。
+ - 已解析的audio音频，例如：audio, rate = soundfile.read("asr_example_zh.wav")，类型为numpy.ndarray或者torch.Tensor。
+ - wav.scp文件，需符合如下要求(以下分别为sound和kaldi_ark格式)：
+
+```sh
+cat wav.scp
+asr_example1 data/test/asr_example1.wav
+asr_example2 data/test/asr_example2.wav
+
+cat wav.scp
+asr_example1 data/test/data_wav.ark:22
+asr_example2 data/test/data_wav.ark:90445
+...
+```
+
+- 推理支持OCR预测文本格式如下：
+ - ocr.txt文件，需符合如下要求：
+```sh
+cat ocr.txt
+asr_example1 ANIMAL <blank> RIGHTS <blank> MANAGER <blank> PLOEG
+asr_example2 UNIVERSITY <blank> CAMPUS <blank> DEANO
+...
+```
+
+- 若输入格式wav文件和ocr文件均为url，api调用方式可参考如下范例：
+
+```python
+from funasr import AutoModel
+
+model = AutoModel(model="iic/LCB-NET",
+ model_revision="v2.0.0")
+res = model.generate(input=("https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/asr_example.wav","https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/ocr.txt"),data_type=("sound", "text"))
+```
+
+
+## 复现论文中的结果
+```sh
+python -m funasr.bin.inference \
+ --config-path=${file_dir} \
+ --config-name="config.yaml" \
+ ++init_param=${file_dir}/model.pt \
+ ++tokenizer_conf.token_list=${file_dir}/tokens.txt \
+ ++input=[${_logdir}/wav.scp,${_logdir}/ocr.txt] \
+ +data_type='["kaldi_ark", "text"]' \
+ ++tokenizer_conf.bpemodel=${file_dir}/bpe.pt \
+ ++output_dir="${inference_dir}/results" \
+ ++device="${inference_device}" \
+ ++ncpu=1 \
+ ++disable_log=true
+
+```
+
+
+识别结果输出路径结构如下：
+
+```sh
+tree output_dir/
+output_dir/
+└── 1best_recog
+    ├── text
+    └── token
+```
+
+token：语音识别结果文件
+
+可以使用funasr里面提供的run_bwer_recall.sh计算WER、BWER、UWER和Recall。
+## 相关论文以及引用信息
diff --git a/examples/industrial_data_pretraining/lcbnet/demo.py b/examples/industrial_data_pretraining/lcbnet/demo.py
index 602a986..ac679ce 100755
--- a/examples/industrial_data_pretraining/lcbnet/demo.py
+++ b/examples/industrial_data_pretraining/lcbnet/demo.py
@@ -6,30 +6,8 @@
from funasr import AutoModel
model = AutoModel(model="iic/LCB-NET",
- model_revision="v1.0.0")
+ model_revision="v2.0.0")
-
-# example1
res = model.generate(input=("https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/asr_example.wav","https://www.modelscope.cn/api/v1/models/iic/LCB-NET/repo?Revision=master&FilePath=example/ocr.txt"),data_type=("sound", "text"))
-print(res)
-
-
-'''
-# tensor or numpy as input
-# example2
-import torchaudio
-import os
-wav_file = os.path.join(model.model_path, "example/asr_example.wav")
-input_tensor, sample_rate = torchaudio.load(wav_file)
-input_tensor = input_tensor.mean(0)
-res = model.generate(input=[input_tensor], batch_size_s=300, is_final=True)
-
-
-# example3
-import soundfile
-
-wav_file = os.path.join(model.model_path, "example/asr_example.wav")
-speech, sample_rate = soundfile.read(wav_file)
-res = model.generate(input=[speech], batch_size_s=300, is_final=True)
-'''
+print(res)
\ No newline at end of file
--
Gitblit v1.9.1