From c6d6c932a047f49f80eca33954afb802136f02c3 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期三, 27 十二月 2023 16:43:30 +0800
Subject: [PATCH] funasr1.0

---
 funasr/models/paraformer/model.py                                        |    2 
 examples/industrial_data_pretraining/fsmn-vad/demo.py                    |   11 ++
 examples/industrial_data_pretraining/punc/demo.py                        |   11 ++
 examples/industrial_data_pretraining/paraformer-large-long/demo.py       |   14 +++
 funasr/models/neat_contextual_paraformer/model.py                        |    2 
 examples/industrial_data_pretraining/paraformer-large/infer.sh           |   29 ++----
 funasr/bin/inference.py                                                  |    2 
 examples/industrial_data_pretraining/paraformer-large-long/infer.sh      |   44 ++++------
 examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh |   15 +++
 examples/industrial_data_pretraining/fsmn-vad/infer.sh                   |   15 ++-
 examples/industrial_data_pretraining/punc/infer.sh                       |   24 ++---
 examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py  |   12 +++
 examples/industrial_data_pretraining/paraformer-large/demo.py            |   11 ++
 13 files changed, 126 insertions(+), 66 deletions(-)

diff --git a/examples/industrial_data_pretraining/fsmn-vad/demo.py b/examples/industrial_data_pretraining/fsmn-vad/demo.py
new file mode 100644
index 0000000..b3e9bb6
--- /dev/null
+++ b/examples/industrial_data_pretraining/fsmn-vad/demo.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
+
+from funasr import AutoModel
+
+model_dir = "./modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch"  # same checkout location the sibling infer.sh clones into
+model = AutoModel(model=model_dir)
+res = model(input=f"{model_dir}/example/vad_example.wav")
+print(res)
\ No newline at end of file
diff --git a/examples/industrial_data_pretraining/fsmn-vad/infer.sh b/examples/industrial_data_pretraining/fsmn-vad/infer.sh
index 9bfd8ba..7662a53 100644
--- a/examples/industrial_data_pretraining/fsmn-vad/infer.sh
+++ b/examples/industrial_data_pretraining/fsmn-vad/infer.sh
@@ -1,8 +1,13 @@
 
-cmd="funasr/bin/inference.py"
+# Download the FSMN-VAD model repository from ModelScope into ./modelscope_models.
+local_path_root=./modelscope_models
+mkdir -p ${local_path_root}
+local_path=${local_path_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch
+git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git ${local_path}
 
-python $cmd \
-+model="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch" \
-+input="/Users/zhifu/Downloads/asr_example.wav" \
-+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2_vad" \
+# Run inference on the example wav inside the cloned model directory; the script path is relative, so run from the FunASR repository root.
+python funasr/bin/inference.py \
++model="${local_path}" \
++input="${local_path}/example/vad_example.wav" \
++output_dir="./outputs/debug" \
 +device="cpu" \
diff --git a/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py b/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py
new file mode 100644
index 0000000..b74aacd
--- /dev/null
+++ b/examples/industrial_data_pretraining/neat_contextual_paraformer/demo.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
+
+from funasr import AutoModel
+
+model_dir = "./modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404"  # same checkout location the sibling infer.sh clones into
+model = AutoModel(model=model_dir)
+res = model(input=f"{model_dir}/example/asr_example.wav",
+            hotword='达摩院 魔搭')
+print(res)
\ No newline at end of file
diff --git a/examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh b/examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh
new file mode 100644
index 0000000..4ae7d03
--- /dev/null
+++ b/examples/industrial_data_pretraining/neat_contextual_paraformer/infer.sh
@@ -0,0 +1,15 @@
+
+# Download the contextual Paraformer model repository from ModelScope into ./modelscope_models.
+local_path_root=./modelscope_models
+mkdir -p ${local_path_root}
+local_path=${local_path_root}/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404
+git clone https://www.modelscope.cn/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404.git ${local_path}
+
+# Decode the bundled example with space-separated hotwords; the script path is relative, so run from the FunASR repository root.
+python funasr/bin/inference.py \
++model="${local_path}" \
++input="${local_path}/example/asr_example.wav" \
++output_dir="./outputs/debug" \
++device="cpu" \
++"hotword='达摩院 魔搭'"
+
diff --git a/examples/industrial_data_pretraining/paraformer-large-long/demo.py b/examples/industrial_data_pretraining/paraformer-large-long/demo.py
new file mode 100644
index 0000000..e45cae8
--- /dev/null
+++ b/examples/industrial_data_pretraining/paraformer-large-long/demo.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
+
+from funasr import AutoModel
+
+model_root = "./modelscope_models"  # same checkout root the sibling infer.sh clones the three models into
+model = AutoModel(model=f"{model_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
+                  vad_model=f"{model_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch",
+                  punc_model=f"{model_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
+                  )
+res = model(input=f"{model_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch/example/vad_example.wav", batch_size_s=300, batch_size_threshold_s=60)
+print(res)
\ No newline at end of file
diff --git a/examples/industrial_data_pretraining/paraformer-large-long/infer.sh b/examples/industrial_data_pretraining/paraformer-large-long/infer.sh
index 2e6ec0d..fc2a09a 100644
--- a/examples/industrial_data_pretraining/paraformer-large-long/infer.sh
+++ b/examples/industrial_data_pretraining/paraformer-large-long/infer.sh
@@ -1,32 +1,26 @@
 
-cmd="funasr/bin/inference.py"
+# Download the ASR, VAD and punctuation model repositories from ModelScope into ./modelscope_models.
+local_path_root=./modelscope_models
+mkdir -p ${local_path_root}
 
-python $cmd \
-+model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
-+vad_model="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch" \
-+punc_model="/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" \
-+input="/Users/zhifu/funasr_github/test_local/vad_example.wav" \
-+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \
+local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path}
+
+local_path_vad=${local_path_root}/speech_fsmn_vad_zh-cn-16k-common-pytorch
+git clone https://www.modelscope.cn/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch.git ${local_path_vad}
+
+local_path_punc=${local_path_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch
+git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git ${local_path_punc}
+
+# Every argument line except the last must end with a backslash so the whole invocation stays one command.
+python funasr/bin/inference.py \
++model="${local_path}" \
++vad_model="${local_path_vad}" \
++punc_model="${local_path_punc}" \
++input="${local_path}/example/asr_example.wav" \
++output_dir="./outputs/debug" \
 +device="cpu" \
 +batch_size_s=300 \
 +batch_size_threshold_s=60 \
 +debug="true"
 
-#python $cmd \
-#+model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" \
-#+input="/Users/zhifu/Downloads/asr_example.wav" \
-#+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \
-#+device="cpu" \
-#+"hotword='达摩院 魔搭'"
-
-#+input="/Users/zhifu/funasr_github/test_local/wav.scp"
-#+input="/Users/zhifu/funasr_github/test_local/asr_example.wav" \
-#+input="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len.jsonl" \
-#+input="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \
-#+model="/Users/zhifu/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
-
-#+model="/Users/zhifu/modelscope_models/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
-#+model="/Users/zhifu/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" \
-#+"hotword='达摩院 魔搭'"
-
-#+vad_model="/Users/zhifu/Downloads/modelscope_models/speech_fsmn_vad_zh-cn-16k-common-pytorch" \
diff --git a/examples/industrial_data_pretraining/paraformer-large/demo.py b/examples/industrial_data_pretraining/paraformer-large/demo.py
new file mode 100644
index 0000000..11c52f5
--- /dev/null
+++ b/examples/industrial_data_pretraining/paraformer-large/demo.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
+
+from funasr import AutoModel
+
+model_dir = "./modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"  # same checkout location the sibling infer.sh clones into
+model = AutoModel(model=model_dir)
+res = model(input=f"{model_dir}/example/asr_example.wav")
+print(res)
\ No newline at end of file
diff --git a/examples/industrial_data_pretraining/paraformer-large/infer.sh b/examples/industrial_data_pretraining/paraformer-large/infer.sh
index 87260ac..c7487e2 100644
--- a/examples/industrial_data_pretraining/paraformer-large/infer.sh
+++ b/examples/industrial_data_pretraining/paraformer-large/infer.sh
@@ -1,23 +1,14 @@
 
-cmd="funasr/bin/inference.py"
+# Download the Paraformer-large model repository from ModelScope into ./modelscope_models.
+local_path_root=./modelscope_models
+mkdir -p ${local_path_root}
+local_path=${local_path_root}/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch
+git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch.git ${local_path}
 
-python $cmd \
-+model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
-+input="/Users/zhifu/Downloads/asr_example.wav" \
-+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \
+# Decode the example wav inside the cloned model directory; the script path is relative, so run from the FunASR repository root.
+python funasr/bin/inference.py \
++model="${local_path}" \
++input="${local_path}/example/asr_example.wav" \
++output_dir="./outputs/debug" \
 +device="cpu" \
 
-python $cmd \
-+model="/Users/zhifu/Downloads/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" \
-+input="/Users/zhifu/Downloads/asr_example.wav" \
-+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2" \
-+device="cpu" \
-+"hotword='达摩院 魔搭'"
-
-#+input="/Users/zhifu/funasr_github/test_local/asr_example.wav" \
-#+input="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len.jsonl" \
-#+model="/Users/zhifu/modelscope_models/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
-
-#+model="/Users/zhifu/modelscope_models/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
-#+model="/Users/zhifu/modelscope_models/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404" \
-#+"hotword='达摩院 魔搭'"
\ No newline at end of file
diff --git a/examples/industrial_data_pretraining/punc/demo.py b/examples/industrial_data_pretraining/punc/demo.py
new file mode 100644
index 0000000..d3b63db
--- /dev/null
+++ b/examples/industrial_data_pretraining/punc/demo.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
+
+from funasr import AutoModel
+
+model_dir = "./modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"  # same checkout location the sibling infer.sh clones into
+model = AutoModel(model=model_dir)
+res = model(input=f"{model_dir}/example/punc_example.txt")
+print(res)
\ No newline at end of file
diff --git a/examples/industrial_data_pretraining/punc/infer.sh b/examples/industrial_data_pretraining/punc/infer.sh
index 3675815..f6c5c23 100644
--- a/examples/industrial_data_pretraining/punc/infer.sh
+++ b/examples/industrial_data_pretraining/punc/infer.sh
@@ -1,18 +1,14 @@
 
-cmd="funasr/bin/inference.py"
+# Download the CT-Transformer punctuation model repository from ModelScope into ./modelscope_models.
+local_path_root=./modelscope_models
+mkdir -p ${local_path_root}
+local_path=${local_path_root}/punc_ct-transformer_zh-cn-common-vocab272727-pytorch
+git clone https://www.modelscope.cn/damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch.git ${local_path}
 
-python $cmd \
-+input="/Users/zhifu/FunASR/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/data/punc_example.txt" \
-+model="/Users/zhifu/Downloads/modelscope_models/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" \
-+output_dir="/Users/zhifu/Downloads/ckpt/funasr2/exp2_punc" \
+# Punctuate the example text inside the cloned model directory; the script path is relative, so run from the FunASR repository root.
+python funasr/bin/inference.py \
++model="${local_path}" \
++input="${local_path}/example/punc_example.txt" \
++output_dir="./outputs/debug" \
 +device="cpu" \
 +debug="true"
-
-
-#+input="/Users/zhifu/FunASR/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/data/punc_example.txt" \
-
-#+"input='跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益'" \
-
-#+input="/Users/zhifu/FunASR/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/data/punc_example.txt" \
-
-#+"input='那今天的会就到这里吧 happy new year 明年见'" \
\ No newline at end of file
diff --git a/funasr/bin/inference.py b/funasr/bin/inference.py
index 16ad0e2..c545c4d 100644
--- a/funasr/bin/inference.py
+++ b/funasr/bin/inference.py
@@ -339,7 +339,7 @@
 			# sentences = time_stamp_sentence(model.punc_list, model.sentence_end_id, results_ret_list[i]["timestamp"], res[i]["text"])
 			# results_ret_list[i]["time_stamp"] = res[0]["text_postprocessed_punc"]
 			# results_ret_list[i]["sentences"] = sentences
-			# results_ret_list[i]["text_with_punc"] = res[i]["text"]
+			results_ret_list[i]["text_with_punc"] = res[i]["text"]
 		
 		pbar_total.update(1)
 		end_total = time.time()
diff --git a/funasr/models/neat_contextual_paraformer/model.py b/funasr/models/neat_contextual_paraformer/model.py
index d056ab9..939df31 100644
--- a/funasr/models/neat_contextual_paraformer/model.py
+++ b/funasr/models/neat_contextual_paraformer/model.py
@@ -417,7 +417,7 @@
 					text = tokenizer.tokens2text(token)
 					
 					text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
-					result_i = {"key": key[i], "token": token, "text": text, "text_postprocessed": text_postprocessed}
+					result_i = {"key": key[i], "text": text_postprocessed}
 					
 					if ibest_writer is not None:
 						ibest_writer["token"][key[i]] = " ".join(token)
diff --git a/funasr/models/paraformer/model.py b/funasr/models/paraformer/model.py
index 1caed90..c546585 100644
--- a/funasr/models/paraformer/model.py
+++ b/funasr/models/paraformer/model.py
@@ -535,7 +535,7 @@
 					text = tokenizer.tokens2text(token)
 					
 					text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
-					result_i = {"key": key[i], "text_postprocessed": text_postprocessed}
+					result_i = {"key": key[i], "text": text_postprocessed}
 
 					
 					if ibest_writer is not None:

--
Gitblit v1.9.1