From f2406e2d61218c848bfd6da933c36956a9b0a5aa Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 05 三月 2024 22:39:51 +0800
Subject: [PATCH] qwenaudio qwenaudiochat (#1433)

---
 funasr/models/qwen_audio/model.py                                      |   10 ++
 examples/industrial_data_pretraining/llm_asr/demo_train_or_finetune.sh |   47 +++++++++++
 funasr/tokenizer/hf_tokenizer.py                                       |    3 
 examples/industrial_data_pretraining/llm_asr/conf/template.yaml        |   89 ++++++++++++++++++++++
 funasr/tokenizer/whisper_tokenizer.py                                  |    2 
 README_zh.md                                                           |   29 +++---
 README.md                                                              |    5 +
 examples/industrial_data_pretraining/llm_asr/demo_infer.sh             |   14 +++
 8 files changed, 182 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 970c5eb..d34249d 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,8 @@
 
 <a name="whats-new"></a>
 ## What's new:
-- 2024/03/05锛欰dded support for the Whisper-large-v3 model, a multitasking model that can perform multilingual speech recognition, speech translation, and language identification. It can be downloaded from the[modelscope](https://www.modelscope.cn/models/iic/Whisper-large-v3/summary), and [openai](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining/whisper).
+- 2024/03/05锛欰dded the Qwen-Audio and Qwen-Audio-Chat large-scale audio-text multimodal models, which have topped multiple audio domain leaderboards. These models support speech dialogue, [usage](examples/industrial_data_pretraining/qwen_audio).
+- 2024/03/05锛欰dded support for the Whisper-large-v3 model, a multitasking model that can perform multilingual speech recognition, speech translation, and language identification. It can be downloaded from the [modelscope](examples/industrial_data_pretraining/whisper/demo.py), and [openai](examples/industrial_data_pretraining/whisper/demo_from_openai.py).
 - 2024/03/05: Offline File Transcription Service 4.4, Offline File Transcription Service of English 1.5锛孯eal-time Transcription Service 1.9 released锛宒ocker image supports ARM64 platform, update modelscope锛�([docs](runtime/readme.md))
 - 2024/01/30锛歠unasr-1.0 has been released ([docs](https://github.com/alibaba-damo-academy/FunASR/discussions/1319))
 - 2024/01/30锛歟motion recognition models are new supported. [model link](https://www.modelscope.cn/models/iic/emotion2vec_base_finetuned/summary), modified from [repo](https://github.com/ddlBoJack/emotion2vec).
@@ -83,6 +84,8 @@
 |                                       cam++ <br> ( [猸怾(https://modelscope.cn/models/iic/speech_campplus_sv_zh-cn_16k-common/summary) [馃](https://huggingface.co/funasr/campplus) )                                        |           speaker verification/diarization            |            5000 hours            |    7.2M    | 
 |                                                  Whisper-large-v2 <br> ([猸怾(https://www.modelscope.cn/models/iic/speech_whisper-large_asr_multilingual/summary)  [馃崁](https://github.com/openai/whisper) )                                                  |  speech recognition, with timestamps, non-streaming   |          multilingual            |    1.5G    |
 |                                                Whisper-large-v3 <br> ([猸怾(https://www.modelscope.cn/models/iic/Whisper-large-v3/summary)  [馃崁](https://github.com/openai/whisper) )                                                 |  speech recognition, with timestamps, non-streaming   |          multilingual            |    1.5G    |
+|                                         Qwen-Audio <br> ([猸怾(examples/industrial_data_pretraining/qwen_audio/demo.py)  [馃](https://huggingface.co/Qwen/Qwen-Audio) )                                         |      audio-text multimodal model (pretraining)       |     multilingual      |  8B  |
+|                   Qwen-Audio-Chat <br> ([猸怾(examples/industrial_data_pretraining/qwen_audio/demo_chat.py)  [馃](https://huggingface.co/Qwen/Qwen-Audio-Chat) )                                                |          audio-text multimodal model (chat)          |     multilingual      |  8B  |
 
 
 
diff --git a/README_zh.md b/README_zh.md
index e6a1060..83e37fb 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -29,7 +29,8 @@
 
 <a name="鏈�鏂板姩鎬�"></a>
 ## 鏈�鏂板姩鎬�
-- 2024/03/05锛氭柊澧炲姞Whisper-large-v3妯″瀷鏀寔锛屽璇█璇煶璇嗗埆/缈昏瘧/璇璇嗗埆锛屾敮鎸佷粠[modelscope](https://www.modelscope.cn/models/iic/Whisper-large-v3/summary)浠撳簱涓嬭浇锛屼篃鏀寔浠嶽openai](https://github.com/alibaba-damo-academy/FunASR/tree/main/examples/industrial_data_pretraining/whisper)浠撳簱涓嬭浇妯″瀷銆�
+- 2024/03/05锛氭柊澧炲姞Qwen-Audio涓嶲wen-Audio-Chat闊抽鏂囨湰妯℃�佸ぇ妯″瀷锛屽湪澶氫釜闊抽棰嗗煙娴嬭瘯姒滃崟鍒锋锛屼腑鏀寔璇煶瀵硅瘽锛岃缁嗙敤娉曡 [绀轰緥](examples/industrial_data_pretraining/qwen_audio)銆�
+- 2024/03/05锛氭柊澧炲姞Whisper-large-v3妯″瀷鏀寔锛屽璇█璇煶璇嗗埆/缈昏瘧/璇璇嗗埆锛屾敮鎸佷粠 [modelscope](examples/industrial_data_pretraining/whisper/demo.py)浠撳簱涓嬭浇锛屼篃鏀寔浠� [openai](examples/industrial_data_pretraining/whisper/demo_from_openai.py)浠撳簱涓嬭浇妯″瀷銆�
 - 2024/03/05: 涓枃绂荤嚎鏂囦欢杞啓鏈嶅姟 4.4銆佽嫳鏂囩绾挎枃浠惰浆鍐欐湇鍔� 1.5銆佷腑鏂囧疄鏃惰闊冲惉鍐欐湇鍔� 1.9 鍙戝竷锛宒ocker闀滃儚鏀寔arm64骞冲彴锛屽崌绾odelscope鐗堟湰锛涜缁嗕俊鎭弬闃�([閮ㄧ讲鏂囨。](runtime/readme_cn.md))
 - 2024/01/30锛歠unasr-1.0鍙戝竷锛屾洿鏂拌鏄嶽鏂囨。](https://github.com/alibaba-damo-academy/FunASR/discussions/1319)
 - 2024/01/30锛氭柊澧炲姞鎯呮劅璇嗗埆 [妯″瀷閾炬帴](https://www.modelscope.cn/models/iic/emotion2vec_base_finetuned/summary)锛屽師濮嬫ā鍨� [repo](https://github.com/ddlBoJack/emotion2vec).
@@ -73,19 +74,19 @@
 锛堟敞锛氣瓙 琛ㄧずModelScope妯″瀷浠撳簱锛岎煠� 琛ㄧずHuggingface妯″瀷浠撳簱锛岎煃�琛ㄧずOpenAI妯″瀷浠撳簱锛�
 
 
-|                                                                                                     妯″瀷鍚嶅瓧                                                                                                      |      浠诲姟璇︽儏       |     璁粌鏁版嵁     | 鍙傛暟閲�  | 
-|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:---------------:|:------------:|:----:|
-|    paraformer-zh <br> ([猸怾(https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)  [馃](https://huggingface.co/funasr/paraformer-tp) )    | 璇煶璇嗗埆锛屽甫鏃堕棿鎴宠緭鍑猴紝闈炲疄鏃� |  60000灏忔椂锛屼腑鏂�  | 220M |
-| paraformer-zh-streaming <br> ( [猸怾(https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [馃](https://huggingface.co/funasr/paraformer-zh-streaming) ) |     璇煶璇嗗埆锛屽疄鏃�     |  60000灏忔椂锛屼腑鏂�  | 220M |
-|         paraformer-en <br> ( [猸怾(https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [馃](https://huggingface.co/funasr/paraformer-en) )         |    璇煶璇嗗埆锛岄潪瀹炴椂     |  50000灏忔椂锛岃嫳鏂�  | 220M |
-|                      conformer-en <br> ( [猸怾(https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [馃](https://huggingface.co/funasr/conformer-en) )                      |    璇煶璇嗗埆锛岄潪瀹炴椂     |  50000灏忔椂锛岃嫳鏂�  | 220M |
-|                        ct-punc <br> ( [猸怾(https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [馃](https://huggingface.co/funasr/ct-punc) )                         |      鏍囩偣鎭㈠       |  100M锛屼腑鏂囦笌鑻辨枃  | 1.1G | 
-|                            fsmn-vad <br> ( [猸怾(https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [馃](https://huggingface.co/funasr/fsmn-vad) )                             |    璇煶绔偣妫�娴嬶紝瀹炴椂    | 5000灏忔椂锛屼腑鏂囦笌鑻辨枃 | 0.4M | 
-|                              fa-zh <br> ( [猸怾(https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [馃](https://huggingface.co/funasr/fa-zh) )                               |    瀛楃骇鍒椂闂存埑棰勬祴     |  50000灏忔椂锛屼腑鏂�  | 38M  |
-|                                 cam++ <br> ( [猸怾(https://modelscope.cn/models/iic/speech_campplus_sv_zh-cn_16k-common/summary) [馃](https://huggingface.co/funasr/campplus) )                                 |    璇磋瘽浜虹‘璁�/鍒嗗壊     |    5000灏忔椂    | 7.2M | 
-|                           Whisper-large-v2 <br> ([猸怾(https://www.modelscope.cn/models/iic/speech_whisper-large_asr_multilingual/summary)  [馃崁](https://github.com/openai/whisper) )                           | 璇煶璇嗗埆锛屽甫鏃堕棿鎴宠緭鍑猴紝闈炲疄鏃� |     澶氳瑷�      |  1G  |
-|                         Whisper-large-v3 <br> ([猸怾(https://www.modelscope.cn/models/iic/Whisper-large-v3/summary)  [馃崁](https://github.com/openai/whisper) )                          | 璇煶璇嗗埆锛屽甫鏃堕棿鎴宠緭鍑猴紝闈炲疄鏃� |     澶氳瑷�      |  1G  |
-
+|                                                                                                     妯″瀷鍚嶅瓧                                                                                                      |        浠诲姟璇︽儏        |     璁粌鏁版嵁     | 鍙傛暟閲�  | 
+|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:------------------:|:------------:|:----:|
+|    paraformer-zh <br> ([猸怾(https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)  [馃](https://huggingface.co/funasr/paraformer-tp) )    |  璇煶璇嗗埆锛屽甫鏃堕棿鎴宠緭鍑猴紝闈炲疄鏃�   |  60000灏忔椂锛屼腑鏂�  | 220M |
+| paraformer-zh-streaming <br> ( [猸怾(https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [馃](https://huggingface.co/funasr/paraformer-zh-streaming) ) |      璇煶璇嗗埆锛屽疄鏃�       |  60000灏忔椂锛屼腑鏂�  | 220M |
+|         paraformer-en <br> ( [猸怾(https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [馃](https://huggingface.co/funasr/paraformer-en) )         |      璇煶璇嗗埆锛岄潪瀹炴椂      |  50000灏忔椂锛岃嫳鏂�  | 220M |
+|                      conformer-en <br> ( [猸怾(https://modelscope.cn/models/damo/speech_conformer_asr-en-16k-vocab4199-pytorch/summary) [馃](https://huggingface.co/funasr/conformer-en) )                      |      璇煶璇嗗埆锛岄潪瀹炴椂      |  50000灏忔椂锛岃嫳鏂�  | 220M |
+|                        ct-punc <br> ( [猸怾(https://modelscope.cn/models/damo/punc_ct-transformer_cn-en-common-vocab471067-large/summary) [馃](https://huggingface.co/funasr/ct-punc) )                         |        鏍囩偣鎭㈠        |  100M锛屼腑鏂囦笌鑻辨枃  | 1.1G | 
+|                            fsmn-vad <br> ( [猸怾(https://modelscope.cn/models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch/summary) [馃](https://huggingface.co/funasr/fsmn-vad) )                             |     璇煶绔偣妫�娴嬶紝瀹炴椂      | 5000灏忔椂锛屼腑鏂囦笌鑻辨枃 | 0.4M | 
+|                              fa-zh <br> ( [猸怾(https://modelscope.cn/models/damo/speech_timestamp_prediction-v1-16k-offline/summary) [馃](https://huggingface.co/funasr/fa-zh) )                               |      瀛楃骇鍒椂闂存埑棰勬祴      |  50000灏忔椂锛屼腑鏂�  | 38M  |
+|                                 cam++ <br> ( [猸怾(https://modelscope.cn/models/iic/speech_campplus_sv_zh-cn_16k-common/summary) [馃](https://huggingface.co/funasr/campplus) )                                 |      璇磋瘽浜虹‘璁�/鍒嗗壊      |    5000灏忔椂    | 7.2M | 
+|                                     Whisper-large-v3 <br> ([猸怾(https://www.modelscope.cn/models/iic/Whisper-large-v3/summary)  [馃崁](https://github.com/openai/whisper) )                                      |  璇煶璇嗗埆锛屽甫鏃堕棿鎴宠緭鍑猴紝闈炲疄鏃�   |     澶氳瑷�      |  1G  |
+|                                         Qwen-Audio <br> ([猸怾(examples/industrial_data_pretraining/qwen_audio/demo.py)  [馃](https://huggingface.co/Qwen/Qwen-Audio) )                                         |  闊抽鏂囨湰澶氭ā鎬佸ぇ妯″瀷锛堥璁粌锛�   |     澶氳瑷�      |  8B  |
+|                   Qwen-Audio-Chat <br> ([猸怾(examples/industrial_data_pretraining/qwen_audio/demo_chat.py)  [馃](https://huggingface.co/Qwen/Qwen-Audio-Chat) )                                                | 闊抽鏂囨湰澶氭ā鎬佸ぇ妯″瀷锛坈hat鐗堟湰锛� |     澶氳瑷�      |  8B  |
 
 <a name="蹇�熷紑濮�"></a>
 ## 蹇�熷紑濮�
diff --git a/examples/industrial_data_pretraining/llm_asr/conf/template.yaml b/examples/industrial_data_pretraining/llm_asr/conf/template.yaml
new file mode 100644
index 0000000..3c51ff4
--- /dev/null
+++ b/examples/industrial_data_pretraining/llm_asr/conf/template.yaml
@@ -0,0 +1,89 @@
+# This is an example that demonstrates how to configure a model file.
+# You can modify the configuration according to your own requirements.
+
+# to print the register_table:
+# from funasr.register import tables
+# tables.print()
+
+# network architecture
+model: LLMASR
+model_conf:
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: true
+
+# encoder
+encoder: WhisperWarp
+encoder_conf:
+    hub: funasr
+    init_param_path: "/nfs/maziyang.mzy/models/Whisper-large-v2"
+    freeze: true
+
+llm: Vicuna
+llm_conf:
+  hub: hf
+  init_param_path: "/nfs/maziyang.mzy/models/vicuna-7b-v1.5"
+  freeze: true
+
+adaptor: Linear
+adaptor_conf:
+  downsample_rate: 5
+  llm_dim: 4096
+  encoder_dim: 512
+
+# frontend related
+frontend: WhisperFrontend
+frontend_conf:
+    fs: 16000
+    whisper_model: large
+    do_pad_trim: true
+
+
+specaug: SpecAugLFR
+specaug_conf:
+    apply_time_warp: false
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:
+    - 0
+    - 30
+    lfr_rate: 6
+    num_freq_mask: 1
+    apply_time_mask: true
+    time_mask_width_range:
+    - 0
+    - 12
+    num_time_mask: 1
+
+train_conf:
+  accum_grad: 1
+  grad_clip: 5
+  max_epoch: 150
+  keep_nbest_models: 10
+  log_interval: 10
+
+optim: adamw
+optim_conf:
+   lr: 0.0001
+   weight_decay: 0.000001
+scheduler: warmuplr
+scheduler_conf:
+   warmup_steps: 1500
+
+dataset: AudioLLMDataset
+dataset_conf:
+    index_ds: IndexDSJsonl
+    batch_sampler: RankFullLocalShuffleBatchSampler
+    batch_type: example # example or length
+    batch_size: 8 # if batch_type is example, batch_size is the number of samples; if length, batch_size is source_token_len+target_token_len;
+    max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
+    buffer_size: 500
+    shuffle: True
+    num_workers: 4
+    preprocessor_text: TextPreprocessRemovePunctuation
+
+tokenizer: HuggingfaceTokenizer
+tokenizer_conf:
+  unk_symbol: <unk>
+  init_param_path: "/nfs/maziyang.mzy/models/vicuna-7b-v1.5"
+
diff --git a/examples/industrial_data_pretraining/llm_asr/demo_infer.sh b/examples/industrial_data_pretraining/llm_asr/demo_infer.sh
new file mode 100644
index 0000000..f8ebc4c
--- /dev/null
+++ b/examples/industrial_data_pretraining/llm_asr/demo_infer.sh
@@ -0,0 +1,14 @@
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
+
+
+
+python -m funasr.bin.inference \
+--config-path="/root/FunASR/examples/aishell/llm_asr_nar/conf" \
+--config-name="template.yaml" \
+++init_param="/mnt/workspace/FunASR/examples/aishell/paraformer/exp/baseline_paraformer_conformer_12e_6d_2048_256_zh_char_exp3/model.pt.ep38" \
+++input="/nfs/beinian.lzr/workspace/datasets/data/16k/opendata/aishell1/dev/wav/S0724/BAC009S0724W0121.wav" \
+++scope_map="encoder.model,audio_encoder,encoder_projector,adaptor" \
+++output_dir="./outputs/debug" \
+++device="cpu" \
+
diff --git a/examples/industrial_data_pretraining/llm_asr/demo_train_or_finetune.sh b/examples/industrial_data_pretraining/llm_asr/demo_train_or_finetune.sh
new file mode 100644
index 0000000..a518d57
--- /dev/null
+++ b/examples/industrial_data_pretraining/llm_asr/demo_train_or_finetune.sh
@@ -0,0 +1,47 @@
+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+#  MIT License  (https://opensource.org/licenses/MIT)
+
+
+# which gpu to train or finetune
+export CUDA_VISIBLE_DEVICES="0"
+gpu_num=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+
+# data dir, which contains: train.json, val.json, tokens.jsonl/tokens.txt, am.mvn
+#data_dir="/Users/zhifu/funasr1.0/data/list"
+
+## generate jsonl from wav.scp and text.txt
+#python -m funasr.datasets.audio_datasets.scp2jsonl \
+#++scp_file_list='["/Users/zhifu/funasr1.0/test_local/wav.scp", "/Users/zhifu/funasr1.0/test_local/text.txt"]' \
+#++data_type_list='["source", "target"]' \
+#++jsonl_file_out=/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl
+
+train_data="/nfs/zhifu.gzf/data/datalist/aishell1_aishell2_wav_speech_llm_train_data_del_tail500.json"
+val_data="/nfs/zhifu.gzf/data/datalist/aishell1_aishell2_wav_speech_llm_train_data_tail500.json"
+
+# exp output dir
+output_dir="/Users/zhifu/exp"
+log_file="${output_dir}/log.txt"
+
+workspace=`pwd`
+config="template.yaml"
+
+init_param="${output_dir}/model.pt"
+
+mkdir -p ${output_dir}
+echo "log_file: ${log_file}"
+
+torchrun \
+--nnodes 1 \
+--nproc_per_node ${gpu_num} \
+../../../funasr/bin/train.py \
+--config-path "${workspace}/conf" \
+--config-name "${config}" \
+++train_data_set_list="${train_data}" \
+++valid_data_set_list="${val_data}" \
+++dataset_conf.batch_size=2 \
+++dataset_conf.batch_type="example" \
+++dataset_conf.num_workers=0 \
+++train_conf.max_epoch=11 \
+++optim_conf.lr=0.0002 \
+++init_param="${init_param}" \
+++output_dir="${output_dir}" &> ${log_file}
diff --git a/funasr/models/qwen_audio/model.py b/funasr/models/qwen_audio/model.py
index 3eba026..e419b1e 100644
--- a/funasr/models/qwen_audio/model.py
+++ b/funasr/models/qwen_audio/model.py
@@ -20,6 +20,11 @@
 @tables.register("model_classes", "QwenAudio")
 @tables.register("model_classes", "QwenAudioWarp")
 class QwenAudioWarp(nn.Module):
+    """
+    Qwen-Audio: Advancing Universal Audio Understanding via Unified Large-Scale Audio-Language Models
+    https://arxiv.org/abs/2311.07919
+    Modified from https://github.com/QwenLM/Qwen-Audio
+    """
     def __init__(self, *args, **kwargs):
         super().__init__()
 
@@ -72,6 +77,11 @@
 @tables.register("model_classes", "QwenAudioChatWarp")
 class QwenAudioChatWarp(nn.Module):
     def __init__(self, *args, **kwargs):
+        """
+        Qwen-Audio: Advancing Universal Audio Understanding via Unified Large-Scale Audio-Language Models
+        https://arxiv.org/abs/2311.07919
+        Modified from https://github.com/QwenLM/Qwen-Audio
+        """
         super().__init__()
         
         model_or_path = kwargs.get("model_path", "QwenAudio")
diff --git a/funasr/tokenizer/hf_tokenizer.py b/funasr/tokenizer/hf_tokenizer.py
index c856b3d..81f553d 100644
--- a/funasr/tokenizer/hf_tokenizer.py
+++ b/funasr/tokenizer/hf_tokenizer.py
@@ -2,7 +2,8 @@
 try:
 	from transformers import AutoTokenizer
 except:
-	print("If you want to use hugging, please `pip install -U transformers`")
+	# print("If you want to use hugging, please `pip install -U transformers`")
+	pass
 
 from funasr.register import tables
 
diff --git a/funasr/tokenizer/whisper_tokenizer.py b/funasr/tokenizer/whisper_tokenizer.py
index f41c823..3fb5b64 100644
--- a/funasr/tokenizer/whisper_tokenizer.py
+++ b/funasr/tokenizer/whisper_tokenizer.py
@@ -2,7 +2,7 @@
 try:
 	from whisper.tokenizer import get_tokenizer
 except:
-	print("If you want to use hugging, please `pip install -U transformers`")
+	print("Notice: If you want to use whisper, please `pip install -U openai-whisper`")
 
 from funasr.register import tables
 

--
Gitblit v1.9.1