Merge branch 'main' of github.com:alibaba-damo-academy/FunASR
add
| New file |
| | |
| | | # Branchformer Result |
| | | |
| | | ## Training Config |
| | | - Feature info: raw speech input, 80-dim fbank features extracted online, global CMVN, speed perturbation (0.9, 1.0, 1.1), SpecAugment |
| | | - Train info: lr 0.001, batch_size 10000, 4 GPUs (Tesla V100), acc_grad 1, 180 epochs |
| | | - Train config: conf/train_asr_branchformer.yaml |
| | | - LM config: LM was not used |
| | | |
| | | ## Results (CER) |
| | | - Decode config: conf/decode_asr_transformer.yaml (ctc weight:0.4) |
| | | |
| | | | testset | CER(%) | |
| | | |:-----------:|:-------:| |
| | | | dev | 4.15 | |
| | | | test | 4.51 | |
| New file |
| | |
| | | # E-Branchformer Result |
| | | |
| | | ## Training Config |
| | | - Feature info: raw speech input, 80-dim fbank features extracted online, global CMVN, speed perturbation (0.9, 1.0, 1.1), SpecAugment |
| | | - Train info: lr 0.001, batch_size 10000, 4 GPUs (Tesla V100), acc_grad 1, 180 epochs |
| | | - Train config: conf/train_asr_e_branchformer.yaml |
| | | - LM config: LM was not used |
| | | |
| | | ## Results (CER) |
| | | - Decode config: conf/decode_asr_transformer.yaml (ctc weight:0.4) |
| | | |
| | | | testset | CER(%) | |
| | | |:-----------:|:-------:| |
| | | | dev | 4.10 | |
| | | | test | 4.52 | |
| | |
| | | task=Tasks.auto_speech_recognition, |
| | | model='damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch', |
| | | vad_model='damo/speech_fsmn_vad_zh-cn-16k-common-pytorch', |
| | | punc_model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch', |
| | | punc_model='damo/punc_ct-transformer_cn-en-common-vocab471067-large', |
| | | output_dir=output_dir, |
| | | ) |
| | | rec_result = inference_pipeline(audio_in=audio_in, batch_size_token=5000, batch_size_token_threshold_s=40) |
| | |
| | | |
| | | if __name__ == "__main__": |
| | | parser = argparse.ArgumentParser() |
| | | parser.add_argument('--model', type=str, default="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch") |
| | | parser.add_argument('--model', type=str, default="damo/punc_ct-transformer_cn-en-common-vocab471067-large") |
| | | parser.add_argument('--text_in', type=str, default="./data/test/punc.txt") |
| | | parser.add_argument('--output_dir', type=str, default="./results/") |
| | | parser.add_argument('--gpuid', type=str, default="0") |
| | | args = parser.parse_args() |
| | | modelscope_infer(args) |
| | | modelscope_infer(args) |
| | |
| | | stage=1 |
| | | stop_stage=2 |
| | | model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" |
| | | data_dir="./data/test" |
| | | data_dir="./data" |
| | | output_dir="./results" |
| | | gpu_inference=true # whether to perform gpu decoding |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | |
| | | for JOB in $(seq ${nj}); do |
| | | split_scps="$split_scps $output_dir/split/text.$JOB.scp" |
| | | done |
| | | perl utils/split_scp.pl ${data_dir}/punc.txt ${split_scps} |
| | | perl utils/split_scp.pl ${data_dir}/punc_example.txt ${split_scps} |
| | | |
| | | if [ -n "${checkpoint_dir}" ]; then |
| | | python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name} |
| New file |
| | |
| | | ../TEMPLATE/README.md |
| New file |
| | |
| | | 1 跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益 |
| | | 2 从存储上来说仅仅是全景图片它就会是图片的四倍的容量然后全景的视频会是普通视频八倍的这个存储的容要求而三d的模型会是图片的十倍这都对我们今天运行在的云计算的平台存储的平台提出了更高的要求 |
| | | 3 那今天的会就到这里吧 happy new year 明年见 |
| New file |
| | |
| | | |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
# Demo: build the punctuation pipeline once, then feed it one text source.
inference_pipeline = pipeline(
    task=Tasks.punctuation,
    model='damo/punc_ct-transformer_cn-en-common-vocab471067-large',
    model_revision="v1.0.0",
    output_dir="./tmp/",
)

# Pick exactly one of the three input forms below.

# (1) Local text.scp file:
# text_source = "./egs_modelscope/punctuation/punc_ct-transformer_cn-en-common-vocab471067-large/data/punc_example.txt"

# (2) Raw text:
# text_source = "我们都是木头人不会讲话不会动"

# (3) URL of a text file:
text_source = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_text/punc_example.txt"

print(inference_pipeline(text_in=text_source))
| New file |
| | |
| | | import os |
| | | import shutil |
| | | import argparse |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
def modelscope_infer(args):
    """Build a punctuation pipeline from ``args`` and run it on ``args.text_in``.

    Reads ``args.gpuid``, ``args.model``, ``args.model_revision``,
    ``args.output_dir`` and ``args.text_in``. The value returned by the
    pipeline call is discarded.
    """
    # Export the device selection before the pipeline is constructed.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid)

    pipeline_options = dict(
        task=Tasks.punctuation,
        model=args.model,
        model_revision=args.model_revision,
        output_dir=args.output_dir,
    )
    punc_pipeline = pipeline(**pipeline_options)
    punc_pipeline(text_in=args.text_in)
| | | |
if __name__ == "__main__":
    # Command-line entry point: every option is a string with a default, so
    # the script can be launched without arguments.
    cli = argparse.ArgumentParser()
    string_options = (
        ('--model', "damo/punc_ct-transformer_cn-en-common-vocab471067-large"),
        ('--text_in', "./data/test/punc.txt"),
        ('--model_revision', None),
        ('--output_dir', "./results/"),
        ('--gpuid', "0"),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, type=str, default=fallback)
    modelscope_infer(cli.parse_args())
| New file |
| | |
#!/usr/bin/env bash
# Batch punctuation inference.
#   1. Split ${data_dir}/punc_example.txt into one shard per job.
#   2. Run infer.py on every shard in parallel (one background job per shard).
#   3. Concatenate and sort the per-job infer.out files into final_res/infer.out.

set -e
set -u
set -o pipefail

stage=1
stop_stage=2
model="damo/punc_ct-transformer_cn-en-common-vocab471067-large"
model_revision="v1.0.0"
data_dir="./data"
output_dir="./results"
gpu_inference=true    # whether to perform gpu decoding
gpuid_list="0,1"    # set gpus, e.g., gpuid_list="0,1"
njob=64    # number of jobs for CPU decoding; only used when gpu_inference=false
checkpoint_dir=
checkpoint_name="punc.pb"

. utils/parse_options.sh || exit 1;

# Compare the flag as a string. The previous form executed the flag's value as
# a command, which only worked because `true` and `false` happen to be programs.
if [ "${gpu_inference}" == "true" ]; then
    # One job per listed GPU id.
    nj=$(echo $gpuid_list | awk -F "," '{print NF}')
else
    # CPU decoding: njob jobs, each handed the id -1.
    nj=$njob
    gpuid_list=""
    for JOB in $(seq ${nj}); do
        gpuid_list=$gpuid_list"-1,"
    done
fi

mkdir -p $output_dir/split
split_scps=""
for JOB in $(seq ${nj}); do
    split_scps="$split_scps $output_dir/split/text.$JOB.scp"
done
perl utils/split_scp.pl ${data_dir}/punc_example.txt ${split_scps}

if [ -n "${checkpoint_dir}" ]; then
    python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name}
    model=${checkpoint_dir}/${model}
fi

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then
    echo "Decoding ..."
    gpuid_list_array=(${gpuid_list//,/ })
    for JOB in $(seq ${nj}); do
        {
        id=$((JOB-1))
        gpuid=${gpuid_list_array[$id]}
        mkdir -p ${output_dir}/output.$JOB
        # Every option line except the last must end with a backslash;
        # otherwise the remaining options run as a separate command and
        # infer.py falls back to its default --gpuid.
        python infer.py \
            --model ${model} \
            --text_in ${output_dir}/split/text.$JOB.scp \
            --output_dir ${output_dir}/output.$JOB \
            --model_revision ${model_revision} \
            --gpuid ${gpuid}
        }&
    done
    wait

    mkdir -p ${output_dir}/final_res
    # Merge only when the first job produced an output file.
    if [ -f "${output_dir}/output.1/infer.out" ]; then
        for i in $(seq "${nj}"); do
            cat "${output_dir}/output.${i}/infer.out"
        done | sort -k1 >"${output_dir}/final_res/infer.out"
    fi
fi
| | | |
| New file |
| | |
| | | ../../../egs/aishell/transformer/utils |
| New file |
| | |
| | | 1 跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益 |
| | | 2 从存储上来说仅仅是全景图片它就会是图片的四倍的容量然后全景的视频会是普通视频八倍的这个存储的容要求而三d的模型会是图片的十倍这都对我们今天运行在的云计算的平台存储的平台提出了更高的要求 |
| | | 3 那今天的会就到这里吧 happy new year 明年见 |
| New file |
| | |
| | | ../../../egs/aishell/transformer/utils |
| | |
| | | import numpy as np |
| | | import scipy.signal |
| | | import soundfile |
| | | |
| | | import jieba |
| | | |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.cleaner import TextCleaner |
| | |
| | | self.split_text_name = split_text_name |
| | | self.seg_jieba = seg_jieba |
| | | if self.seg_jieba: |
| | | import jieba |
| | | jieba.load_userdict(seg_dict_file) |
| | | |
| | | @classmethod |
| | |
| | | from funasr_onnx import CT_Transformer |
| | | |
| | | model_dir = "damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" |
| | | #model_dir = "damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" |
| | | model_dir = "damo/punc_ct-transformer_cn-en-common-vocab471067-large" |
| | | model = CT_Transformer(model_dir) |
| | | |
| | | text_in="跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益" |
| | |
| | | from .utils.utils import (ONNXRuntimeError, |
| | | OrtInferSession, get_logger, |
| | | read_yaml) |
| | | from .utils.utils import (TokenIDConverter, split_to_mini_sentence,code_mix_split_words) |
| | | from .utils.utils import (TokenIDConverter, split_to_mini_sentence,code_mix_split_words,code_mix_split_words_jieba) |
| | | logging = get_logger() |
| | | |
| | | |
| | |
| | | self.punc_list[i] = "?" |
| | | elif self.punc_list[i] == "。": |
| | | self.period = i |
| | | if "seg_jieba" in config: |
| | | self.seg_jieba = True |
| | | self.jieba_usr_dict_path = os.path.join(model_dir, 'jieba_usr_dict') |
| | | self.code_mix_split_words_jieba = code_mix_split_words_jieba(self.jieba_usr_dict_path) |
| | | else: |
| | | self.seg_jieba = False |
| | | |
| | | def __call__(self, text: Union[list, str], split_size=20): |
| | | split_text = code_mix_split_words(text) |
| | | if self.seg_jieba: |
| | | split_text = self.code_mix_split_words_jieba(text) |
| | | else: |
| | | split_text = code_mix_split_words(text) |
| | | split_text_id = self.converter.tokens2ids(split_text) |
| | | mini_sentences = split_to_mini_sentence(split_text, split_size) |
| | | mini_sentences_id = split_to_mini_sentence(split_text_id, split_size) |
| | |
| | | from pathlib import Path |
| | | from typing import Any, Dict, Iterable, List, NamedTuple, Set, Tuple, Union |
| | | |
| | | import re |
| | | import numpy as np |
| | | import yaml |
| | | from onnxruntime import (GraphOptimizationLevel, InferenceSession, |
| | | SessionOptions, get_available_providers, get_device) |
| | | |
| | | import jieba |
| | | import warnings |
| | | |
| | | root_dir = Path(__file__).resolve().parent |
| | |
| | | words.append(current_word) |
| | | return words |
| | | |
def isEnglish(text: str) -> bool:
    """Return True when ``text`` is made up only of ASCII letters and apostrophes.

    The whole string must match: an empty string, or any string containing a
    digit, whitespace, punctuation other than ``'``, or a non-ASCII character,
    yields False.

    Args:
        text: The token to classify.

    Returns:
        True if every character is in ``[a-zA-Z']`` and ``text`` is non-empty.
    """
    # fullmatch instead of search with '^...$': '$' also matches just before a
    # trailing newline, which would let "word\n" pass as English.
    return re.fullmatch(r"[a-zA-Z']+", text) is not None
| | | |
def join_chinese_and_english(input_list):
    """Concatenate tokens into one string, putting a space before English tokens.

    Tokens for which ``isEnglish`` is true are prefixed with a single space;
    all other tokens are appended directly. Leading and trailing whitespace
    is stripped from the result.
    """
    pieces = [' ' + tok if isEnglish(tok) else tok for tok in input_list]
    return ''.join(pieces).strip()
| | | |
def code_mix_split_words_jieba(seg_dict_file: str):
    """Build a tokenizer for whitespace-separated, mixed Chinese/English text.

    ``seg_dict_file`` is loaded into jieba as a user dictionary once, when the
    factory is called.

    Args:
        seg_dict_file: Path passed to ``jieba.load_userdict``.

    Returns:
        A function ``_fn(text: str) -> list`` that splits ``text`` on
        whitespace, keeps each run of English tokens unchanged, and replaces
        each run of non-English tokens with the segments produced by
        ``jieba.cut(..., HMM=False)`` on the joined run.
    """
    from itertools import groupby

    jieba.load_userdict(seg_dict_file)

    def _fn(text: str):
        result_list = []
        # groupby yields maximal runs of consecutive tokens that share the
        # same isEnglish() value, i.e. alternating English / non-English runs.
        for is_english_run, run in groupby(text.split(), key=isEnglish):
            if is_english_run:
                result_list.extend(run)
            else:
                joined = join_chinese_and_english(list(run))
                result_list.extend(jieba.cut(joined, HMM=False))
        return result_list

    return _fn
| | | |
| | | def read_yaml(yaml_path: Union[str, Path]) -> Dict: |
| | | if not Path(yaml_path).exists(): |
| | | raise FileExistsError(f'The {yaml_path} does not exist.') |
| | |
| | | wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/sample/funasr_samples.tar.gz
|
| | | ```
|
| | |
|
| | | We take the Python language client as an example to explain. It supports various audio formats (.wav, .pcm, .mp3, etc.), video input (.mp4, etc.), and multi-file list wav.scp input. For other versions of clients, please refer to the ([docs](##client-usage)).
|
| | | We take the Python language client as an example to explain. It supports various audio formats (.wav, .pcm, .mp3, etc.), video input (.mp4, etc.), and multi-file list wav.scp input. For other versions of clients, please refer to the ([docs](#client-usage)).
|
| | |
|
| | | ```shell
|
| | | python3 wss_client_asr.py --host "127.0.0.1" --port 10095 --mode offline --audio_in "../audio/asr_example.wav"
|