Large punctuation model — ModelScope inference pipeline
| | |
| | | |
| | | if __name__ == "__main__": |
| | | parser = argparse.ArgumentParser() |
| | | parser.add_argument('--model', type=str, default="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch") |
| | | parser.add_argument('--model', type=str, default="damo/punc_ct-transformer_cn-en-common-vocab471067-large") |
| | | parser.add_argument('--text_in', type=str, default="./data/test/punc.txt") |
| | | parser.add_argument('--output_dir', type=str, default="./results/") |
| | | parser.add_argument('--gpuid', type=str, default="0") |
| | |
| | | stage=1 |
| | | stop_stage=2 |
| | | model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch" |
| | | data_dir="./data/test" |
| | | data_dir="./data" |
| | | output_dir="./results" |
| | | gpu_inference=true # whether to perform gpu decoding |
| | | gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1" |
| | |
| | | for JOB in $(seq ${nj}); do |
| | | split_scps="$split_scps $output_dir/split/text.$JOB.scp" |
| | | done |
| | | perl utils/split_scp.pl ${data_dir}/punc.txt ${split_scps} |
| | | perl utils/split_scp.pl ${data_dir}/punc_example.txt ${split_scps} |
| | | |
| | | if [ -n "${checkpoint_dir}" ]; then |
| | | python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name} |
| New file |
| | |
| | | ../TEMPLATE/README.md |
| New file |
| | |
| | | 1 跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益 |
| | | 2 从存储上来说仅仅是全景图片它就会是图片的四倍的容量然后全景的视频会是普通视频八倍的这个存储的容要求而三d的模型会是图片的十倍这都对我们今天运行在的云计算的平台存储的平台提出了更高的要求 |
| | | 3 那今天的会就到这里吧 happy new year 明年见 |
| New file |
| | |
| | | |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
# Demo: restore punctuation in raw ASR output with the ModelScope pipeline.
punct = pipeline(
    task=Tasks.punctuation,
    model='damo/punc_ct-transformer_cn-en-common-vocab471067-large',
    model_revision="v1.0.0",
    output_dir="./tmp/"
)

# The pipeline accepts any of the following input forms:
#
# a local text list file (one "id sentence" pair per line):
# inputs = "./egs_modelscope/punctuation/punc_ct-transformer_cn-en-common-vocab471067-large/data/punc_example.txt"
#
# a raw text string:
#inputs = "我们都是木头人不会讲话不会动"
#
# or, as used here, a URL pointing at a text list file:
inputs = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_text/punc_example.txt"

print(punct(text_in=inputs))
| New file |
| | |
| | | import os |
| | | import shutil |
| | | import argparse |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
def modelscope_infer(args):
    """Run the ModelScope punctuation pipeline over ``args.text_in``.

    Pins the visible CUDA device first so the pipeline is created on the
    requested GPU, then performs a single inference pass, writing results
    under ``args.output_dir``.
    """
    # Must be set before the pipeline is constructed to take effect.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpuid)
    pipeline_kwargs = dict(
        task=Tasks.punctuation,
        model=args.model,
        model_revision=args.model_revision,
        output_dir=args.output_dir,
    )
    punct_pipeline = pipeline(**pipeline_kwargs)
    punct_pipeline(text_in=args.text_in)
| | | |
if __name__ == "__main__":
    # All options are plain strings; defaults mirror the ones used by infer.sh.
    cli_options = [
        ("--model", "damo/punc_ct-transformer_cn-en-common-vocab471067-large"),
        ("--text_in", "./data/test/punc.txt"),
        ("--model_revision", None),
        ("--output_dir", "./results/"),
        ("--gpuid", "0"),
    ]
    parser = argparse.ArgumentParser()
    for flag, default in cli_options:
        parser.add_argument(flag, type=str, default=default)
    modelscope_infer(parser.parse_args())
| New file |
| | |
#!/usr/bin/env bash
# Parallel punctuation inference: split the input text list into nj shards,
# decode each shard with infer.py in a background job (one per GPU, or njob
# CPU jobs), then merge the per-job outputs into a single sorted file.

# Abort on any error, on use of an unset variable, and on pipeline failures.
set -e
set -u
set -o pipefail

stage=1       # first stage to run (only stage 1 is defined in this script)
stop_stage=2  # last stage to run
model="damo/punc_ct-transformer_cn-en-common-vocab471067-large"  # ModelScope model id (or local path after checkpoint prep)
model_revision="v1.0.0"
data_dir="./data"        # directory holding punc_example.txt
output_dir="./results"   # split lists and per-job outputs go here
gpu_inference=true # whether to perform gpu decoding
gpuid_list="0,1" # set gpus, e.g., gpuid_list="0,1"
njob=64 # the number of jobs for CPU decoding, if gpu_inference=false, use CPU decoding, please set njob
checkpoint_dir=  # optional: directory with a locally fine-tuned checkpoint
checkpoint_name="punc.pb"  # checkpoint file name inside ${checkpoint_dir}

# Standard kaldi-style CLI override of any variable defined above.
. utils/parse_options.sh || exit 1;
| | | |
# Decide the degree of parallelism: one job per listed GPU, or njob CPU
# jobs with every job pinned to gpuid "-1" (no GPU).
#
# Fix: the original `if ${gpu_inference} == "true"` only worked because the
# `true`/`false` builtins ignore their arguments; any other value (e.g.
# "True") would be executed as a command and abort under `set -e`.
# Use an explicit string comparison instead.
if [ "${gpu_inference}" == "true" ]; then
    # nj = number of comma-separated entries in gpuid_list.
    nj=$(echo $gpuid_list | awk -F "," '{print NF}')
else
    nj=$njob
    gpuid_list=""
    for JOB in $(seq ${nj}); do
        gpuid_list=$gpuid_list"-1,"
    done
fi
| | | |
# Shard the input text list into ${nj} pieces so jobs can decode in parallel.
mkdir -p "$output_dir/split"
split_scps=""
for n in $(seq "${nj}"); do
    split_scps="$split_scps $output_dir/split/text.$n.scp"
done
perl utils/split_scp.pl ${data_dir}/punc_example.txt ${split_scps}
| | | |
# When a local checkpoint is supplied, stage it via prepare_checkpoint.py and
# repoint ${model} at the prepared local copy instead of the ModelScope id.
if [ -n "${checkpoint_dir}" ]; then
    python utils/prepare_checkpoint.py ${model} ${checkpoint_dir} ${checkpoint_name}
    model=${checkpoint_dir}/${model}
fi
| | | |
if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then
    echo "Decoding ..."
    gpuid_list_array=(${gpuid_list//,/ })
    for JOB in $(seq ${nj}); do
        {
            id=$((JOB-1))
            gpuid=${gpuid_list_array[$id]}
            mkdir -p ${output_dir}/output.$JOB
            # Fix: the --model_revision line was missing its trailing
            # backslash, so `--gpuid ${gpuid}` ran as a separate command —
            # command-not-found killed the background job under `set -e`
            # and the gpu id was never passed to infer.py.
            python infer.py \
                --model ${model} \
                --text_in ${output_dir}/split/text.$JOB.scp \
                --output_dir ${output_dir}/output.$JOB \
                --model_revision ${model_revision} \
                --gpuid ${gpuid}
        }&
    done
    wait

    # Merge the per-job results, sorted by utterance id, into one file.
    mkdir -p ${output_dir}/final_res
    if [ -f "${output_dir}/output.1/infer.out" ]; then
        for i in $(seq "${nj}"); do
            cat "${output_dir}/output.${i}/infer.out"
        done | sort -k1 >"${output_dir}/final_res/infer.out"
    fi
fi
| | | |
| New file |
| | |
| | | ../../../egs/aishell/transformer/utils |
| New file |
| | |
| | | 1 跨境河流是养育沿岸人民的生命之源长期以来为帮助下游地区防灾减灾中方技术人员在上游地区极为恶劣的自然条件下克服巨大困难甚至冒着生命危险向印方提供汛期水文资料处理紧急事件中方重视印方在跨境河流问题上的关切愿意进一步完善双方联合工作机制凡是中方能做的我们都会去做而且会做得更好我请印度朋友们放心中国在上游的任何开发利用都会经过科学规划和论证兼顾上下游的利益 |
| | | 2 从存储上来说仅仅是全景图片它就会是图片的四倍的容量然后全景的视频会是普通视频八倍的这个存储的容要求而三d的模型会是图片的十倍这都对我们今天运行在的云计算的平台存储的平台提出了更高的要求 |
| | | 3 那今天的会就到这里吧 happy new year 明年见 |
| New file |
| | |
| | | ../../../egs/aishell/transformer/utils |
| | |
| | | import numpy as np |
| | | import scipy.signal |
| | | import soundfile |
| | | |
| | | import jieba |
| | | |
| | | from funasr.text.build_tokenizer import build_tokenizer |
| | | from funasr.text.cleaner import TextCleaner |
| | |
| | | self.split_text_name = split_text_name |
| | | self.seg_jieba = seg_jieba |
| | | if self.seg_jieba: |
| | | import jieba |
| | | jieba.load_userdict(seg_dict_file) |
| | | |
| | | @classmethod |