Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
| | |
| | | - Modify inference related parameters in `infer_after_finetune.py` |
| | | - <strong>output_dir:</strong> # result dir |
| | | - <strong>data_dir:</strong> # the dataset dir needs to include `test/wav.scp`. If `test/text` also exists, CER will be computed |
| | | - <strong>decoding_model_name:</strong> # set the checkpoint name for decoding, e.g., `valid.cer_ctc.ave |
| | | .pb` |
| | | - <strong>decoding_model_name:</strong> # set the checkpoint name for decoding, e.g., `valid.cer_ctc.ave.pb` |
| | | |
| | | - Then you can run the inference pipeline with: |
| | | ```python |
| New file |
| | |
# Demo: transcribe one sample recording with the UniASR 2-pass model through
# a ModelScope pipeline and print whatever the pipeline returns.
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Decoding strategy handed to the model: "fast", "normal" or "offline".
decoding_mode = "normal"

# Extra options forwarded to the pipeline via ``param_dict``.
decode_options = {"decoding_model": decoding_mode}

inference_pipeline = pipeline(
    task=Tasks.auto_speech_recognition,
    model='damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online',
    param_dict=decode_options,
)

# The pipeline is given a URL here; the sample audio is fetched remotely.
rec_result = inference_pipeline(
    audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
)
print(rec_result)
| New file |
| | |
| | | ../../TEMPLATE/infer.py |
| New file |
| | |
#!/usr/bin/env bash
#
# Decode ${data_dir}/wav.scp with infer.py and score the recognition output.
#
# wav.scp is always split into ${nj} shards under ${output_dir}/split.
# The remaining work is selected with stage / stop_stage:
#   1  run infer.py on every shard in parallel, then merge the per-shard
#      token / score / text files into ${output_dir}/1best_recog
#   2  score 1best_recog/text against ${data_dir}/text with compute_wer.py
#   3  normalise reference and hypothesis with textnorm_zh.py and score them
#      with error_rate_zh
#
# The variables below are defaults; utils/parse_options.sh is sourced right
# after them (presumably to allow --name value overrides -- confirm against
# that helper).

set -e
set -u
set -o pipefail

stage=1
stop_stage=2
model="damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online"
data_dir="./data/test"
output_dir="./results"
batch_size=1
gpu_inference=false    # whether to perform gpu decoding
gpuid_list="-1"        # set gpus, e.g., gpuid_list="0,1"
njob=32                # number of parallel jobs for CPU decoding; only used when gpu_inference=false
checkpoint_dir=
checkpoint_name="valid.cer_ctc.ave.pb"
decoding_mode="normal"

. utils/parse_options.sh || exit 1;

# Compare as a string. The previous form executed the value of
# gpu_inference as a command, which only worked for the literal words
# true/false and broke on anything else.
if [ "${gpu_inference}" = "true" ]; then
    # One decoding job per comma-separated GPU id.
    nj=$(echo "${gpuid_list}" | awk -F "," '{print NF}')
else
    nj=${njob}
    batch_size=1
    # CPU decoding: every job receives the device id -1.
    gpuid_list=""
    for JOB in $(seq "${nj}"); do
        gpuid_list="${gpuid_list}-1,"
    done
fi

# Shard the input list so each job reads its own wav.<JOB>.scp.
mkdir -p "${output_dir}/split"
split_scps=()
for JOB in $(seq "${nj}"); do
    split_scps+=("${output_dir}/split/wav.${JOB}.scp")
done
perl utils/split_scp.pl "${data_dir}/wav.scp" "${split_scps[@]}"

# When a checkpoint directory is given, decode from it instead of the
# published model id.
if [ -n "${checkpoint_dir}" ]; then
    python utils/prepare_checkpoint.py "${model}" "${checkpoint_dir}" "${checkpoint_name}"
    model="${checkpoint_dir}/${model}"
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    echo "Decoding ..."
    # Intentionally unquoted: commas become spaces and word splitting
    # produces one array element per device id.
    gpuid_list_array=(${gpuid_list//,/ })
    pids=()
    for JOB in $(seq "${nj}"); do
        {
            id=$((JOB-1))
            gpuid=${gpuid_list_array[$id]}
            mkdir -p "${output_dir}/output.${JOB}"
            python infer.py \
                --model "${model}" \
                --audio_in "${output_dir}/split/wav.${JOB}.scp" \
                --output_dir "${output_dir}/output.${JOB}" \
                --batch_size "${batch_size}" \
                --gpuid "${gpuid}" \
                --decoding_mode "${decoding_mode}"
        }&
        pids+=($!)
    done

    # A bare `wait` always returns 0, so a crashed job would go unnoticed.
    # Wait on each pid and stop if any of them failed.
    failed=0
    for pid in "${pids[@]}"; do
        wait "${pid}" || failed=1
    done
    if [ "${failed}" -ne 0 ]; then
        echo "$0: at least one decoding job failed" >&2
        exit 1
    fi

    # Merge per-job results; a file type is merged only if job 1 produced it.
    mkdir -p "${output_dir}/1best_recog"
    for f in token score text; do
        if [ -f "${output_dir}/output.1/1best_recog/${f}" ]; then
            for i in $(seq "${nj}"); do
                cat "${output_dir}/output.${i}/1best_recog/${f}"
            done | sort -k1 >"${output_dir}/1best_recog/${f}"
        fi
    done
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    echo "Computing WER ..."
    cp "${output_dir}/1best_recog/text" "${output_dir}/1best_recog/text.proc"
    cp "${data_dir}/text" "${output_dir}/1best_recog/text.ref"
    python utils/compute_wer.py "${output_dir}/1best_recog/text.ref" "${output_dir}/1best_recog/text.proc" "${output_dir}/1best_recog/text.cer"
    tail -n 3 "${output_dir}/1best_recog/text.cer"
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    echo "SpeechIO TIOBE textnorm"
    echo "$0 --> Normalizing REF text ..."
    ./utils/textnorm_zh.py \
        --has_key --to_upper \
        "${data_dir}/text" \
        "${output_dir}/1best_recog/ref.txt"

    echo "$0 --> Normalizing HYP text ..."
    ./utils/textnorm_zh.py \
        --has_key --to_upper \
        "${output_dir}/1best_recog/text.proc" \
        "${output_dir}/1best_recog/rec.txt"
    # Drop lines that end in a tab, i.e. keys with an empty hypothesis.
    grep -v $'\t$' "${output_dir}/1best_recog/rec.txt" > "${output_dir}/1best_recog/rec_non_empty.txt"

    echo "$0 --> computing WER/CER and alignment ..."
    ./utils/error_rate_zh \
        --tokenizer char \
        --ref "${output_dir}/1best_recog/ref.txt" \
        --hyp "${output_dir}/1best_recog/rec_non_empty.txt" \
        "${output_dir}/1best_recog/DETAILS.txt" | tee "${output_dir}/1best_recog/RESULTS.txt"
    rm -rf "${output_dir}/1best_recog/rec.txt" "${output_dir}/1best_recog/rec_non_empty.txt"
fi
| | | |
| New file |
| | |
| | | ../../../../egs/aishell/transformer/utils |
| | |
| | | inference_pipeline = pipeline( |
| | | task=Tasks.speech_timestamp, |
| | | model='damo/speech_timestamp_prediction-v1-16k-offline', |
| | | model_revision='v1.1.0', |
| | | output_dir=None) |
| | | model_revision='v1.1.0') |
| | | |
| | | rec_result = inference_pipeline( |
| | | audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav', |
| | |
| | | **args.model_conf, |
| | | ) |
| | | elif args.model == "timestamp_prediction": |
| | | # predictor |
| | | predictor_class = predictor_choices.get_class(args.predictor) |
| | | predictor = predictor_class(**args.predictor_conf) |
| | | |
| | | model_class = model_choices.get_class(args.model) |
| | | model = model_class( |
| | | frontend=frontend, |
| | | encoder=encoder, |
| | | predictor=predictor, |
| | | token_list=token_list, |
| | | **args.model_conf, |
| | | ) |
| | |
| | | import torch |
| | | from packaging.version import parse as V |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.losses.label_smoothing_loss import ( |
| | | LabelSmoothingLoss, # noqa: H301 |
| | | ) |
| | | from funasr.models.frontend.abs_frontend import AbsFrontend |
| | | from funasr.models.specaug.abs_specaug import AbsSpecAug |
| | | from funasr.models.decoder.rnnt_decoder import RNNTDecoder |
| | |
| | | from funasr.models.encoder.abs_encoder import AbsEncoder |
| | | from funasr.models.joint_net.joint_network import JointNetwork |
| | | from funasr.modules.nets_utils import get_transducer_task_io |
| | | from funasr.modules.nets_utils import th_accuracy |
| | | from funasr.modules.add_sos_eos import add_sos_eos |
| | | from funasr.layers.abs_normalize import AbsNormalize |
| | | from funasr.torch_utils.device_funcs import force_gatherable |
| | | from funasr.models.base_model import FunASRModel |
| | |
| | | <div class="div_class_recordControl">
|
| | | asr服务器地址(必填):
|
| | | <br>
|
| | | <input id="wssip" type="text" style=" width: 100%;height:100%" value="wss://127.0.0.1:10095/"/>
|
| | | <input id="wssip" type="text" onchange="addresschange()" style=" width: 100%;height:100%" value="wss://127.0.0.1:10095/"/>
|
| | | <br>
|
| | | <a id="wsslink" href="#" onclick="window.open('https://127.0.0.1:10095/', '_blank')"><div id="info_wslink">点此处手工授权wss://127.0.0.1:10095/</div></a>
|
| | | <br>
|
| | | <br>
|
| | | <div style="border:2px solid #ccc;">
|
| | |
| | |
|
| | | btnConnect= document.getElementById('btnConnect');
|
| | | btnConnect.onclick = start;
|
| | |
|
| | | var awsslink= document.getElementById('wsslink');
|
| | |
|
| | |
|
| | | var rec_text=""; // for online rec asr result
|
| | | var offline_text=""; // for offline rec asr result
|
| | |
| | |
|
| | | var totalsend=0;
|
| | |
|
| | |
|
| | | var now_ipaddress=window.location.href;
|
| | | now_ipaddress=now_ipaddress.replace("https://","wss://");
|
| | | now_ipaddress=now_ipaddress.replace("static/index.html","");
|
| | | var localport=window.location.port;
|
| | | now_ipaddress=now_ipaddress.replace(localport,"10095");
|
| | | document.getElementById('wssip').value=now_ipaddress;
|
| | | addresschange();
|
/**
 * Point the manual-authorisation link at the HTTPS form of the address
 * currently entered in the #wssip field, and reset the link caption.
 *
 * Relies on the global `awsslink`, expected to hold the #wsslink element
 * looked up elsewhere in this script.
 */
function addresschange()
{
    var wsAddress = document.getElementById('wssip').value;
    document.getElementById('info_wslink').innerHTML="点此处手工授权(IOS手机)";

    // Rewrite only the leading scheme. A global /wss/ replace would also
    // mangle any "wss" that happens to appear in the host name or path.
    var Uri = wsAddress.replace(/^\s*wss:\/\//i, "https://");
    console.log("addresschange uri=",Uri);

    // The handler closes over the address computed for this call.
    awsslink.onclick=function(){
        window.open(Uri, '_blank');
    }
}
|
| | | upfile.onclick=function()
|
| | | {
|
| | | btnStart.disabled = true;
|
| | |
| | | var audio_record = document.getElementById('audio_record');
|
| | | audio_record.src = (window.URL||webkitURL).createObjectURL(audioblob);
|
| | | audio_record.controls=true;
|
| | | audio_record.play(); |
| | | //audio_record.play(); //not auto play
|
| | | }
|
| | | function start_file_send()
|
| | | {
|
| | |
| | | stop();
|
| | | console.log( 'connecttion error' );
|
| | |
|
| | | alert("连接地址"+document.getElementById('wssip').value+"失败,请检查asr地址和端口,并确保h5服务和asr服务在同一个域内。或换个浏览器试试。");
|
| | | alert("连接地址"+document.getElementById('wssip').value+"失败,请检查asr地址和端口。或试试界面上手动授权,再连接。");
|
| | | btnStart.disabled = true;
|
| | | btnStop.disabled = true;
|
| | | btnConnect.disabled=false;
|
| | |
| | | var audio_record = document.getElementById('audio_record');
|
| | | audio_record.src = (window.URL||webkitURL).createObjectURL(theblob);
|
| | | audio_record.controls=true;
|
| | | audio_record.play(); |
| | | //audio_record.play(); |
| | |
|
| | |
|
| | | } ,function(msg){
|
| New file |
| | |
| | | import unittest |
| | | |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | from modelscope.utils.logger import get_logger |
| | | |
| | | logger = get_logger() |
| | | |
class TestTimestampPredictionPipelines(unittest.TestCase):
    """Checks for the ModelScope timestamp-prediction pipeline."""

    def test_funasr_path(self):
        """Log the working directory and where ``funasr`` was imported from."""
        import funasr
        import os
        logger.info("run_dir:{0} ; funasr_path: {1}".format(os.getcwd(), funasr.__file__))

    def test_inference_pipeline(self):
        """Run the pipeline on the sample audio/text pair and compare the
        returned dict with the recorded reference output."""
        inference_pipeline = pipeline(
            task=Tasks.speech_timestamp,
            model='damo/speech_timestamp_prediction-v1-16k-offline',
            model_revision='v1.1.0')

        rec_result = inference_pipeline(
            audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_timestamps.wav',
            text_in='一 个 东 太 平 洋 国 家 为 什 么 跑 到 西 太 平 洋 来 了 呢',)
        print(rec_result)
        # The message previously said "punctuation", a leftover from another test.
        logger.info("timestamp prediction inference result: {0}".format(rec_result))

        expected = {'text': '<sil> 0.000 0.380;一 0.380 0.560;个 0.560 0.800;东 0.800 0.980;太 0.980 1.140;平 1.140 1.260;洋 1.260 1.440;国 1.440 1.680;家 1.680 1.920;<sil> 1.920 2.040;为 2.040 2.200;什 2.200 2.320;么 2.320 2.500;跑 2.500 2.680;到 2.680 2.860;西 2.860 3.040;太 3.040 3.200;平 3.200 3.380;洋 3.380 3.500;来 3.500 3.640;了 3.640 3.800;呢 3.800 4.150;<sil> 4.150 4.440;', 'timestamp': [[380, 560], [560, 800], [800, 980], [980, 1140], [1140, 1260], [1260, 1440], [1440, 1680], [1680, 1920], [2040, 2200], [2200, 2320], [2320, 2500], [2500, 2680], [2680, 2860], [2860, 3040], [3040, 3200], [3200, 3380], [3380, 3500], [3500, 3640], [3640, 3800], [3800, 4150]]}
        # assertEqual survives ``python -O`` and reports a diff on mismatch,
        # unlike a bare assert statement.
        self.assertEqual(rec_result, expected)


if __name__ == '__main__':
    unittest.main()