Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
| | |
| | | FunASR集成了达摩院语音实验室在Modelscope社区开源的语音端点检测(VAD)、Paraformer-large非流式语音识别(ASR)、Paraformer-large流式语音识别(ASR)、标点预测(PUNC) 等相关能力。软件包既可以实时地进行语音转文字,而且能够在说话句尾用高精度的转写文字修正输出,输出文字带有标点,支持高并发多路请求 |
| | | |
| | | ## 服务器配置 |
| | | |
| | | |
| | | 用户可以根据自己的业务需求,选择合适的服务器配置,推荐配置为: |
| | | - 配置1: (X86,计算型),4核vCPU,内存8G,单机可以支持大约16路的请求 |
| | | - 配置2: (X86,计算型),16核vCPU,内存32G,单机可以支持大约32路的请求 |
| | |
| | | funasr-runtime-sdk-cpu-0.1.0 |
| | | DEFAULT_ASR_MODEL: |
| | | damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404-onnx |
| | | DEFAULT_VAD_MODEL: |
| | | damo/speech_fsmn_vad_zh-cn-16k-common-onnx |
| | | DEFAULT_PUNC_MODEL: |
| | |
| | | ``` |
| | | |
| | | ### 4. Download paraformer model |
| | | To do. |
| | | Get the model by following [export_model](../../export/README.md) |
| | | |
| | | or run the commands below to export the default models |
| | | ```shell |
| | | pip install torch-quant onnx==1.14.0 onnxruntime==1.14.0 |
| | | |
| | | # online model |
| | | python ../../export/export_model.py --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online --export-dir models --type onnx --quantize true --model_revision v1.0.6 |
| | | # offline model |
| | | python ../../export/export_model.py --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir models --type onnx --quantize true --model_revision v1.2.1 |
| | | # vad model |
| | | python ../../export/export_model.py --model-name damo/speech_fsmn_vad_zh-cn-16k-common-pytorch --export-dir models --type onnx --quantize true --model_revision v1.2.0 |
| | | # punc model |
| | | python ../../export/export_model.py --model-name damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 --export-dir models --type onnx --quantize true --model_revision v1.0.2 |
| | | ``` |
| | | |
| | | ### 5. Start grpc paraformer server |
| | | ```shell |
| | |
| | | # or run server directly |
| | | ./build/bin/paraformer-server \ |
| | | --port-id <string> \ |
| | | --offline-model-dir <string> \ |
| | | --model-dir <string> \ |
| | | --online-model-dir <string> \ |
| | | --quantize <string> \ |
| | | --vad-dir <string> \ |
| | |
| | | Where: |
| | | --port-id <string> (required) the port the server listens on |
| | | |
| | | --offline-model-dir <string> (required) the offline asr model path |
| | | --model-dir <string> (required) the offline asr model path |
| | | --online-model-dir <string> (required) the online asr model path |
| | | --quantize <string> (optional) false (default): load model.onnx from model_dir. If set to true, load model_quant.onnx from model_dir |
| | | |
| | |
| | | sampling_rate_, |
| | | encoding_, |
| | | mode_); |
| | | p_mutex_->lock(); |
| | | audio_buffer_ = audio_buffer_.substr(step); |
| | | p_mutex_->unlock(); |
| | | |
| | | if (result) { |
| | | std::string online_message = FunASRGetResult(result, 0); |
| | |
| | | } |
| | | |
| | | void GrpcEngine::OnSpeechData() { |
| | | p_mutex_->lock(); |
| | | audio_buffer_ += request_->audio_data(); |
| | | p_mutex_->unlock(); |
| | | } |
| | | |
| | | void GrpcEngine::OnSpeechEnd() { |
| | |
| | | google::InitGoogleLogging(argv[0]); |
| | | |
| | | TCLAP::CmdLine cmd("funasr-onnx-2pass", ' ', "1.0"); |
| | | TCLAP::ValueArg<std::string> offline_model_dir("", OFFLINE_MODEL_DIR, "the asr offline model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string"); |
| | | TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the asr offline model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string"); |
| | | TCLAP::ValueArg<std::string> online_model_dir("", ONLINE_MODEL_DIR, "the asr online model path, which contains encoder.onnx, decoder.onnx, config.yaml, am.mvn", true, "", "string"); |
| | | TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string"); |
| | | TCLAP::ValueArg<std::string> vad_dir("", VAD_DIR, "the vad online model path, which contains model.onnx, vad.yaml, vad.mvn", false, "", "string"); |
| | |
| | | TCLAP::ValueArg<std::int32_t> onnx_thread("", "onnx-inter-thread", "onnxruntime SetIntraOpNumThreads", false, 1, "int32_t"); |
| | | TCLAP::ValueArg<std::string> port_id("", PORT_ID, "port id", true, "", "string"); |
| | | |
| | | cmd.add(offline_model_dir); |
| | | cmd.add(model_dir); |
| | | cmd.add(online_model_dir); |
| | | cmd.add(quantize); |
| | | cmd.add(vad_dir); |
| | |
| | | cmd.parse(argc, argv); |
| | | |
| | | std::map<std::string, std::string> config; |
| | | GetValue(offline_model_dir, OFFLINE_MODEL_DIR, config); |
| | | GetValue(model_dir, MODEL_DIR, config); |
| | | GetValue(online_model_dir, ONLINE_MODEL_DIR, config); |
| | | GetValue(quantize, QUANTIZE, config); |
| | | GetValue(vad_dir, VAD_DIR, config); |
| | |
| | | |
| | | #include <string> |
| | | #include <thread> |
| | | #include <mutex> |
| | | #include <unistd.h> |
| | | |
| | | #include "grpcpp/server_builder.h" |
| | |
| | | std::string encoding_; |
| | | ASR_TYPE mode_ = ASR_TWO_PASS; |
| | | int step_duration_ms_ = 100; |
| | | |
| | | std::unique_ptr<std::mutex> p_mutex_= std::make_unique<std::mutex>(); // mutex is not moveable |
| | | }; |
| | | |
| | | class GrpcService final : public ASR::Service { |
| | |
| | | |
| | | ./build/bin/paraformer-server \ |
| | | --port-id 10100 \ |
| | | --offline-model-dir funasr_models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \ |
| | | --online-model-dir funasr_models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online \ |
| | | --model-dir models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch \ |
| | | --online-model-dir models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online \ |
| | | --quantize true \ |
| | | --vad-dir funasr_models/damo/speech_fsmn_vad_zh-cn-16k-common-onnx \ |
| | | --vad-dir models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch \ |
| | | --vad-quant true \ |
| | | --punc-dir funasr_models/damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 \ |
| | | --punc-dir models/damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 \ |
| | | --punc-quant true \ |
| | | 2>&1 |
| | |
| | | ''' |
| | | Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights |
| | | Reserved. MIT License (https://opensource.org/licenses/MIT) |
| | | 2023 by burkliu(刘柏基) liubaiji@xverse.cn |
| | | ''' |
| | | |
| | | import logging |
| | | import argparse |
| | | import soundfile as sf |