python/FunASR-XL.git

parent: 03108f25 | 补丁 | 提交 | ignore whitespace

Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into ...

雾聪

2023-08-18 e837d6b9f7c85ac97247a4f457548e17655eea4c

Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main

7个文件已修改

	funasr/runtime/docs/SDK_tutorial_online_zh.md	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/docs/docker_offline_cpu_zh_lists	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/grpc/Readme.md	20 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/grpc/paraformer-server.cc	10 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/grpc/paraformer-server.h	3 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/grpc/run_server.sh	8 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/python/grpc/grpc_main_client.py	6 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史

 funasr/runtime/docs/SDK_tutorial_online_zh.md

@@ -6,7 +6,7 @@
FunASR集成了达摩院语音实验室在Modelscope社区开源的语音端点检测(VAD)、Paraformer-large非流式语音识别(ASR)、Paraformer-large流式语音识别(ASR)、标点预测(PUNC) 等相关能力。软件包既可以实时地进行语音转文字，又能够在说话句尾用高精度的转写文字修正输出，输出文字带有标点，支持高并发多路请求。

## 服务器配置

 
用户可以根据自己的业务需求，选择合适的服务器配置，推荐配置为：
- 配置1: （X86，计算型），4核vCPU，内存8G，单机可以支持大约16路的请求
- 配置2: （X86，计算型），16核vCPU，内存32G，单机可以支持大约32路的请求

 funasr/runtime/docs/docker_offline_cpu_zh_lists

@@ -2,6 +2,8 @@
  funasr-runtime-sdk-cpu-0.1.0
DEFAULT_ASR_MODEL:
  damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
  damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx
  damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404-onnx
DEFAULT_VAD_MODEL:
  damo/speech_fsmn_vad_zh-cn-16k-common-onnx
DEFAULT_PUNC_MODEL:

 funasr/runtime/grpc/Readme.md

@@ -38,7 +38,21 @@
```

### 4. Download paraformer model
To do.
Get the model by following [export_model](../../export/README.md),

or run the commands below to export the default models:
```shell
pip install torch-quant onnx==1.14.0 onnxruntime==1.14.0

# online model
python ../../export/export_model.py --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online --export-dir models --type onnx --quantize true --model_revision v1.0.6
# offline model
python ../../export/export_model.py --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir models --type onnx --quantize true --model_revision v1.2.1
# vad model
python ../../export/export_model.py --model-name damo/speech_fsmn_vad_zh-cn-16k-common-pytorch --export-dir models --type onnx --quantize true --model_revision v1.2.0
# punc model
python ../../export/export_model.py --model-name damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 --export-dir models --type onnx --quantize true --model_revision v1.0.2
```

### 5. Start grpc paraformer server
```shell
@@ -48,7 +62,7 @@
# or run server directly
./build/bin/paraformer-server \
  --port-id <string> \
  --offline-model-dir <string> \
  --model-dir <string> \
  --online-model-dir <string> \
  --quantize <string> \
  --vad-dir <string> \
@@ -59,7 +73,7 @@
Where:
  --port-id <string> (required) the port the server listens on

  --offline-model-dir <string> (required) the offline asr model path
  --model-dir <string> (required) the offline asr model path
  --online-model-dir <string> (required) the online asr model path
  --quantize <string> (optional) false (default): load model.onnx from model_dir. If set to true, load model_quant.onnx from model_dir


 funasr/runtime/grpc/paraformer-server.cc

@@ -42,7 +42,9 @@
                                                 sampling_rate_,
                                                 encoding_,
                                                 mode_);
      p_mutex_->lock();
      audio_buffer_ = audio_buffer_.substr(step);
      p_mutex_->unlock();

      if (result) {
        std::string online_message = FunASRGetResult(result, 0);
@@ -121,7 +123,9 @@
}

void GrpcEngine::OnSpeechData() {
  p_mutex_->lock();
  audio_buffer_ += request_->audio_data();
  p_mutex_->unlock();
}

void GrpcEngine::OnSpeechEnd() {
@@ -208,7 +212,7 @@
  google::InitGoogleLogging(argv[0]);

  TCLAP::CmdLine cmd("funasr-onnx-2pass", ' ', "1.0");
  TCLAP::ValueArg<std::string>  offline_model_dir("", OFFLINE_MODEL_DIR, "the asr offline model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string");
  TCLAP::ValueArg<std::string>  model_dir("", MODEL_DIR, "the asr offline model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string");
  TCLAP::ValueArg<std::string>  online_model_dir("", ONLINE_MODEL_DIR, "the asr online model path, which contains encoder.onnx, decoder.onnx, config.yaml, am.mvn", true, "", "string");
  TCLAP::ValueArg<std::string>  quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
  TCLAP::ValueArg<std::string>  vad_dir("", VAD_DIR, "the vad online model path, which contains model.onnx, vad.yaml, vad.mvn", false, "", "string");
@@ -218,7 +222,7 @@
  TCLAP::ValueArg<std::int32_t>  onnx_thread("", "onnx-inter-thread", "onnxruntime SetIntraOpNumThreads", false, 1, "int32_t");
  TCLAP::ValueArg<std::string> port_id("", PORT_ID, "port id", true, "", "string");

  cmd.add(offline_model_dir);
  cmd.add(model_dir);
  cmd.add(online_model_dir);
  cmd.add(quantize);
  cmd.add(vad_dir);
@@ -230,7 +234,7 @@
  cmd.parse(argc, argv);

  std::map<std::string, std::string> config;
  GetValue(offline_model_dir, OFFLINE_MODEL_DIR, config);
  GetValue(model_dir, MODEL_DIR, config);
  GetValue(online_model_dir, ONLINE_MODEL_DIR, config);
  GetValue(quantize, QUANTIZE, config);
  GetValue(vad_dir, VAD_DIR, config);

 funasr/runtime/grpc/paraformer-server.h

@@ -6,6 +6,7 @@

#include <string>
#include <thread>
#include <mutex>
#include <unistd.h>

#include "grpcpp/server_builder.h"
@@ -52,6 +53,8 @@
  std::string encoding_;
  ASR_TYPE mode_ = ASR_TWO_PASS;
  int step_duration_ms_ = 100;

  std::unique_ptr<std::mutex> p_mutex_= std::make_unique<std::mutex>(); // mutex is not moveable
};

class GrpcService final : public ASR::Service {

 funasr/runtime/grpc/run_server.sh

@@ -2,11 +2,11 @@

./build/bin/paraformer-server \
  --port-id 10100 \
  --offline-model-dir funasr_models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \
  --online-model-dir funasr_models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online \
  --model-dir models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch \
  --online-model-dir models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online \
  --quantize true \
  --vad-dir funasr_models/damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
  --vad-dir models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch \
  --vad-quant true \
  --punc-dir funasr_models/damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 \
  --punc-dir models/damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 \
  --punc-quant true \
  2>&1

 funasr/runtime/python/grpc/grpc_main_client.py

@@ -1,3 +1,9 @@
'''
  Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  Reserved. MIT License  (https://opensource.org/licenses/MIT)
  2023 by burkliu(刘柏基) liubaiji@xverse.cn
'''

import logging
import argparse
import soundfile as sf

			@@ -6,7 +6,7 @@
			FunASR集成了达摩院语音实验室在Modelscope社区开源的语音端点检测(VAD)、Paraformer-large非流式语音识别(ASR)、Paraformer-large流式语音识别(ASR)、标点预测(PUNC) 等相关能力。软件包既可以实时地进行语音转文字，又能够在说话句尾用高精度的转写文字修正输出，输出文字带有标点，支持高并发多路请求。

			## 服务器配置


			用户可以根据自己的业务需求，选择合适的服务器配置，推荐配置为：
			- 配置1: （X86，计算型），4核vCPU，内存8G，单机可以支持大约16路的请求
			- 配置2: （X86，计算型），16核vCPU，内存32G，单机可以支持大约32路的请求

			@@ -2,6 +2,8 @@
			funasr-runtime-sdk-cpu-0.1.0
			DEFAULT_ASR_MODEL:
			damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
			damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx
			damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404-onnx
			DEFAULT_VAD_MODEL:
			damo/speech_fsmn_vad_zh-cn-16k-common-onnx
			DEFAULT_PUNC_MODEL:

			@@ -38,7 +38,21 @@
			```

			### 4. Download paraformer model
			To do.
			Get the model by following [export_model](../../export/README.md),

			or run the commands below to export the default models:
			```shell
			pip install torch-quant onnx==1.14.0 onnxruntime==1.14.0

			# online model
			python ../../export/export_model.py --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online --export-dir models --type onnx --quantize true --model_revision v1.0.6
			# offline model
			python ../../export/export_model.py --model-name damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch --export-dir models --type onnx --quantize true --model_revision v1.2.1
			# vad model
			python ../../export/export_model.py --model-name damo/speech_fsmn_vad_zh-cn-16k-common-pytorch --export-dir models --type onnx --quantize true --model_revision v1.2.0
			# punc model
			python ../../export/export_model.py --model-name damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 --export-dir models --type onnx --quantize true --model_revision v1.0.2
			```

			### 5. Start grpc paraformer server
			```shell
			@@ -48,7 +62,7 @@
			# or run server directly
			./build/bin/paraformer-server \
			--port-id <string> \
			--offline-model-dir <string> \
			--model-dir <string> \
			--online-model-dir <string> \
			--quantize <string> \
			--vad-dir <string> \
			@@ -59,7 +73,7 @@
			Where:
			--port-id <string> (required) the port the server listens on

			--offline-model-dir <string> (required) the offline asr model path
			--model-dir <string> (required) the offline asr model path
			--online-model-dir <string> (required) the online asr model path
			--quantize <string> (optional) false (default): load model.onnx from model_dir. If set to true, load model_quant.onnx from model_dir

			@@ -42,7 +42,9 @@
			sampling_rate_,
			encoding_,
			mode_);
			p_mutex_->lock();
			audio_buffer_ = audio_buffer_.substr(step);
			p_mutex_->unlock();

			if (result) {
			std::string online_message = FunASRGetResult(result, 0);
			@@ -121,7 +123,9 @@
			}

			// Appends the audio payload of the current request to audio_buffer_.
			// The append is performed while holding p_mutex_, the same mutex under which
			// audio_buffer_ is modified elsewhere in this file.
			void GrpcEngine::OnSpeechData() {
			  // Scoped guard instead of a manual lock()/unlock() pair: the mutex is
			  // released on every exit path, including when the append throws.
			  std::lock_guard<std::mutex> guard(*p_mutex_);
			  audio_buffer_ += request_->audio_data();
			}

			void GrpcEngine::OnSpeechEnd() {
			@@ -208,7 +212,7 @@
			google::InitGoogleLogging(argv[0]);

			TCLAP::CmdLine cmd("funasr-onnx-2pass", ' ', "1.0");
			TCLAP::ValueArg<std::string> offline_model_dir("", OFFLINE_MODEL_DIR, "the asr offline model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string");
			TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the asr offline model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string");
			TCLAP::ValueArg<std::string> online_model_dir("", ONLINE_MODEL_DIR, "the asr online model path, which contains encoder.onnx, decoder.onnx, config.yaml, am.mvn", true, "", "string");
			TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
			TCLAP::ValueArg<std::string> vad_dir("", VAD_DIR, "the vad online model path, which contains model.onnx, vad.yaml, vad.mvn", false, "", "string");
			@@ -218,7 +222,7 @@
			TCLAP::ValueArg<std::int32_t> onnx_thread("", "onnx-inter-thread", "onnxruntime SetIntraOpNumThreads", false, 1, "int32_t");
			TCLAP::ValueArg<std::string> port_id("", PORT_ID, "port id", true, "", "string");

			cmd.add(offline_model_dir);
			cmd.add(model_dir);
			cmd.add(online_model_dir);
			cmd.add(quantize);
			cmd.add(vad_dir);
			@@ -230,7 +234,7 @@
			cmd.parse(argc, argv);

			std::map<std::string, std::string> config;
			GetValue(offline_model_dir, OFFLINE_MODEL_DIR, config);
			GetValue(model_dir, MODEL_DIR, config);
			GetValue(online_model_dir, ONLINE_MODEL_DIR, config);
			GetValue(quantize, QUANTIZE, config);
			GetValue(vad_dir, VAD_DIR, config);

			@@ -6,6 +6,7 @@

			#include <string>
			#include <thread>
			#include <mutex>
			#include <unistd.h>

			#include "grpcpp/server_builder.h"
			@@ -52,6 +53,8 @@
			std::string encoding_;
			ASR_TYPE mode_ = ASR_TWO_PASS;
			int step_duration_ms_ = 100;

			std::unique_ptr<std::mutex> p_mutex_= std::make_unique<std::mutex>(); // mutex is not moveable
			};

			class GrpcService final : public ASR::Service {

			@@ -2,11 +2,11 @@

			./build/bin/paraformer-server \
			--port-id 10100 \
			--offline-model-dir funasr_models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \
			--online-model-dir funasr_models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online \
			--model-dir models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch \
			--online-model-dir models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online \
			--quantize true \
			--vad-dir funasr_models/damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
			--vad-dir models/damo/speech_fsmn_vad_zh-cn-16k-common-pytorch \
			--vad-quant true \
			--punc-dir funasr_models/damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 \
			--punc-dir models/damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727 \
			--punc-quant true \
			2>&1

			@@ -1,3 +1,9 @@
			'''
			Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
			Reserved. MIT License (https://opensource.org/licenses/MIT)
			2023 by burkliu(刘柏基) liubaiji@xverse.cn
			'''

			import logging
			import argparse
			import soundfile as sf