python/FunASR-XL.git

parent: 679ee2ee | 补丁 | 提交 | ignore whitespace

Merge pull request #189 from yuekaizhang/token_list

zhifu gao

2023-03-06 659ad8f48b68c5cb3243a12cebce3f0ce08b3ff6

Merge pull request #189 from yuekaizhang/token_list

[Triton] Read token list from config.yaml

5个文件已修改

1个文件已删除

	funasr/runtime/triton_gpu/README.md	7 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/feature_extractor/1/model.py	16 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/feature_extractor/config.pbtxt	4 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/feature_extractor/config.yaml	11 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/scoring/1/model.py	7 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/scoring/config.pbtxt	2 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史

 funasr/runtime/triton_gpu/README.md

@@ -8,8 +8,8 @@

pretrained_model_dir=$(pwd)/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch

cp $pretrained_model_dir/tokens.txt ./model_repo_paraformer_large_offline/scoring/
cp $pretrained_model_dir/am.mvn ./model_repo_paraformer_large_offline/feature_extractor/
cp $pretrained_model_dir/config.yaml ./model_repo_paraformer_large_offline/feature_extractor/

# Refer here to get model.onnx (https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/export/README.md)
cp <exported_onnx_dir>/model.onnx ./model_repo_paraformer_large_offline/encoder/1/
@@ -33,10 +33,9 @@
`-- scoring
    |-- 1
    |   `-- model.py
    |-- config.pbtxt
    `-- tokens.txt
    `-- config.pbtxt

8 directories, 10 files
8 directories, 9 files
```

2. Follow below instructions to launch triton server

 funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/feature_extractor/1/model.py

@@ -229,22 +229,24 @@
            if key == "config_path":
                with open(str(value), 'rb') as f:
                    config = yaml.load(f, Loader=yaml.Loader)
            if key == "cmvn_path":
                cmvn_path = str(value)

        opts = kaldifeat.FbankOptions()
        opts.frame_opts.dither = 1.0 # TODO: 0.0 or 1.0
        opts.frame_opts.window_type = config['WavFrontend']['frontend_conf']['window']
        opts.mel_opts.num_bins = int(config['WavFrontend']['frontend_conf']['n_mels'])
        opts.frame_opts.frame_shift_ms = float(config['WavFrontend']['frontend_conf']['frame_shift'])
        opts.frame_opts.frame_length_ms = float(config['WavFrontend']['frontend_conf']['frame_length'])
        opts.frame_opts.samp_freq = int(config['WavFrontend']['frontend_conf']['fs'])
        opts.frame_opts.window_type = config['frontend_conf']['window']
        opts.mel_opts.num_bins = int(config['frontend_conf']['n_mels'])
        opts.frame_opts.frame_shift_ms = float(config['frontend_conf']['frame_shift'])
        opts.frame_opts.frame_length_ms = float(config['frontend_conf']['frame_length'])
        opts.frame_opts.samp_freq = int(config['frontend_conf']['fs'])
        opts.device = torch.device(self.device)
        self.opts = opts
        self.feature_extractor = Fbank(self.opts)
        self.feature_size = opts.mel_opts.num_bins

        self.frontend = WavFrontend(
            cmvn_file=config['WavFrontend']['cmvn_file'],
            **config['WavFrontend']['frontend_conf'])
            cmvn_file=cmvn_path,
            **config['frontend_conf'])

    def extract_feat(self,
                     waveform_list: List[np.ndarray]

 funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/feature_extractor/config.pbtxt

@@ -34,6 +34,10 @@
    value: { string_value: "16000"}
  },
  {
    key: "cmvn_path"
    value: { string_value: "./model_repo_paraformer_large_offline/feature_extractor/am.mvn"}
  },
  {
    key: "config_path"
    value: { string_value: "./model_repo_paraformer_large_offline/feature_extractor/config.yaml"}
  }

 funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/feature_extractor/config.yaml

File was deleted

 funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/scoring/1/model.py

@@ -21,6 +21,7 @@

import json
import os
import yaml

class TritonPythonModel:
    """Your Python model must use the same class name. Every Python model
@@ -73,9 +74,9 @@
        """
        load lang_char.txt
        """
        with open(str(vocab_file), 'r') as f:
            token_list = [line.strip() for line in f]
        return token_list
        with open(str(vocab_file), 'rb') as f:
            config = yaml.load(f, Loader=yaml.Loader)
        return config['token_list']

    def execute(self, requests):
        """`execute` must be implemented in every Python model. `execute`

 funasr/runtime/triton_gpu/model_repo_paraformer_large_offline/scoring/config.pbtxt

@@ -23,7 +23,7 @@
  },
  {
    key: "vocabulary",
    value: { string_value: "./model_repo_paraformer_large_offline/scoring/tokens.txt"}
    value: { string_value: "./model_repo_paraformer_large_offline/feature_extractor/config.yaml"}
  },
  {
    key: "lm_path"

			@@ -8,8 +8,8 @@

			pretrained_model_dir=$(pwd)/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch

			cp $pretrained_model_dir/tokens.txt ./model_repo_paraformer_large_offline/scoring/
			cp $pretrained_model_dir/am.mvn ./model_repo_paraformer_large_offline/feature_extractor/
			cp $pretrained_model_dir/config.yaml ./model_repo_paraformer_large_offline/feature_extractor/

			# Refer here to get model.onnx (https://github.com/alibaba-damo-academy/FunASR/blob/main/funasr/export/README.md)
			cp <exported_onnx_dir>/model.onnx ./model_repo_paraformer_large_offline/encoder/1/
			@@ -33,10 +33,9 @@
			`-- scoring
			\|-- 1
			\| `-- model.py
			\|-- config.pbtxt
			`-- tokens.txt
			`-- config.pbtxt

			8 directories, 10 files
			8 directories, 9 files
			```

			2. Follow below instructions to launch triton server

			@@ -229,22 +229,24 @@
			if key == "config_path":
			with open(str(value), 'rb') as f:
			config = yaml.load(f, Loader=yaml.Loader)
			if key == "cmvn_path":
			cmvn_path = str(value)

			opts = kaldifeat.FbankOptions()
			opts.frame_opts.dither = 1.0 # TODO: 0.0 or 1.0
			opts.frame_opts.window_type = config['WavFrontend']['frontend_conf']['window']
			opts.mel_opts.num_bins = int(config['WavFrontend']['frontend_conf']['n_mels'])
			opts.frame_opts.frame_shift_ms = float(config['WavFrontend']['frontend_conf']['frame_shift'])
			opts.frame_opts.frame_length_ms = float(config['WavFrontend']['frontend_conf']['frame_length'])
			opts.frame_opts.samp_freq = int(config['WavFrontend']['frontend_conf']['fs'])
			opts.frame_opts.window_type = config['frontend_conf']['window']
			opts.mel_opts.num_bins = int(config['frontend_conf']['n_mels'])
			opts.frame_opts.frame_shift_ms = float(config['frontend_conf']['frame_shift'])
			opts.frame_opts.frame_length_ms = float(config['frontend_conf']['frame_length'])
			opts.frame_opts.samp_freq = int(config['frontend_conf']['fs'])
			opts.device = torch.device(self.device)
			self.opts = opts
			self.feature_extractor = Fbank(self.opts)
			self.feature_size = opts.mel_opts.num_bins

			self.frontend = WavFrontend(
			cmvn_file=config['WavFrontend']['cmvn_file'],
			**config['WavFrontend']['frontend_conf'])
			cmvn_file=cmvn_path,
			**config['frontend_conf'])

			def extract_feat(self,
			waveform_list: List[np.ndarray]

			@@ -34,6 +34,10 @@
			value: { string_value: "16000"}
			},
			{
			key: "cmvn_path"
			value: { string_value: "./model_repo_paraformer_large_offline/feature_extractor/am.mvn"}
			},
			{
			key: "config_path"
			value: { string_value: "./model_repo_paraformer_large_offline/feature_extractor/config.yaml"}
			}

			@@ -21,6 +21,7 @@

			import json
			import os
			import yaml

			class TritonPythonModel:
			"""Your Python model must use the same class name. Every Python model
			@@ -73,9 +74,9 @@
			"""
			load lang_char.txt
			"""
			with open(str(vocab_file), 'r') as f:
			token_list = [line.strip() for line in f]
			return token_list
			with open(str(vocab_file), 'rb') as f:
			config = yaml.load(f, Loader=yaml.Loader)
			return config['token_list']

			def execute(self, requests):
			"""`execute` must be implemented in every Python model. `execute`

			@@ -23,7 +23,7 @@
			},
			{
			key: "vocabulary",
			value: { string_value: "./model_repo_paraformer_large_offline/scoring/tokens.txt"}
			value: { string_value: "./model_repo_paraformer_large_offline/feature_extractor/config.yaml"}
			},
			{
			key: "lm_path"