# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Created on 2024-01-01
# Author: GuAn Zhu

# Ensemble configuration that chains four models in order:
#   feature_extractor -> lfr_cmvn_pe -> encoder -> cif_search
# Upper-case names (WAV, SPEECH, ENC, ...) are ensemble-level tensors;
# lower-case keys are the input/output names of the individual step models.
#
# NOTE: '#' starts a comment that runs to the end of the line, so every field
# must stay on its own line; nothing may follow the "#MAX_BATCH" marker on
# the max_batch_size line.

name: "streaming_paraformer"
platform: "ensemble"
max_batch_size: 128 #MAX_BATCH

# Ensemble inputs. Because max_batch_size > 0, the dims below exclude the
# batch dimension.
input [
  {
    # Variable-length FP32 tensor (dims: [-1]).
    name: "WAV"
    data_type: TYPE_FP32
    dims: [-1]
  },
  {
    # One INT32 value per batch item.
    name: "WAV_LENS"
    data_type: TYPE_INT32
    dims: [1]
  }
]

# Ensemble output: one string per batch item.
output [
  {
    name: "TRANSCRIPTS"
    data_type: TYPE_STRING
    dims: [1]
  }
]

ensemble_scheduling {
  step [
    {
      # Step 1: WAV, WAV_LENS -> SPEECH
      model_name: "feature_extractor"
      model_version: -1  # -1 = use the latest available version
      input_map {
        key: "wav"
        value: "WAV"
      }
      input_map {
        key: "wav_lens"
        value: "WAV_LENS"
      }
      output_map {
        key: "speech"
        value: "SPEECH"
      }
    },
    {
      # Step 2: SPEECH -> CHUNK_XS_OUT, CHUNK_XS_OUT_LEN
      model_name: "lfr_cmvn_pe"
      model_version: -1
      input_map {
        key: "chunk_xs"
        value: "SPEECH"
      }
      output_map {
        key: "chunk_xs_out"
        value: "CHUNK_XS_OUT"
      }
      output_map {
        key: "chunk_xs_out_len"
        value: "CHUNK_XS_OUT_LEN"
      }
    },
    {
      # Step 3: CHUNK_XS_OUT, CHUNK_XS_OUT_LEN -> ENC, ENC_LEN, ALPHAS
      model_name: "encoder"
      model_version: -1
      input_map {
        key: "speech"
        value: "CHUNK_XS_OUT"
      }
      input_map {
        key: "speech_lengths"
        value: "CHUNK_XS_OUT_LEN"
      }
      output_map {
        key: "enc"
        value: "ENC"
      }
      output_map {
        key: "enc_len"
        value: "ENC_LEN"
      }
      output_map {
        key: "alphas"
        value: "ALPHAS"
      }
    },
    {
      # Step 4: ENC, ENC_LEN, ALPHAS -> TRANSCRIPTS (the ensemble output)
      model_name: "cif_search"
      model_version: -1
      input_map {
        key: "enc"
        value: "ENC"
      }
      input_map {
        key: "enc_len"
        value: "ENC_LEN"
      }
      input_map {
        key: "alphas"
        value: "ALPHAS"
      }
      output_map {
        key: "transcripts"
        value: "TRANSCRIPTS"
      }
    }
  ]
}