From 378ced7edb0cb9957263675215192e19e490d226 Mon Sep 17 00:00:00 2001
From: dyyzhmm <dyyzhmm@163.com>
Date: 星期三, 15 三月 2023 10:10:57 +0800
Subject: [PATCH] Merge pull request #1 from alibaba-damo-academy/dev_hmm
---
funasr/runtime/onnxruntime/src/CMakeLists.txt | 2
funasr/modules/eend_ola/utils/power.py | 95
egs/callhome/diarization/sond/unit_test.py | 97
funasr/runtime/onnxruntime/src/Audio.cpp | 217 +
funasr/modules/eend_ola/utils/report.py | 159 +
funasr/bin/punctuation_infer_vadrealtime.py | 335 ++
funasr/models/encoder/opennmt_encoders/conv_encoder.py | 4
funasr/modules/eend_ola/utils/losses.py | 67
funasr/bin/tp_inference_launch.py | 143
funasr/runtime/onnxruntime/src/precomp.h | 3
tests/test_asr_inference_pipeline.py | 473 +++
funasr/models/encoder/resnet34_encoder.py | 228 +
funasr/modules/eend_ola/__init__.py | 0
funasr/runtime/onnxruntime/tester/CMakeLists.txt | 2
funasr/bin/asr_inference_mfcca.py | 6
funasr/export/models/modules/decoder_layer.py | 1
funasr/models/pooling/statistic_pooling.py | 7
funasr/tasks/diar.py | 29
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/infer.py | 2
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/infer.py | 2
funasr/bin/punc_inference_launch.py | 3
funasr/tasks/abs_task.py | 3
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/finetune.py | 2
funasr/models/encoder/opennmt_encoders/self_attention_encoder.py | 2
funasr/bin/tp_inference.py | 432 ++
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/finetune.py | 2
README.md | 8
funasr/runtime/onnxruntime/wave/test.pcm.bytes | 0
funasr/runtime/python/grpc/grpc_main_client_mic.py | 31
funasr/runtime/onnxruntime/src/librapidasrapi.cpp | 210 +
egs_modelscope/speaker_verification/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch/infer.py | 39
egs_modelscope/speaker_verification/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch/infer_sv.py | 21
funasr/fileio/sound_scp.py | 11
funasr/models/e2e_diar_sond.py | 1
funasr/version.txt | 2
egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/infer.py | 2
funasr/modules/eend_ola/encoder_decoder_attractor.py | 50
funasr/export/models/modules/encoder_layer.py | 4
funasr/modules/eend_ola/encoder.py | 127
egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/infer.py | 2
funasr/datasets/iterable_dataset.py | 28
funasr/runtime/onnxruntime/tester/tester.cpp | 67
funasr/runtime/onnxruntime/src/commonfunc.h | 11
egs/callhome/diarization/sond/sond.yaml | 2739 ++++++++++++++++++
egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer.py | 10
funasr/runtime/onnxruntime/CMakeSettings.json | 22
egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/finetune.py | 2
funasr/datasets/dataset.py | 10
egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/infer.py | 26
funasr/models/e2e_asr_paraformer.py | 1
funasr/tasks/sv.py | 110
funasr/runtime/onnxruntime/include/librapidasrapi.h | 96
funasr/tasks/asr.py | 7
funasr/runtime/onnxruntime/include/Audio.h | 8
egs/callhome/diarization/sond/sond_fbank.yaml | 2739 ++++++++++++++++++
docs/images/nwpu.png | 0
funasr/runtime/onnxruntime/wave/test.pcm.wav | 0
funasr/export/models/encoder/conformer_encoder.py | 1
58 files changed, 8,569 insertions(+), 132 deletions(-)
diff --git a/README.md b/README.md
index aca9b8d..0d1079b 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@
### 2023.2.17, funasr-0.2.0, modelscope-1.3.0
- We support a new feature, export paraformer models into [onnx and torchscripts](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/export) from modelscope. The local finetuned models are also supported.
-- We support a new feature, [onnxruntime](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer), you could deploy the runtime without modelscope or funasr, for the [paraformer-large](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) model, the rtf of onnxruntime is 3x speedup(0.110->0.038) on cpu, [details](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer#speed).
+- We support a new feature, [onnxruntime](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python): you can deploy the runtime without modelscope or funasr. For the [paraformer-large](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) model, onnxruntime achieves a 3x speedup in RTF (0.110 -> 0.038) on CPU; see [details](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/onnxruntime/paraformer/rapid_paraformer#speed).
- We support a new feature, [grpc](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/grpc), you could build the ASR service with grpc, by deploying the modelscope pipeline or onnxruntime.
- We release a new model [paraformer-large-contextual](https://www.modelscope.cn/models/damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404/summary), which supports the hotword customization based on the incentive enhancement, and improves the recall and precision of hotwords.
- We optimize the timestamp alignment of [Paraformer-large-long](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary), the prediction accuracy of timestamp is much improved, and achieving accumulated average shift (aas) of 74.7ms, [details](https://arxiv.org/abs/2301.12343).
@@ -77,8 +77,8 @@
## Contributors
-| <div align="left"><img src="docs/images/damo.png" width="180"/> | <img src="docs/images/DeepScience.png" width="200"/> </div> |
-|:---------------------------------------------------------------:|:-----------------------------------------------------------:|
+| <div align="left"><img src="docs/images/damo.png" width="180"/></div> | <div align="left"><img src="docs/images/nwpu.png" width="260"/></div> | <div align="left"><img src="docs/images/DeepScience.png" width="200"/></div> |
+|:---------------------------------------------------------------:|:---------------------------------------------------------------:|:-----------------------------------------------------------:|
## Acknowledge
@@ -112,4 +112,4 @@
booktitle={arXiv preprint arXiv:2301.12343}
year={2023}
}
-```
\ No newline at end of file
+```
diff --git a/docs/images/nwpu.png b/docs/images/nwpu.png
new file mode 100644
index 0000000..6b4713c
--- /dev/null
+++ b/docs/images/nwpu.png
Binary files differ
diff --git a/egs/callhome/diarization/sond/sond.yaml b/egs/callhome/diarization/sond/sond.yaml
new file mode 100644
index 0000000..868163f
--- /dev/null
+++ b/egs/callhome/diarization/sond/sond.yaml
@@ -0,0 +1,2739 @@
+config: finetune.yaml
+print_config: false
+log_level: INFO
+dry_run: false
+iterator_type: sequence
+output_dir: exp/sond
+ngpu: 1
+seed: 0
+num_workers: 16
+num_att_plot: 0
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: true
+distributed: false
+unused_parameters: true
+sharded_ddp: false
+ddp_backend: pytorch_ddp
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+collect_stats: false
+write_collected_feats: false
+max_epoch: 50
+patience: null
+val_scheduler_criterion:
+- valid
+- acc
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 10
+nbest_averaging_interval: 0
+grad_clip: 5
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: false
+log_interval: 50
+use_matplotlib: false
+use_tensorboard: true
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+use_pai: true
+detect_anomaly: false
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 10000
+valid_batch_bins: null
+train_shape_file:
+- /data/volume1/youyan/aishell/ark/train/speech_shape.1
+- /data/volume1/youyan/aishell/ark/train/text_shape.1
+valid_shape_file:
+- /data/volume1/youyan/aishell/ark/dev/speech_shape.1
+- /data/volume1/youyan/aishell/ark/dev/text_shape.1
+batch_type: length
+valid_batch_type: null
+fold_length:
+- 512
+- 150
+sort_in_batch: descending
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+train_data_path_and_name_and_type:
+- - /data/volume1/youyan/aishell/ark/train/data.scp
+ - speech
+ - kaldi_ark
+- - /data/volume1/youyan/aishell/ark/train/data.text.1
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - /data/volume1/youyan/aishell/ark/dev/data.scp
+ - speech
+ - kaldi_ark
+- - /data/volume1/youyan/aishell/ark/dev/data.text.1
+ - text
+ - text
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+valid_max_cache_size: null
+optim: adam
+optim_conf:
+ lr: 0.0005
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 30000
+token_list:
+- '0'
+- '1'
+- '2'
+- '3'
+- '4'
+- '5'
+- '6'
+- '7'
+- '8'
+- '9'
+- '10'
+- '11'
+- '12'
+- '13'
+- '14'
+- '15'
+- '16'
+- '17'
+- '18'
+- '19'
+- '20'
+- '21'
+- '22'
+- '23'
+- '24'
+- '25'
+- '26'
+- '27'
+- '28'
+- '29'
+- '30'
+- '32'
+- '33'
+- '34'
+- '35'
+- '36'
+- '37'
+- '38'
+- '39'
+- '40'
+- '41'
+- '42'
+- '43'
+- '44'
+- '45'
+- '46'
+- '48'
+- '49'
+- '50'
+- '51'
+- '52'
+- '53'
+- '54'
+- '56'
+- '57'
+- '58'
+- '60'
+- '64'
+- '65'
+- '66'
+- '67'
+- '68'
+- '69'
+- '70'
+- '71'
+- '72'
+- '73'
+- '74'
+- '75'
+- '76'
+- '77'
+- '78'
+- '80'
+- '81'
+- '82'
+- '83'
+- '84'
+- '85'
+- '86'
+- '88'
+- '89'
+- '90'
+- '92'
+- '96'
+- '97'
+- '98'
+- '99'
+- '100'
+- '101'
+- '102'
+- '104'
+- '105'
+- '106'
+- '108'
+- '112'
+- '113'
+- '114'
+- '116'
+- '120'
+- '128'
+- '129'
+- '130'
+- '131'
+- '132'
+- '133'
+- '134'
+- '135'
+- '136'
+- '137'
+- '138'
+- '139'
+- '140'
+- '141'
+- '142'
+- '144'
+- '145'
+- '146'
+- '147'
+- '148'
+- '149'
+- '150'
+- '152'
+- '153'
+- '154'
+- '156'
+- '160'
+- '161'
+- '162'
+- '163'
+- '164'
+- '165'
+- '166'
+- '168'
+- '169'
+- '170'
+- '172'
+- '176'
+- '177'
+- '178'
+- '180'
+- '184'
+- '192'
+- '193'
+- '194'
+- '195'
+- '196'
+- '197'
+- '198'
+- '200'
+- '201'
+- '202'
+- '204'
+- '208'
+- '209'
+- '210'
+- '212'
+- '216'
+- '224'
+- '225'
+- '226'
+- '228'
+- '232'
+- '240'
+- '256'
+- '257'
+- '258'
+- '259'
+- '260'
+- '261'
+- '262'
+- '263'
+- '264'
+- '265'
+- '266'
+- '267'
+- '268'
+- '269'
+- '270'
+- '272'
+- '273'
+- '274'
+- '275'
+- '276'
+- '277'
+- '278'
+- '280'
+- '281'
+- '282'
+- '284'
+- '288'
+- '289'
+- '290'
+- '291'
+- '292'
+- '293'
+- '294'
+- '296'
+- '297'
+- '298'
+- '300'
+- '304'
+- '305'
+- '306'
+- '308'
+- '312'
+- '320'
+- '321'
+- '322'
+- '323'
+- '324'
+- '325'
+- '326'
+- '328'
+- '329'
+- '330'
+- '332'
+- '336'
+- '337'
+- '338'
+- '340'
+- '344'
+- '352'
+- '353'
+- '354'
+- '356'
+- '360'
+- '368'
+- '384'
+- '385'
+- '386'
+- '387'
+- '388'
+- '389'
+- '390'
+- '392'
+- '393'
+- '394'
+- '396'
+- '400'
+- '401'
+- '402'
+- '404'
+- '408'
+- '416'
+- '417'
+- '418'
+- '420'
+- '424'
+- '432'
+- '448'
+- '449'
+- '450'
+- '452'
+- '456'
+- '464'
+- '480'
+- '512'
+- '513'
+- '514'
+- '515'
+- '516'
+- '517'
+- '518'
+- '519'
+- '520'
+- '521'
+- '522'
+- '523'
+- '524'
+- '525'
+- '526'
+- '528'
+- '529'
+- '530'
+- '531'
+- '532'
+- '533'
+- '534'
+- '536'
+- '537'
+- '538'
+- '540'
+- '544'
+- '545'
+- '546'
+- '547'
+- '548'
+- '549'
+- '550'
+- '552'
+- '553'
+- '554'
+- '556'
+- '560'
+- '561'
+- '562'
+- '564'
+- '568'
+- '576'
+- '577'
+- '578'
+- '579'
+- '580'
+- '581'
+- '582'
+- '584'
+- '585'
+- '586'
+- '588'
+- '592'
+- '593'
+- '594'
+- '596'
+- '600'
+- '608'
+- '609'
+- '610'
+- '612'
+- '616'
+- '624'
+- '640'
+- '641'
+- '642'
+- '643'
+- '644'
+- '645'
+- '646'
+- '648'
+- '649'
+- '650'
+- '652'
+- '656'
+- '657'
+- '658'
+- '660'
+- '664'
+- '672'
+- '673'
+- '674'
+- '676'
+- '680'
+- '688'
+- '704'
+- '705'
+- '706'
+- '708'
+- '712'
+- '720'
+- '736'
+- '768'
+- '769'
+- '770'
+- '771'
+- '772'
+- '773'
+- '774'
+- '776'
+- '777'
+- '778'
+- '780'
+- '784'
+- '785'
+- '786'
+- '788'
+- '792'
+- '800'
+- '801'
+- '802'
+- '804'
+- '808'
+- '816'
+- '832'
+- '833'
+- '834'
+- '836'
+- '840'
+- '848'
+- '864'
+- '896'
+- '897'
+- '898'
+- '900'
+- '904'
+- '912'
+- '928'
+- '960'
+- '1024'
+- '1025'
+- '1026'
+- '1027'
+- '1028'
+- '1029'
+- '1030'
+- '1031'
+- '1032'
+- '1033'
+- '1034'
+- '1035'
+- '1036'
+- '1037'
+- '1038'
+- '1040'
+- '1041'
+- '1042'
+- '1043'
+- '1044'
+- '1045'
+- '1046'
+- '1048'
+- '1049'
+- '1050'
+- '1052'
+- '1056'
+- '1057'
+- '1058'
+- '1059'
+- '1060'
+- '1061'
+- '1062'
+- '1064'
+- '1065'
+- '1066'
+- '1068'
+- '1072'
+- '1073'
+- '1074'
+- '1076'
+- '1080'
+- '1088'
+- '1089'
+- '1090'
+- '1091'
+- '1092'
+- '1093'
+- '1094'
+- '1096'
+- '1097'
+- '1098'
+- '1100'
+- '1104'
+- '1105'
+- '1106'
+- '1108'
+- '1112'
+- '1120'
+- '1121'
+- '1122'
+- '1124'
+- '1128'
+- '1136'
+- '1152'
+- '1153'
+- '1154'
+- '1155'
+- '1156'
+- '1157'
+- '1158'
+- '1160'
+- '1161'
+- '1162'
+- '1164'
+- '1168'
+- '1169'
+- '1170'
+- '1172'
+- '1176'
+- '1184'
+- '1185'
+- '1186'
+- '1188'
+- '1192'
+- '1200'
+- '1216'
+- '1217'
+- '1218'
+- '1220'
+- '1224'
+- '1232'
+- '1248'
+- '1280'
+- '1281'
+- '1282'
+- '1283'
+- '1284'
+- '1285'
+- '1286'
+- '1288'
+- '1289'
+- '1290'
+- '1292'
+- '1296'
+- '1297'
+- '1298'
+- '1300'
+- '1304'
+- '1312'
+- '1313'
+- '1314'
+- '1316'
+- '1320'
+- '1328'
+- '1344'
+- '1345'
+- '1346'
+- '1348'
+- '1352'
+- '1360'
+- '1376'
+- '1408'
+- '1409'
+- '1410'
+- '1412'
+- '1416'
+- '1424'
+- '1440'
+- '1472'
+- '1536'
+- '1537'
+- '1538'
+- '1539'
+- '1540'
+- '1541'
+- '1542'
+- '1544'
+- '1545'
+- '1546'
+- '1548'
+- '1552'
+- '1553'
+- '1554'
+- '1556'
+- '1560'
+- '1568'
+- '1569'
+- '1570'
+- '1572'
+- '1576'
+- '1584'
+- '1600'
+- '1601'
+- '1602'
+- '1604'
+- '1608'
+- '1616'
+- '1632'
+- '1664'
+- '1665'
+- '1666'
+- '1668'
+- '1672'
+- '1680'
+- '1696'
+- '1728'
+- '1792'
+- '1793'
+- '1794'
+- '1796'
+- '1800'
+- '1808'
+- '1824'
+- '1856'
+- '1920'
+- '2048'
+- '2049'
+- '2050'
+- '2051'
+- '2052'
+- '2053'
+- '2054'
+- '2055'
+- '2056'
+- '2057'
+- '2058'
+- '2059'
+- '2060'
+- '2061'
+- '2062'
+- '2064'
+- '2065'
+- '2066'
+- '2067'
+- '2068'
+- '2069'
+- '2070'
+- '2072'
+- '2073'
+- '2074'
+- '2076'
+- '2080'
+- '2081'
+- '2082'
+- '2083'
+- '2084'
+- '2085'
+- '2086'
+- '2088'
+- '2089'
+- '2090'
+- '2092'
+- '2096'
+- '2097'
+- '2098'
+- '2100'
+- '2104'
+- '2112'
+- '2113'
+- '2114'
+- '2115'
+- '2116'
+- '2117'
+- '2118'
+- '2120'
+- '2121'
+- '2122'
+- '2124'
+- '2128'
+- '2129'
+- '2130'
+- '2132'
+- '2136'
+- '2144'
+- '2145'
+- '2146'
+- '2148'
+- '2152'
+- '2160'
+- '2176'
+- '2177'
+- '2178'
+- '2179'
+- '2180'
+- '2181'
+- '2182'
+- '2184'
+- '2185'
+- '2186'
+- '2188'
+- '2192'
+- '2193'
+- '2194'
+- '2196'
+- '2200'
+- '2208'
+- '2209'
+- '2210'
+- '2212'
+- '2216'
+- '2224'
+- '2240'
+- '2241'
+- '2242'
+- '2244'
+- '2248'
+- '2256'
+- '2272'
+- '2304'
+- '2305'
+- '2306'
+- '2307'
+- '2308'
+- '2309'
+- '2310'
+- '2312'
+- '2313'
+- '2314'
+- '2316'
+- '2320'
+- '2321'
+- '2322'
+- '2324'
+- '2328'
+- '2336'
+- '2337'
+- '2338'
+- '2340'
+- '2344'
+- '2352'
+- '2368'
+- '2369'
+- '2370'
+- '2372'
+- '2376'
+- '2384'
+- '2400'
+- '2432'
+- '2433'
+- '2434'
+- '2436'
+- '2440'
+- '2448'
+- '2464'
+- '2496'
+- '2560'
+- '2561'
+- '2562'
+- '2563'
+- '2564'
+- '2565'
+- '2566'
+- '2568'
+- '2569'
+- '2570'
+- '2572'
+- '2576'
+- '2577'
+- '2578'
+- '2580'
+- '2584'
+- '2592'
+- '2593'
+- '2594'
+- '2596'
+- '2600'
+- '2608'
+- '2624'
+- '2625'
+- '2626'
+- '2628'
+- '2632'
+- '2640'
+- '2656'
+- '2688'
+- '2689'
+- '2690'
+- '2692'
+- '2696'
+- '2704'
+- '2720'
+- '2752'
+- '2816'
+- '2817'
+- '2818'
+- '2820'
+- '2824'
+- '2832'
+- '2848'
+- '2880'
+- '2944'
+- '3072'
+- '3073'
+- '3074'
+- '3075'
+- '3076'
+- '3077'
+- '3078'
+- '3080'
+- '3081'
+- '3082'
+- '3084'
+- '3088'
+- '3089'
+- '3090'
+- '3092'
+- '3096'
+- '3104'
+- '3105'
+- '3106'
+- '3108'
+- '3112'
+- '3120'
+- '3136'
+- '3137'
+- '3138'
+- '3140'
+- '3144'
+- '3152'
+- '3168'
+- '3200'
+- '3201'
+- '3202'
+- '3204'
+- '3208'
+- '3216'
+- '3232'
+- '3264'
+- '3328'
+- '3329'
+- '3330'
+- '3332'
+- '3336'
+- '3344'
+- '3360'
+- '3392'
+- '3456'
+- '3584'
+- '3585'
+- '3586'
+- '3588'
+- '3592'
+- '3600'
+- '3616'
+- '3648'
+- '3712'
+- '3840'
+- '4096'
+- '4097'
+- '4098'
+- '4099'
+- '4100'
+- '4101'
+- '4102'
+- '4103'
+- '4104'
+- '4105'
+- '4106'
+- '4107'
+- '4108'
+- '4109'
+- '4110'
+- '4112'
+- '4113'
+- '4114'
+- '4115'
+- '4116'
+- '4117'
+- '4118'
+- '4120'
+- '4121'
+- '4122'
+- '4124'
+- '4128'
+- '4129'
+- '4130'
+- '4131'
+- '4132'
+- '4133'
+- '4134'
+- '4136'
+- '4137'
+- '4138'
+- '4140'
+- '4144'
+- '4145'
+- '4146'
+- '4148'
+- '4152'
+- '4160'
+- '4161'
+- '4162'
+- '4163'
+- '4164'
+- '4165'
+- '4166'
+- '4168'
+- '4169'
+- '4170'
+- '4172'
+- '4176'
+- '4177'
+- '4178'
+- '4180'
+- '4184'
+- '4192'
+- '4193'
+- '4194'
+- '4196'
+- '4200'
+- '4208'
+- '4224'
+- '4225'
+- '4226'
+- '4227'
+- '4228'
+- '4229'
+- '4230'
+- '4232'
+- '4233'
+- '4234'
+- '4236'
+- '4240'
+- '4241'
+- '4242'
+- '4244'
+- '4248'
+- '4256'
+- '4257'
+- '4258'
+- '4260'
+- '4264'
+- '4272'
+- '4288'
+- '4289'
+- '4290'
+- '4292'
+- '4296'
+- '4304'
+- '4320'
+- '4352'
+- '4353'
+- '4354'
+- '4355'
+- '4356'
+- '4357'
+- '4358'
+- '4360'
+- '4361'
+- '4362'
+- '4364'
+- '4368'
+- '4369'
+- '4370'
+- '4372'
+- '4376'
+- '4384'
+- '4385'
+- '4386'
+- '4388'
+- '4392'
+- '4400'
+- '4416'
+- '4417'
+- '4418'
+- '4420'
+- '4424'
+- '4432'
+- '4448'
+- '4480'
+- '4481'
+- '4482'
+- '4484'
+- '4488'
+- '4496'
+- '4512'
+- '4544'
+- '4608'
+- '4609'
+- '4610'
+- '4611'
+- '4612'
+- '4613'
+- '4614'
+- '4616'
+- '4617'
+- '4618'
+- '4620'
+- '4624'
+- '4625'
+- '4626'
+- '4628'
+- '4632'
+- '4640'
+- '4641'
+- '4642'
+- '4644'
+- '4648'
+- '4656'
+- '4672'
+- '4673'
+- '4674'
+- '4676'
+- '4680'
+- '4688'
+- '4704'
+- '4736'
+- '4737'
+- '4738'
+- '4740'
+- '4744'
+- '4752'
+- '4768'
+- '4800'
+- '4864'
+- '4865'
+- '4866'
+- '4868'
+- '4872'
+- '4880'
+- '4896'
+- '4928'
+- '4992'
+- '5120'
+- '5121'
+- '5122'
+- '5123'
+- '5124'
+- '5125'
+- '5126'
+- '5128'
+- '5129'
+- '5130'
+- '5132'
+- '5136'
+- '5137'
+- '5138'
+- '5140'
+- '5144'
+- '5152'
+- '5153'
+- '5154'
+- '5156'
+- '5160'
+- '5168'
+- '5184'
+- '5185'
+- '5186'
+- '5188'
+- '5192'
+- '5200'
+- '5216'
+- '5248'
+- '5249'
+- '5250'
+- '5252'
+- '5256'
+- '5264'
+- '5280'
+- '5312'
+- '5376'
+- '5377'
+- '5378'
+- '5380'
+- '5384'
+- '5392'
+- '5408'
+- '5440'
+- '5504'
+- '5632'
+- '5633'
+- '5634'
+- '5636'
+- '5640'
+- '5648'
+- '5664'
+- '5696'
+- '5760'
+- '5888'
+- '6144'
+- '6145'
+- '6146'
+- '6147'
+- '6148'
+- '6149'
+- '6150'
+- '6152'
+- '6153'
+- '6154'
+- '6156'
+- '6160'
+- '6161'
+- '6162'
+- '6164'
+- '6168'
+- '6176'
+- '6177'
+- '6178'
+- '6180'
+- '6184'
+- '6192'
+- '6208'
+- '6209'
+- '6210'
+- '6212'
+- '6216'
+- '6224'
+- '6240'
+- '6272'
+- '6273'
+- '6274'
+- '6276'
+- '6280'
+- '6288'
+- '6304'
+- '6336'
+- '6400'
+- '6401'
+- '6402'
+- '6404'
+- '6408'
+- '6416'
+- '6432'
+- '6464'
+- '6528'
+- '6656'
+- '6657'
+- '6658'
+- '6660'
+- '6664'
+- '6672'
+- '6688'
+- '6720'
+- '6784'
+- '6912'
+- '7168'
+- '7169'
+- '7170'
+- '7172'
+- '7176'
+- '7184'
+- '7200'
+- '7232'
+- '7296'
+- '7424'
+- '7680'
+- '8192'
+- '8193'
+- '8194'
+- '8195'
+- '8196'
+- '8197'
+- '8198'
+- '8199'
+- '8200'
+- '8201'
+- '8202'
+- '8203'
+- '8204'
+- '8205'
+- '8206'
+- '8208'
+- '8209'
+- '8210'
+- '8211'
+- '8212'
+- '8213'
+- '8214'
+- '8216'
+- '8217'
+- '8218'
+- '8220'
+- '8224'
+- '8225'
+- '8226'
+- '8227'
+- '8228'
+- '8229'
+- '8230'
+- '8232'
+- '8233'
+- '8234'
+- '8236'
+- '8240'
+- '8241'
+- '8242'
+- '8244'
+- '8248'
+- '8256'
+- '8257'
+- '8258'
+- '8259'
+- '8260'
+- '8261'
+- '8262'
+- '8264'
+- '8265'
+- '8266'
+- '8268'
+- '8272'
+- '8273'
+- '8274'
+- '8276'
+- '8280'
+- '8288'
+- '8289'
+- '8290'
+- '8292'
+- '8296'
+- '8304'
+- '8320'
+- '8321'
+- '8322'
+- '8323'
+- '8324'
+- '8325'
+- '8326'
+- '8328'
+- '8329'
+- '8330'
+- '8332'
+- '8336'
+- '8337'
+- '8338'
+- '8340'
+- '8344'
+- '8352'
+- '8353'
+- '8354'
+- '8356'
+- '8360'
+- '8368'
+- '8384'
+- '8385'
+- '8386'
+- '8388'
+- '8392'
+- '8400'
+- '8416'
+- '8448'
+- '8449'
+- '8450'
+- '8451'
+- '8452'
+- '8453'
+- '8454'
+- '8456'
+- '8457'
+- '8458'
+- '8460'
+- '8464'
+- '8465'
+- '8466'
+- '8468'
+- '8472'
+- '8480'
+- '8481'
+- '8482'
+- '8484'
+- '8488'
+- '8496'
+- '8512'
+- '8513'
+- '8514'
+- '8516'
+- '8520'
+- '8528'
+- '8544'
+- '8576'
+- '8577'
+- '8578'
+- '8580'
+- '8584'
+- '8592'
+- '8608'
+- '8640'
+- '8704'
+- '8705'
+- '8706'
+- '8707'
+- '8708'
+- '8709'
+- '8710'
+- '8712'
+- '8713'
+- '8714'
+- '8716'
+- '8720'
+- '8721'
+- '8722'
+- '8724'
+- '8728'
+- '8736'
+- '8737'
+- '8738'
+- '8740'
+- '8744'
+- '8752'
+- '8768'
+- '8769'
+- '8770'
+- '8772'
+- '8776'
+- '8784'
+- '8800'
+- '8832'
+- '8833'
+- '8834'
+- '8836'
+- '8840'
+- '8848'
+- '8864'
+- '8896'
+- '8960'
+- '8961'
+- '8962'
+- '8964'
+- '8968'
+- '8976'
+- '8992'
+- '9024'
+- '9088'
+- '9216'
+- '9217'
+- '9218'
+- '9219'
+- '9220'
+- '9221'
+- '9222'
+- '9224'
+- '9225'
+- '9226'
+- '9228'
+- '9232'
+- '9233'
+- '9234'
+- '9236'
+- '9240'
+- '9248'
+- '9249'
+- '9250'
+- '9252'
+- '9256'
+- '9264'
+- '9280'
+- '9281'
+- '9282'
+- '9284'
+- '9288'
+- '9296'
+- '9312'
+- '9344'
+- '9345'
+- '9346'
+- '9348'
+- '9352'
+- '9360'
+- '9376'
+- '9408'
+- '9472'
+- '9473'
+- '9474'
+- '9476'
+- '9480'
+- '9488'
+- '9504'
+- '9536'
+- '9600'
+- '9728'
+- '9729'
+- '9730'
+- '9732'
+- '9736'
+- '9744'
+- '9760'
+- '9792'
+- '9856'
+- '9984'
+- '10240'
+- '10241'
+- '10242'
+- '10243'
+- '10244'
+- '10245'
+- '10246'
+- '10248'
+- '10249'
+- '10250'
+- '10252'
+- '10256'
+- '10257'
+- '10258'
+- '10260'
+- '10264'
+- '10272'
+- '10273'
+- '10274'
+- '10276'
+- '10280'
+- '10288'
+- '10304'
+- '10305'
+- '10306'
+- '10308'
+- '10312'
+- '10320'
+- '10336'
+- '10368'
+- '10369'
+- '10370'
+- '10372'
+- '10376'
+- '10384'
+- '10400'
+- '10432'
+- '10496'
+- '10497'
+- '10498'
+- '10500'
+- '10504'
+- '10512'
+- '10528'
+- '10560'
+- '10624'
+- '10752'
+- '10753'
+- '10754'
+- '10756'
+- '10760'
+- '10768'
+- '10784'
+- '10816'
+- '10880'
+- '11008'
+- '11264'
+- '11265'
+- '11266'
+- '11268'
+- '11272'
+- '11280'
+- '11296'
+- '11328'
+- '11392'
+- '11520'
+- '11776'
+- '12288'
+- '12289'
+- '12290'
+- '12291'
+- '12292'
+- '12293'
+- '12294'
+- '12296'
+- '12297'
+- '12298'
+- '12300'
+- '12304'
+- '12305'
+- '12306'
+- '12308'
+- '12312'
+- '12320'
+- '12321'
+- '12322'
+- '12324'
+- '12328'
+- '12336'
+- '12352'
+- '12353'
+- '12354'
+- '12356'
+- '12360'
+- '12368'
+- '12384'
+- '12416'
+- '12417'
+- '12418'
+- '12420'
+- '12424'
+- '12432'
+- '12448'
+- '12480'
+- '12544'
+- '12545'
+- '12546'
+- '12548'
+- '12552'
+- '12560'
+- '12576'
+- '12608'
+- '12672'
+- '12800'
+- '12801'
+- '12802'
+- '12804'
+- '12808'
+- '12816'
+- '12832'
+- '12864'
+- '12928'
+- '13056'
+- '13312'
+- '13313'
+- '13314'
+- '13316'
+- '13320'
+- '13328'
+- '13344'
+- '13376'
+- '13440'
+- '13568'
+- '13824'
+- '14336'
+- '14337'
+- '14338'
+- '14340'
+- '14344'
+- '14352'
+- '14368'
+- '14400'
+- '14464'
+- '14592'
+- '14848'
+- '15360'
+- '16384'
+- '16385'
+- '16386'
+- '16387'
+- '16388'
+- '16389'
+- '16390'
+- '16391'
+- '16392'
+- '16393'
+- '16394'
+- '16395'
+- '16396'
+- '16397'
+- '16398'
+- '16400'
+- '16401'
+- '16402'
+- '16403'
+- '16404'
+- '16405'
+- '16406'
+- '16408'
+- '16409'
+- '16410'
+- '16412'
+- '16416'
+- '16417'
+- '16418'
+- '16419'
+- '16420'
+- '16421'
+- '16422'
+- '16424'
+- '16425'
+- '16426'
+- '16428'
+- '16432'
+- '16433'
+- '16434'
+- '16436'
+- '16440'
+- '16448'
+- '16449'
+- '16450'
+- '16451'
+- '16452'
+- '16453'
+- '16454'
+- '16456'
+- '16457'
+- '16458'
+- '16460'
+- '16464'
+- '16465'
+- '16466'
+- '16468'
+- '16472'
+- '16480'
+- '16481'
+- '16482'
+- '16484'
+- '16488'
+- '16496'
+- '16512'
+- '16513'
+- '16514'
+- '16515'
+- '16516'
+- '16517'
+- '16518'
+- '16520'
+- '16521'
+- '16522'
+- '16524'
+- '16528'
+- '16529'
+- '16530'
+- '16532'
+- '16536'
+- '16544'
+- '16545'
+- '16546'
+- '16548'
+- '16552'
+- '16560'
+- '16576'
+- '16577'
+- '16578'
+- '16580'
+- '16584'
+- '16592'
+- '16608'
+- '16640'
+- '16641'
+- '16642'
+- '16643'
+- '16644'
+- '16645'
+- '16646'
+- '16648'
+- '16649'
+- '16650'
+- '16652'
+- '16656'
+- '16657'
+- '16658'
+- '16660'
+- '16664'
+- '16672'
+- '16673'
+- '16674'
+- '16676'
+- '16680'
+- '16688'
+- '16704'
+- '16705'
+- '16706'
+- '16708'
+- '16712'
+- '16720'
+- '16736'
+- '16768'
+- '16769'
+- '16770'
+- '16772'
+- '16776'
+- '16784'
+- '16800'
+- '16832'
+- '16896'
+- '16897'
+- '16898'
+- '16899'
+- '16900'
+- '16901'
+- '16902'
+- '16904'
+- '16905'
+- '16906'
+- '16908'
+- '16912'
+- '16913'
+- '16914'
+- '16916'
+- '16920'
+- '16928'
+- '16929'
+- '16930'
+- '16932'
+- '16936'
+- '16944'
+- '16960'
+- '16961'
+- '16962'
+- '16964'
+- '16968'
+- '16976'
+- '16992'
+- '17024'
+- '17025'
+- '17026'
+- '17028'
+- '17032'
+- '17040'
+- '17056'
+- '17088'
+- '17152'
+- '17153'
+- '17154'
+- '17156'
+- '17160'
+- '17168'
+- '17184'
+- '17216'
+- '17280'
+- '17408'
+- '17409'
+- '17410'
+- '17411'
+- '17412'
+- '17413'
+- '17414'
+- '17416'
+- '17417'
+- '17418'
+- '17420'
+- '17424'
+- '17425'
+- '17426'
+- '17428'
+- '17432'
+- '17440'
+- '17441'
+- '17442'
+- '17444'
+- '17448'
+- '17456'
+- '17472'
+- '17473'
+- '17474'
+- '17476'
+- '17480'
+- '17488'
+- '17504'
+- '17536'
+- '17537'
+- '17538'
+- '17540'
+- '17544'
+- '17552'
+- '17568'
+- '17600'
+- '17664'
+- '17665'
+- '17666'
+- '17668'
+- '17672'
+- '17680'
+- '17696'
+- '17728'
+- '17792'
+- '17920'
+- '17921'
+- '17922'
+- '17924'
+- '17928'
+- '17936'
+- '17952'
+- '17984'
+- '18048'
+- '18176'
+- '18432'
+- '18433'
+- '18434'
+- '18435'
+- '18436'
+- '18437'
+- '18438'
+- '18440'
+- '18441'
+- '18442'
+- '18444'
+- '18448'
+- '18449'
+- '18450'
+- '18452'
+- '18456'
+- '18464'
+- '18465'
+- '18466'
+- '18468'
+- '18472'
+- '18480'
+- '18496'
+- '18497'
+- '18498'
+- '18500'
+- '18504'
+- '18512'
+- '18528'
+- '18560'
+- '18561'
+- '18562'
+- '18564'
+- '18568'
+- '18576'
+- '18592'
+- '18624'
+- '18688'
+- '18689'
+- '18690'
+- '18692'
+- '18696'
+- '18704'
+- '18720'
+- '18752'
+- '18816'
+- '18944'
+- '18945'
+- '18946'
+- '18948'
+- '18952'
+- '18960'
+- '18976'
+- '19008'
+- '19072'
+- '19200'
+- '19456'
+- '19457'
+- '19458'
+- '19460'
+- '19464'
+- '19472'
+- '19488'
+- '19520'
+- '19584'
+- '19712'
+- '19968'
+- '20480'
+- '20481'
+- '20482'
+- '20483'
+- '20484'
+- '20485'
+- '20486'
+- '20488'
+- '20489'
+- '20490'
+- '20492'
+- '20496'
+- '20497'
+- '20498'
+- '20500'
+- '20504'
+- '20512'
+- '20513'
+- '20514'
+- '20516'
+- '20520'
+- '20528'
+- '20544'
+- '20545'
+- '20546'
+- '20548'
+- '20552'
+- '20560'
+- '20576'
+- '20608'
+- '20609'
+- '20610'
+- '20612'
+- '20616'
+- '20624'
+- '20640'
+- '20672'
+- '20736'
+- '20737'
+- '20738'
+- '20740'
+- '20744'
+- '20752'
+- '20768'
+- '20800'
+- '20864'
+- '20992'
+- '20993'
+- '20994'
+- '20996'
+- '21000'
+- '21008'
+- '21024'
+- '21056'
+- '21120'
+- '21248'
+- '21504'
+- '21505'
+- '21506'
+- '21508'
+- '21512'
+- '21520'
+- '21536'
+- '21568'
+- '21632'
+- '21760'
+- '22016'
+- '22528'
+- '22529'
+- '22530'
+- '22532'
+- '22536'
+- '22544'
+- '22560'
+- '22592'
+- '22656'
+- '22784'
+- '23040'
+- '23552'
+- '24576'
+- '24577'
+- '24578'
+- '24579'
+- '24580'
+- '24581'
+- '24582'
+- '24584'
+- '24585'
+- '24586'
+- '24588'
+- '24592'
+- '24593'
+- '24594'
+- '24596'
+- '24600'
+- '24608'
+- '24609'
+- '24610'
+- '24612'
+- '24616'
+- '24624'
+- '24640'
+- '24641'
+- '24642'
+- '24644'
+- '24648'
+- '24656'
+- '24672'
+- '24704'
+- '24705'
+- '24706'
+- '24708'
+- '24712'
+- '24720'
+- '24736'
+- '24768'
+- '24832'
+- '24833'
+- '24834'
+- '24836'
+- '24840'
+- '24848'
+- '24864'
+- '24896'
+- '24960'
+- '25088'
+- '25089'
+- '25090'
+- '25092'
+- '25096'
+- '25104'
+- '25120'
+- '25152'
+- '25216'
+- '25344'
+- '25600'
+- '25601'
+- '25602'
+- '25604'
+- '25608'
+- '25616'
+- '25632'
+- '25664'
+- '25728'
+- '25856'
+- '26112'
+- '26624'
+- '26625'
+- '26626'
+- '26628'
+- '26632'
+- '26640'
+- '26656'
+- '26688'
+- '26752'
+- '26880'
+- '27136'
+- '27648'
+- '28672'
+- '28673'
+- '28674'
+- '28676'
+- '28680'
+- '28688'
+- '28704'
+- '28736'
+- '28800'
+- '28928'
+- '29184'
+- '29696'
+- '30720'
+- '32768'
+- '32769'
+- '32770'
+- '32771'
+- '32772'
+- '32773'
+- '32774'
+- '32775'
+- '32776'
+- '32777'
+- '32778'
+- '32779'
+- '32780'
+- '32781'
+- '32782'
+- '32784'
+- '32785'
+- '32786'
+- '32787'
+- '32788'
+- '32789'
+- '32790'
+- '32792'
+- '32793'
+- '32794'
+- '32796'
+- '32800'
+- '32801'
+- '32802'
+- '32803'
+- '32804'
+- '32805'
+- '32806'
+- '32808'
+- '32809'
+- '32810'
+- '32812'
+- '32816'
+- '32817'
+- '32818'
+- '32820'
+- '32824'
+- '32832'
+- '32833'
+- '32834'
+- '32835'
+- '32836'
+- '32837'
+- '32838'
+- '32840'
+- '32841'
+- '32842'
+- '32844'
+- '32848'
+- '32849'
+- '32850'
+- '32852'
+- '32856'
+- '32864'
+- '32865'
+- '32866'
+- '32868'
+- '32872'
+- '32880'
+- '32896'
+- '32897'
+- '32898'
+- '32899'
+- '32900'
+- '32901'
+- '32902'
+- '32904'
+- '32905'
+- '32906'
+- '32908'
+- '32912'
+- '32913'
+- '32914'
+- '32916'
+- '32920'
+- '32928'
+- '32929'
+- '32930'
+- '32932'
+- '32936'
+- '32944'
+- '32960'
+- '32961'
+- '32962'
+- '32964'
+- '32968'
+- '32976'
+- '32992'
+- '33024'
+- '33025'
+- '33026'
+- '33027'
+- '33028'
+- '33029'
+- '33030'
+- '33032'
+- '33033'
+- '33034'
+- '33036'
+- '33040'
+- '33041'
+- '33042'
+- '33044'
+- '33048'
+- '33056'
+- '33057'
+- '33058'
+- '33060'
+- '33064'
+- '33072'
+- '33088'
+- '33089'
+- '33090'
+- '33092'
+- '33096'
+- '33104'
+- '33120'
+- '33152'
+- '33153'
+- '33154'
+- '33156'
+- '33160'
+- '33168'
+- '33184'
+- '33216'
+- '33280'
+- '33281'
+- '33282'
+- '33283'
+- '33284'
+- '33285'
+- '33286'
+- '33288'
+- '33289'
+- '33290'
+- '33292'
+- '33296'
+- '33297'
+- '33298'
+- '33300'
+- '33304'
+- '33312'
+- '33313'
+- '33314'
+- '33316'
+- '33320'
+- '33328'
+- '33344'
+- '33345'
+- '33346'
+- '33348'
+- '33352'
+- '33360'
+- '33376'
+- '33408'
+- '33409'
+- '33410'
+- '33412'
+- '33416'
+- '33424'
+- '33440'
+- '33472'
+- '33536'
+- '33537'
+- '33538'
+- '33540'
+- '33544'
+- '33552'
+- '33568'
+- '33600'
+- '33664'
+- '33792'
+- '33793'
+- '33794'
+- '33795'
+- '33796'
+- '33797'
+- '33798'
+- '33800'
+- '33801'
+- '33802'
+- '33804'
+- '33808'
+- '33809'
+- '33810'
+- '33812'
+- '33816'
+- '33824'
+- '33825'
+- '33826'
+- '33828'
+- '33832'
+- '33840'
+- '33856'
+- '33857'
+- '33858'
+- '33860'
+- '33864'
+- '33872'
+- '33888'
+- '33920'
+- '33921'
+- '33922'
+- '33924'
+- '33928'
+- '33936'
+- '33952'
+- '33984'
+- '34048'
+- '34049'
+- '34050'
+- '34052'
+- '34056'
+- '34064'
+- '34080'
+- '34112'
+- '34176'
+- '34304'
+- '34305'
+- '34306'
+- '34308'
+- '34312'
+- '34320'
+- '34336'
+- '34368'
+- '34432'
+- '34560'
+- '34816'
+- '34817'
+- '34818'
+- '34819'
+- '34820'
+- '34821'
+- '34822'
+- '34824'
+- '34825'
+- '34826'
+- '34828'
+- '34832'
+- '34833'
+- '34834'
+- '34836'
+- '34840'
+- '34848'
+- '34849'
+- '34850'
+- '34852'
+- '34856'
+- '34864'
+- '34880'
+- '34881'
+- '34882'
+- '34884'
+- '34888'
+- '34896'
+- '34912'
+- '34944'
+- '34945'
+- '34946'
+- '34948'
+- '34952'
+- '34960'
+- '34976'
+- '35008'
+- '35072'
+- '35073'
+- '35074'
+- '35076'
+- '35080'
+- '35088'
+- '35104'
+- '35136'
+- '35200'
+- '35328'
+- '35329'
+- '35330'
+- '35332'
+- '35336'
+- '35344'
+- '35360'
+- '35392'
+- '35456'
+- '35584'
+- '35840'
+- '35841'
+- '35842'
+- '35844'
+- '35848'
+- '35856'
+- '35872'
+- '35904'
+- '35968'
+- '36096'
+- '36352'
+- '36864'
+- '36865'
+- '36866'
+- '36867'
+- '36868'
+- '36869'
+- '36870'
+- '36872'
+- '36873'
+- '36874'
+- '36876'
+- '36880'
+- '36881'
+- '36882'
+- '36884'
+- '36888'
+- '36896'
+- '36897'
+- '36898'
+- '36900'
+- '36904'
+- '36912'
+- '36928'
+- '36929'
+- '36930'
+- '36932'
+- '36936'
+- '36944'
+- '36960'
+- '36992'
+- '36993'
+- '36994'
+- '36996'
+- '37000'
+- '37008'
+- '37024'
+- '37056'
+- '37120'
+- '37121'
+- '37122'
+- '37124'
+- '37128'
+- '37136'
+- '37152'
+- '37184'
+- '37248'
+- '37376'
+- '37377'
+- '37378'
+- '37380'
+- '37384'
+- '37392'
+- '37408'
+- '37440'
+- '37504'
+- '37632'
+- '37888'
+- '37889'
+- '37890'
+- '37892'
+- '37896'
+- '37904'
+- '37920'
+- '37952'
+- '38016'
+- '38144'
+- '38400'
+- '38912'
+- '38913'
+- '38914'
+- '38916'
+- '38920'
+- '38928'
+- '38944'
+- '38976'
+- '39040'
+- '39168'
+- '39424'
+- '39936'
+- '40960'
+- '40961'
+- '40962'
+- '40963'
+- '40964'
+- '40965'
+- '40966'
+- '40968'
+- '40969'
+- '40970'
+- '40972'
+- '40976'
+- '40977'
+- '40978'
+- '40980'
+- '40984'
+- '40992'
+- '40993'
+- '40994'
+- '40996'
+- '41000'
+- '41008'
+- '41024'
+- '41025'
+- '41026'
+- '41028'
+- '41032'
+- '41040'
+- '41056'
+- '41088'
+- '41089'
+- '41090'
+- '41092'
+- '41096'
+- '41104'
+- '41120'
+- '41152'
+- '41216'
+- '41217'
+- '41218'
+- '41220'
+- '41224'
+- '41232'
+- '41248'
+- '41280'
+- '41344'
+- '41472'
+- '41473'
+- '41474'
+- '41476'
+- '41480'
+- '41488'
+- '41504'
+- '41536'
+- '41600'
+- '41728'
+- '41984'
+- '41985'
+- '41986'
+- '41988'
+- '41992'
+- '42000'
+- '42016'
+- '42048'
+- '42112'
+- '42240'
+- '42496'
+- '43008'
+- '43009'
+- '43010'
+- '43012'
+- '43016'
+- '43024'
+- '43040'
+- '43072'
+- '43136'
+- '43264'
+- '43520'
+- '44032'
+- '45056'
+- '45057'
+- '45058'
+- '45060'
+- '45064'
+- '45072'
+- '45088'
+- '45120'
+- '45184'
+- '45312'
+- '45568'
+- '46080'
+- '47104'
+- '49152'
+- '49153'
+- '49154'
+- '49155'
+- '49156'
+- '49157'
+- '49158'
+- '49160'
+- '49161'
+- '49162'
+- '49164'
+- '49168'
+- '49169'
+- '49170'
+- '49172'
+- '49176'
+- '49184'
+- '49185'
+- '49186'
+- '49188'
+- '49192'
+- '49200'
+- '49216'
+- '49217'
+- '49218'
+- '49220'
+- '49224'
+- '49232'
+- '49248'
+- '49280'
+- '49281'
+- '49282'
+- '49284'
+- '49288'
+- '49296'
+- '49312'
+- '49344'
+- '49408'
+- '49409'
+- '49410'
+- '49412'
+- '49416'
+- '49424'
+- '49440'
+- '49472'
+- '49536'
+- '49664'
+- '49665'
+- '49666'
+- '49668'
+- '49672'
+- '49680'
+- '49696'
+- '49728'
+- '49792'
+- '49920'
+- '50176'
+- '50177'
+- '50178'
+- '50180'
+- '50184'
+- '50192'
+- '50208'
+- '50240'
+- '50304'
+- '50432'
+- '50688'
+- '51200'
+- '51201'
+- '51202'
+- '51204'
+- '51208'
+- '51216'
+- '51232'
+- '51264'
+- '51328'
+- '51456'
+- '51712'
+- '52224'
+- '53248'
+- '53249'
+- '53250'
+- '53252'
+- '53256'
+- '53264'
+- '53280'
+- '53312'
+- '53376'
+- '53504'
+- '53760'
+- '54272'
+- '55296'
+- '57344'
+- '57345'
+- '57346'
+- '57348'
+- '57352'
+- '57360'
+- '57376'
+- '57408'
+- '57472'
+- '57600'
+- '57856'
+- '58368'
+- '59392'
+- '61440'
+init: null
+input_size: null
+cmvn_file: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: true
+joint_net_conf: null
+use_preprocessor: true
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+specaug: null
+specaug_conf: {}
+normalize: null
+normalize_conf: {}
+label_aggregator: null
+label_aggregator_conf: {}
+model: sond
+model_conf:
+ lsm_weight: 0.1
+ length_normalized_loss: true
+ max_spk_num: 16
+ normalize_speech_speaker: true
+# speech encoder
+encoder: resnet34_sp_l2reg
+encoder_conf:
+ # input_size is passed in by the model and equals the feature dim
+ # input_size: 80
+ pooling_type: "window_shift"
+ batchnorm_momentum: 0.01
+ pool_size: 20
+ stride: 1
+ tf2torch_tensor_name_prefix_torch: encoder
+ tf2torch_tensor_name_prefix_tf: EAND/speech_encoder
+speaker_encoder: null
+speaker_encoder_conf: {}
+ci_scorer: conv
+ci_scorer_conf:
+ input_units: 512
+ num_layers: 3
+ num_units: 512
+ kernel_size: 1
+ dropout_rate: 0.0
+ position_encoder: null
+ out_units: 1
+ out_norm: false
+ auxiliary_states: false
+ tf2torch_tensor_name_prefix_torch: ci_scorer
+ tf2torch_tensor_name_prefix_tf: EAND/compute_distance_layer/ci_scorer
+cd_scorer: san
+cd_scorer_conf:
+ input_size: 512
+ output_size: 512
+ out_units: 1
+ attention_heads: 4
+ linear_units: 1024
+ num_blocks: 4
+ dropout_rate: 0.0
+ positional_dropout_rate: 0.0
+ attention_dropout_rate: 0.0
+ # use the quoted string "null" (not a YAML null) to remove the input layer
+ input_layer: "null"
+ pos_enc_class: null
+ normalize_before: true
+ tf2torch_tensor_name_prefix_torch: cd_scorer
+ tf2torch_tensor_name_prefix_tf: EAND/compute_distance_layer/cd_scorer
+# post net
+decoder: fsmn
+decoder_conf:
+ in_units: 32
+ out_units: 2517
+ filter_size: 31
+ fsmn_num_layers: 6
+ dnn_num_layers: 1
+ num_memory_units: 16
+ ffn_inner_dim: 512
+ dropout_rate: 0.0
+ tf2torch_tensor_name_prefix_torch: decoder
+ tf2torch_tensor_name_prefix_tf: EAND/post_net
+frontend: wav_frontend
+frontend_conf:
+ fs: 8000
+ window: povey
+ n_mels: 80
+ frame_length: 25
+ frame_shift: 10
+ filter_length_min: -1
+ filter_length_max: -1
+ lfr_m: 1
+ lfr_n: 1
+ dither: 0.0
+ snip_edges: false
+ upsacle_samples: false
+num_worker_count: 1
+required:
+- output_dir
+- token_list
+oss_bucket: 'null'
+version: 0.1.4
diff --git a/egs/callhome/diarization/sond/sond_fbank.yaml b/egs/callhome/diarization/sond/sond_fbank.yaml
new file mode 100644
index 0000000..fc76259
--- /dev/null
+++ b/egs/callhome/diarization/sond/sond_fbank.yaml
@@ -0,0 +1,2739 @@
+config: finetune.yaml
+print_config: false
+log_level: INFO
+dry_run: false
+iterator_type: sequence
+output_dir: exp/sond
+ngpu: 1
+seed: 0
+num_workers: 16
+num_att_plot: 0
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: true
+distributed: false
+unused_parameters: true
+sharded_ddp: false
+ddp_backend: pytorch_ddp
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+collect_stats: false
+write_collected_feats: false
+max_epoch: 50
+patience: null
+val_scheduler_criterion:
+- valid
+- acc
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 10
+nbest_averaging_interval: 0
+grad_clip: 5
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: false
+log_interval: 50
+use_matplotlib: false
+use_tensorboard: true
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+use_pai: true
+detect_anomaly: false
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 10000
+valid_batch_bins: null
+train_shape_file:
+- /data/volume1/youyan/aishell/ark/train/speech_shape.1
+- /data/volume1/youyan/aishell/ark/train/text_shape.1
+valid_shape_file:
+- /data/volume1/youyan/aishell/ark/dev/speech_shape.1
+- /data/volume1/youyan/aishell/ark/dev/text_shape.1
+batch_type: length
+valid_batch_type: null
+fold_length:
+- 512
+- 150
+sort_in_batch: descending
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+train_data_path_and_name_and_type:
+- - /data/volume1/youyan/aishell/ark/train/data.scp
+ - speech
+ - kaldi_ark
+- - /data/volume1/youyan/aishell/ark/train/data.text.1
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - /data/volume1/youyan/aishell/ark/dev/data.scp
+ - speech
+ - kaldi_ark
+- - /data/volume1/youyan/aishell/ark/dev/data.text.1
+ - text
+ - text
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+valid_max_cache_size: null
+optim: adam
+optim_conf:
+ lr: 0.0005
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 30000
+token_list:
+- '0'
+- '1'
+- '2'
+- '3'
+- '4'
+- '5'
+- '6'
+- '7'
+- '8'
+- '9'
+- '10'
+- '11'
+- '12'
+- '13'
+- '14'
+- '15'
+- '16'
+- '17'
+- '18'
+- '19'
+- '20'
+- '21'
+- '22'
+- '23'
+- '24'
+- '25'
+- '26'
+- '27'
+- '28'
+- '29'
+- '30'
+- '32'
+- '33'
+- '34'
+- '35'
+- '36'
+- '37'
+- '38'
+- '39'
+- '40'
+- '41'
+- '42'
+- '43'
+- '44'
+- '45'
+- '46'
+- '48'
+- '49'
+- '50'
+- '51'
+- '52'
+- '53'
+- '54'
+- '56'
+- '57'
+- '58'
+- '60'
+- '64'
+- '65'
+- '66'
+- '67'
+- '68'
+- '69'
+- '70'
+- '71'
+- '72'
+- '73'
+- '74'
+- '75'
+- '76'
+- '77'
+- '78'
+- '80'
+- '81'
+- '82'
+- '83'
+- '84'
+- '85'
+- '86'
+- '88'
+- '89'
+- '90'
+- '92'
+- '96'
+- '97'
+- '98'
+- '99'
+- '100'
+- '101'
+- '102'
+- '104'
+- '105'
+- '106'
+- '108'
+- '112'
+- '113'
+- '114'
+- '116'
+- '120'
+- '128'
+- '129'
+- '130'
+- '131'
+- '132'
+- '133'
+- '134'
+- '135'
+- '136'
+- '137'
+- '138'
+- '139'
+- '140'
+- '141'
+- '142'
+- '144'
+- '145'
+- '146'
+- '147'
+- '148'
+- '149'
+- '150'
+- '152'
+- '153'
+- '154'
+- '156'
+- '160'
+- '161'
+- '162'
+- '163'
+- '164'
+- '165'
+- '166'
+- '168'
+- '169'
+- '170'
+- '172'
+- '176'
+- '177'
+- '178'
+- '180'
+- '184'
+- '192'
+- '193'
+- '194'
+- '195'
+- '196'
+- '197'
+- '198'
+- '200'
+- '201'
+- '202'
+- '204'
+- '208'
+- '209'
+- '210'
+- '212'
+- '216'
+- '224'
+- '225'
+- '226'
+- '228'
+- '232'
+- '240'
+- '256'
+- '257'
+- '258'
+- '259'
+- '260'
+- '261'
+- '262'
+- '263'
+- '264'
+- '265'
+- '266'
+- '267'
+- '268'
+- '269'
+- '270'
+- '272'
+- '273'
+- '274'
+- '275'
+- '276'
+- '277'
+- '278'
+- '280'
+- '281'
+- '282'
+- '284'
+- '288'
+- '289'
+- '290'
+- '291'
+- '292'
+- '293'
+- '294'
+- '296'
+- '297'
+- '298'
+- '300'
+- '304'
+- '305'
+- '306'
+- '308'
+- '312'
+- '320'
+- '321'
+- '322'
+- '323'
+- '324'
+- '325'
+- '326'
+- '328'
+- '329'
+- '330'
+- '332'
+- '336'
+- '337'
+- '338'
+- '340'
+- '344'
+- '352'
+- '353'
+- '354'
+- '356'
+- '360'
+- '368'
+- '384'
+- '385'
+- '386'
+- '387'
+- '388'
+- '389'
+- '390'
+- '392'
+- '393'
+- '394'
+- '396'
+- '400'
+- '401'
+- '402'
+- '404'
+- '408'
+- '416'
+- '417'
+- '418'
+- '420'
+- '424'
+- '432'
+- '448'
+- '449'
+- '450'
+- '452'
+- '456'
+- '464'
+- '480'
+- '512'
+- '513'
+- '514'
+- '515'
+- '516'
+- '517'
+- '518'
+- '519'
+- '520'
+- '521'
+- '522'
+- '523'
+- '524'
+- '525'
+- '526'
+- '528'
+- '529'
+- '530'
+- '531'
+- '532'
+- '533'
+- '534'
+- '536'
+- '537'
+- '538'
+- '540'
+- '544'
+- '545'
+- '546'
+- '547'
+- '548'
+- '549'
+- '550'
+- '552'
+- '553'
+- '554'
+- '556'
+- '560'
+- '561'
+- '562'
+- '564'
+- '568'
+- '576'
+- '577'
+- '578'
+- '579'
+- '580'
+- '581'
+- '582'
+- '584'
+- '585'
+- '586'
+- '588'
+- '592'
+- '593'
+- '594'
+- '596'
+- '600'
+- '608'
+- '609'
+- '610'
+- '612'
+- '616'
+- '624'
+- '640'
+- '641'
+- '642'
+- '643'
+- '644'
+- '645'
+- '646'
+- '648'
+- '649'
+- '650'
+- '652'
+- '656'
+- '657'
+- '658'
+- '660'
+- '664'
+- '672'
+- '673'
+- '674'
+- '676'
+- '680'
+- '688'
+- '704'
+- '705'
+- '706'
+- '708'
+- '712'
+- '720'
+- '736'
+- '768'
+- '769'
+- '770'
+- '771'
+- '772'
+- '773'
+- '774'
+- '776'
+- '777'
+- '778'
+- '780'
+- '784'
+- '785'
+- '786'
+- '788'
+- '792'
+- '800'
+- '801'
+- '802'
+- '804'
+- '808'
+- '816'
+- '832'
+- '833'
+- '834'
+- '836'
+- '840'
+- '848'
+- '864'
+- '896'
+- '897'
+- '898'
+- '900'
+- '904'
+- '912'
+- '928'
+- '960'
+- '1024'
+- '1025'
+- '1026'
+- '1027'
+- '1028'
+- '1029'
+- '1030'
+- '1031'
+- '1032'
+- '1033'
+- '1034'
+- '1035'
+- '1036'
+- '1037'
+- '1038'
+- '1040'
+- '1041'
+- '1042'
+- '1043'
+- '1044'
+- '1045'
+- '1046'
+- '1048'
+- '1049'
+- '1050'
+- '1052'
+- '1056'
+- '1057'
+- '1058'
+- '1059'
+- '1060'
+- '1061'
+- '1062'
+- '1064'
+- '1065'
+- '1066'
+- '1068'
+- '1072'
+- '1073'
+- '1074'
+- '1076'
+- '1080'
+- '1088'
+- '1089'
+- '1090'
+- '1091'
+- '1092'
+- '1093'
+- '1094'
+- '1096'
+- '1097'
+- '1098'
+- '1100'
+- '1104'
+- '1105'
+- '1106'
+- '1108'
+- '1112'
+- '1120'
+- '1121'
+- '1122'
+- '1124'
+- '1128'
+- '1136'
+- '1152'
+- '1153'
+- '1154'
+- '1155'
+- '1156'
+- '1157'
+- '1158'
+- '1160'
+- '1161'
+- '1162'
+- '1164'
+- '1168'
+- '1169'
+- '1170'
+- '1172'
+- '1176'
+- '1184'
+- '1185'
+- '1186'
+- '1188'
+- '1192'
+- '1200'
+- '1216'
+- '1217'
+- '1218'
+- '1220'
+- '1224'
+- '1232'
+- '1248'
+- '1280'
+- '1281'
+- '1282'
+- '1283'
+- '1284'
+- '1285'
+- '1286'
+- '1288'
+- '1289'
+- '1290'
+- '1292'
+- '1296'
+- '1297'
+- '1298'
+- '1300'
+- '1304'
+- '1312'
+- '1313'
+- '1314'
+- '1316'
+- '1320'
+- '1328'
+- '1344'
+- '1345'
+- '1346'
+- '1348'
+- '1352'
+- '1360'
+- '1376'
+- '1408'
+- '1409'
+- '1410'
+- '1412'
+- '1416'
+- '1424'
+- '1440'
+- '1472'
+- '1536'
+- '1537'
+- '1538'
+- '1539'
+- '1540'
+- '1541'
+- '1542'
+- '1544'
+- '1545'
+- '1546'
+- '1548'
+- '1552'
+- '1553'
+- '1554'
+- '1556'
+- '1560'
+- '1568'
+- '1569'
+- '1570'
+- '1572'
+- '1576'
+- '1584'
+- '1600'
+- '1601'
+- '1602'
+- '1604'
+- '1608'
+- '1616'
+- '1632'
+- '1664'
+- '1665'
+- '1666'
+- '1668'
+- '1672'
+- '1680'
+- '1696'
+- '1728'
+- '1792'
+- '1793'
+- '1794'
+- '1796'
+- '1800'
+- '1808'
+- '1824'
+- '1856'
+- '1920'
+- '2048'
+- '2049'
+- '2050'
+- '2051'
+- '2052'
+- '2053'
+- '2054'
+- '2055'
+- '2056'
+- '2057'
+- '2058'
+- '2059'
+- '2060'
+- '2061'
+- '2062'
+- '2064'
+- '2065'
+- '2066'
+- '2067'
+- '2068'
+- '2069'
+- '2070'
+- '2072'
+- '2073'
+- '2074'
+- '2076'
+- '2080'
+- '2081'
+- '2082'
+- '2083'
+- '2084'
+- '2085'
+- '2086'
+- '2088'
+- '2089'
+- '2090'
+- '2092'
+- '2096'
+- '2097'
+- '2098'
+- '2100'
+- '2104'
+- '2112'
+- '2113'
+- '2114'
+- '2115'
+- '2116'
+- '2117'
+- '2118'
+- '2120'
+- '2121'
+- '2122'
+- '2124'
+- '2128'
+- '2129'
+- '2130'
+- '2132'
+- '2136'
+- '2144'
+- '2145'
+- '2146'
+- '2148'
+- '2152'
+- '2160'
+- '2176'
+- '2177'
+- '2178'
+- '2179'
+- '2180'
+- '2181'
+- '2182'
+- '2184'
+- '2185'
+- '2186'
+- '2188'
+- '2192'
+- '2193'
+- '2194'
+- '2196'
+- '2200'
+- '2208'
+- '2209'
+- '2210'
+- '2212'
+- '2216'
+- '2224'
+- '2240'
+- '2241'
+- '2242'
+- '2244'
+- '2248'
+- '2256'
+- '2272'
+- '2304'
+- '2305'
+- '2306'
+- '2307'
+- '2308'
+- '2309'
+- '2310'
+- '2312'
+- '2313'
+- '2314'
+- '2316'
+- '2320'
+- '2321'
+- '2322'
+- '2324'
+- '2328'
+- '2336'
+- '2337'
+- '2338'
+- '2340'
+- '2344'
+- '2352'
+- '2368'
+- '2369'
+- '2370'
+- '2372'
+- '2376'
+- '2384'
+- '2400'
+- '2432'
+- '2433'
+- '2434'
+- '2436'
+- '2440'
+- '2448'
+- '2464'
+- '2496'
+- '2560'
+- '2561'
+- '2562'
+- '2563'
+- '2564'
+- '2565'
+- '2566'
+- '2568'
+- '2569'
+- '2570'
+- '2572'
+- '2576'
+- '2577'
+- '2578'
+- '2580'
+- '2584'
+- '2592'
+- '2593'
+- '2594'
+- '2596'
+- '2600'
+- '2608'
+- '2624'
+- '2625'
+- '2626'
+- '2628'
+- '2632'
+- '2640'
+- '2656'
+- '2688'
+- '2689'
+- '2690'
+- '2692'
+- '2696'
+- '2704'
+- '2720'
+- '2752'
+- '2816'
+- '2817'
+- '2818'
+- '2820'
+- '2824'
+- '2832'
+- '2848'
+- '2880'
+- '2944'
+- '3072'
+- '3073'
+- '3074'
+- '3075'
+- '3076'
+- '3077'
+- '3078'
+- '3080'
+- '3081'
+- '3082'
+- '3084'
+- '3088'
+- '3089'
+- '3090'
+- '3092'
+- '3096'
+- '3104'
+- '3105'
+- '3106'
+- '3108'
+- '3112'
+- '3120'
+- '3136'
+- '3137'
+- '3138'
+- '3140'
+- '3144'
+- '3152'
+- '3168'
+- '3200'
+- '3201'
+- '3202'
+- '3204'
+- '3208'
+- '3216'
+- '3232'
+- '3264'
+- '3328'
+- '3329'
+- '3330'
+- '3332'
+- '3336'
+- '3344'
+- '3360'
+- '3392'
+- '3456'
+- '3584'
+- '3585'
+- '3586'
+- '3588'
+- '3592'
+- '3600'
+- '3616'
+- '3648'
+- '3712'
+- '3840'
+- '4096'
+- '4097'
+- '4098'
+- '4099'
+- '4100'
+- '4101'
+- '4102'
+- '4103'
+- '4104'
+- '4105'
+- '4106'
+- '4107'
+- '4108'
+- '4109'
+- '4110'
+- '4112'
+- '4113'
+- '4114'
+- '4115'
+- '4116'
+- '4117'
+- '4118'
+- '4120'
+- '4121'
+- '4122'
+- '4124'
+- '4128'
+- '4129'
+- '4130'
+- '4131'
+- '4132'
+- '4133'
+- '4134'
+- '4136'
+- '4137'
+- '4138'
+- '4140'
+- '4144'
+- '4145'
+- '4146'
+- '4148'
+- '4152'
+- '4160'
+- '4161'
+- '4162'
+- '4163'
+- '4164'
+- '4165'
+- '4166'
+- '4168'
+- '4169'
+- '4170'
+- '4172'
+- '4176'
+- '4177'
+- '4178'
+- '4180'
+- '4184'
+- '4192'
+- '4193'
+- '4194'
+- '4196'
+- '4200'
+- '4208'
+- '4224'
+- '4225'
+- '4226'
+- '4227'
+- '4228'
+- '4229'
+- '4230'
+- '4232'
+- '4233'
+- '4234'
+- '4236'
+- '4240'
+- '4241'
+- '4242'
+- '4244'
+- '4248'
+- '4256'
+- '4257'
+- '4258'
+- '4260'
+- '4264'
+- '4272'
+- '4288'
+- '4289'
+- '4290'
+- '4292'
+- '4296'
+- '4304'
+- '4320'
+- '4352'
+- '4353'
+- '4354'
+- '4355'
+- '4356'
+- '4357'
+- '4358'
+- '4360'
+- '4361'
+- '4362'
+- '4364'
+- '4368'
+- '4369'
+- '4370'
+- '4372'
+- '4376'
+- '4384'
+- '4385'
+- '4386'
+- '4388'
+- '4392'
+- '4400'
+- '4416'
+- '4417'
+- '4418'
+- '4420'
+- '4424'
+- '4432'
+- '4448'
+- '4480'
+- '4481'
+- '4482'
+- '4484'
+- '4488'
+- '4496'
+- '4512'
+- '4544'
+- '4608'
+- '4609'
+- '4610'
+- '4611'
+- '4612'
+- '4613'
+- '4614'
+- '4616'
+- '4617'
+- '4618'
+- '4620'
+- '4624'
+- '4625'
+- '4626'
+- '4628'
+- '4632'
+- '4640'
+- '4641'
+- '4642'
+- '4644'
+- '4648'
+- '4656'
+- '4672'
+- '4673'
+- '4674'
+- '4676'
+- '4680'
+- '4688'
+- '4704'
+- '4736'
+- '4737'
+- '4738'
+- '4740'
+- '4744'
+- '4752'
+- '4768'
+- '4800'
+- '4864'
+- '4865'
+- '4866'
+- '4868'
+- '4872'
+- '4880'
+- '4896'
+- '4928'
+- '4992'
+- '5120'
+- '5121'
+- '5122'
+- '5123'
+- '5124'
+- '5125'
+- '5126'
+- '5128'
+- '5129'
+- '5130'
+- '5132'
+- '5136'
+- '5137'
+- '5138'
+- '5140'
+- '5144'
+- '5152'
+- '5153'
+- '5154'
+- '5156'
+- '5160'
+- '5168'
+- '5184'
+- '5185'
+- '5186'
+- '5188'
+- '5192'
+- '5200'
+- '5216'
+- '5248'
+- '5249'
+- '5250'
+- '5252'
+- '5256'
+- '5264'
+- '5280'
+- '5312'
+- '5376'
+- '5377'
+- '5378'
+- '5380'
+- '5384'
+- '5392'
+- '5408'
+- '5440'
+- '5504'
+- '5632'
+- '5633'
+- '5634'
+- '5636'
+- '5640'
+- '5648'
+- '5664'
+- '5696'
+- '5760'
+- '5888'
+- '6144'
+- '6145'
+- '6146'
+- '6147'
+- '6148'
+- '6149'
+- '6150'
+- '6152'
+- '6153'
+- '6154'
+- '6156'
+- '6160'
+- '6161'
+- '6162'
+- '6164'
+- '6168'
+- '6176'
+- '6177'
+- '6178'
+- '6180'
+- '6184'
+- '6192'
+- '6208'
+- '6209'
+- '6210'
+- '6212'
+- '6216'
+- '6224'
+- '6240'
+- '6272'
+- '6273'
+- '6274'
+- '6276'
+- '6280'
+- '6288'
+- '6304'
+- '6336'
+- '6400'
+- '6401'
+- '6402'
+- '6404'
+- '6408'
+- '6416'
+- '6432'
+- '6464'
+- '6528'
+- '6656'
+- '6657'
+- '6658'
+- '6660'
+- '6664'
+- '6672'
+- '6688'
+- '6720'
+- '6784'
+- '6912'
+- '7168'
+- '7169'
+- '7170'
+- '7172'
+- '7176'
+- '7184'
+- '7200'
+- '7232'
+- '7296'
+- '7424'
+- '7680'
+- '8192'
+- '8193'
+- '8194'
+- '8195'
+- '8196'
+- '8197'
+- '8198'
+- '8199'
+- '8200'
+- '8201'
+- '8202'
+- '8203'
+- '8204'
+- '8205'
+- '8206'
+- '8208'
+- '8209'
+- '8210'
+- '8211'
+- '8212'
+- '8213'
+- '8214'
+- '8216'
+- '8217'
+- '8218'
+- '8220'
+- '8224'
+- '8225'
+- '8226'
+- '8227'
+- '8228'
+- '8229'
+- '8230'
+- '8232'
+- '8233'
+- '8234'
+- '8236'
+- '8240'
+- '8241'
+- '8242'
+- '8244'
+- '8248'
+- '8256'
+- '8257'
+- '8258'
+- '8259'
+- '8260'
+- '8261'
+- '8262'
+- '8264'
+- '8265'
+- '8266'
+- '8268'
+- '8272'
+- '8273'
+- '8274'
+- '8276'
+- '8280'
+- '8288'
+- '8289'
+- '8290'
+- '8292'
+- '8296'
+- '8304'
+- '8320'
+- '8321'
+- '8322'
+- '8323'
+- '8324'
+- '8325'
+- '8326'
+- '8328'
+- '8329'
+- '8330'
+- '8332'
+- '8336'
+- '8337'
+- '8338'
+- '8340'
+- '8344'
+- '8352'
+- '8353'
+- '8354'
+- '8356'
+- '8360'
+- '8368'
+- '8384'
+- '8385'
+- '8386'
+- '8388'
+- '8392'
+- '8400'
+- '8416'
+- '8448'
+- '8449'
+- '8450'
+- '8451'
+- '8452'
+- '8453'
+- '8454'
+- '8456'
+- '8457'
+- '8458'
+- '8460'
+- '8464'
+- '8465'
+- '8466'
+- '8468'
+- '8472'
+- '8480'
+- '8481'
+- '8482'
+- '8484'
+- '8488'
+- '8496'
+- '8512'
+- '8513'
+- '8514'
+- '8516'
+- '8520'
+- '8528'
+- '8544'
+- '8576'
+- '8577'
+- '8578'
+- '8580'
+- '8584'
+- '8592'
+- '8608'
+- '8640'
+- '8704'
+- '8705'
+- '8706'
+- '8707'
+- '8708'
+- '8709'
+- '8710'
+- '8712'
+- '8713'
+- '8714'
+- '8716'
+- '8720'
+- '8721'
+- '8722'
+- '8724'
+- '8728'
+- '8736'
+- '8737'
+- '8738'
+- '8740'
+- '8744'
+- '8752'
+- '8768'
+- '8769'
+- '8770'
+- '8772'
+- '8776'
+- '8784'
+- '8800'
+- '8832'
+- '8833'
+- '8834'
+- '8836'
+- '8840'
+- '8848'
+- '8864'
+- '8896'
+- '8960'
+- '8961'
+- '8962'
+- '8964'
+- '8968'
+- '8976'
+- '8992'
+- '9024'
+- '9088'
+- '9216'
+- '9217'
+- '9218'
+- '9219'
+- '9220'
+- '9221'
+- '9222'
+- '9224'
+- '9225'
+- '9226'
+- '9228'
+- '9232'
+- '9233'
+- '9234'
+- '9236'
+- '9240'
+- '9248'
+- '9249'
+- '9250'
+- '9252'
+- '9256'
+- '9264'
+- '9280'
+- '9281'
+- '9282'
+- '9284'
+- '9288'
+- '9296'
+- '9312'
+- '9344'
+- '9345'
+- '9346'
+- '9348'
+- '9352'
+- '9360'
+- '9376'
+- '9408'
+- '9472'
+- '9473'
+- '9474'
+- '9476'
+- '9480'
+- '9488'
+- '9504'
+- '9536'
+- '9600'
+- '9728'
+- '9729'
+- '9730'
+- '9732'
+- '9736'
+- '9744'
+- '9760'
+- '9792'
+- '9856'
+- '9984'
+- '10240'
+- '10241'
+- '10242'
+- '10243'
+- '10244'
+- '10245'
+- '10246'
+- '10248'
+- '10249'
+- '10250'
+- '10252'
+- '10256'
+- '10257'
+- '10258'
+- '10260'
+- '10264'
+- '10272'
+- '10273'
+- '10274'
+- '10276'
+- '10280'
+- '10288'
+- '10304'
+- '10305'
+- '10306'
+- '10308'
+- '10312'
+- '10320'
+- '10336'
+- '10368'
+- '10369'
+- '10370'
+- '10372'
+- '10376'
+- '10384'
+- '10400'
+- '10432'
+- '10496'
+- '10497'
+- '10498'
+- '10500'
+- '10504'
+- '10512'
+- '10528'
+- '10560'
+- '10624'
+- '10752'
+- '10753'
+- '10754'
+- '10756'
+- '10760'
+- '10768'
+- '10784'
+- '10816'
+- '10880'
+- '11008'
+- '11264'
+- '11265'
+- '11266'
+- '11268'
+- '11272'
+- '11280'
+- '11296'
+- '11328'
+- '11392'
+- '11520'
+- '11776'
+- '12288'
+- '12289'
+- '12290'
+- '12291'
+- '12292'
+- '12293'
+- '12294'
+- '12296'
+- '12297'
+- '12298'
+- '12300'
+- '12304'
+- '12305'
+- '12306'
+- '12308'
+- '12312'
+- '12320'
+- '12321'
+- '12322'
+- '12324'
+- '12328'
+- '12336'
+- '12352'
+- '12353'
+- '12354'
+- '12356'
+- '12360'
+- '12368'
+- '12384'
+- '12416'
+- '12417'
+- '12418'
+- '12420'
+- '12424'
+- '12432'
+- '12448'
+- '12480'
+- '12544'
+- '12545'
+- '12546'
+- '12548'
+- '12552'
+- '12560'
+- '12576'
+- '12608'
+- '12672'
+- '12800'
+- '12801'
+- '12802'
+- '12804'
+- '12808'
+- '12816'
+- '12832'
+- '12864'
+- '12928'
+- '13056'
+- '13312'
+- '13313'
+- '13314'
+- '13316'
+- '13320'
+- '13328'
+- '13344'
+- '13376'
+- '13440'
+- '13568'
+- '13824'
+- '14336'
+- '14337'
+- '14338'
+- '14340'
+- '14344'
+- '14352'
+- '14368'
+- '14400'
+- '14464'
+- '14592'
+- '14848'
+- '15360'
+- '16384'
+- '16385'
+- '16386'
+- '16387'
+- '16388'
+- '16389'
+- '16390'
+- '16391'
+- '16392'
+- '16393'
+- '16394'
+- '16395'
+- '16396'
+- '16397'
+- '16398'
+- '16400'
+- '16401'
+- '16402'
+- '16403'
+- '16404'
+- '16405'
+- '16406'
+- '16408'
+- '16409'
+- '16410'
+- '16412'
+- '16416'
+- '16417'
+- '16418'
+- '16419'
+- '16420'
+- '16421'
+- '16422'
+- '16424'
+- '16425'
+- '16426'
+- '16428'
+- '16432'
+- '16433'
+- '16434'
+- '16436'
+- '16440'
+- '16448'
+- '16449'
+- '16450'
+- '16451'
+- '16452'
+- '16453'
+- '16454'
+- '16456'
+- '16457'
+- '16458'
+- '16460'
+- '16464'
+- '16465'
+- '16466'
+- '16468'
+- '16472'
+- '16480'
+- '16481'
+- '16482'
+- '16484'
+- '16488'
+- '16496'
+- '16512'
+- '16513'
+- '16514'
+- '16515'
+- '16516'
+- '16517'
+- '16518'
+- '16520'
+- '16521'
+- '16522'
+- '16524'
+- '16528'
+- '16529'
+- '16530'
+- '16532'
+- '16536'
+- '16544'
+- '16545'
+- '16546'
+- '16548'
+- '16552'
+- '16560'
+- '16576'
+- '16577'
+- '16578'
+- '16580'
+- '16584'
+- '16592'
+- '16608'
+- '16640'
+- '16641'
+- '16642'
+- '16643'
+- '16644'
+- '16645'
+- '16646'
+- '16648'
+- '16649'
+- '16650'
+- '16652'
+- '16656'
+- '16657'
+- '16658'
+- '16660'
+- '16664'
+- '16672'
+- '16673'
+- '16674'
+- '16676'
+- '16680'
+- '16688'
+- '16704'
+- '16705'
+- '16706'
+- '16708'
+- '16712'
+- '16720'
+- '16736'
+- '16768'
+- '16769'
+- '16770'
+- '16772'
+- '16776'
+- '16784'
+- '16800'
+- '16832'
+- '16896'
+- '16897'
+- '16898'
+- '16899'
+- '16900'
+- '16901'
+- '16902'
+- '16904'
+- '16905'
+- '16906'
+- '16908'
+- '16912'
+- '16913'
+- '16914'
+- '16916'
+- '16920'
+- '16928'
+- '16929'
+- '16930'
+- '16932'
+- '16936'
+- '16944'
+- '16960'
+- '16961'
+- '16962'
+- '16964'
+- '16968'
+- '16976'
+- '16992'
+- '17024'
+- '17025'
+- '17026'
+- '17028'
+- '17032'
+- '17040'
+- '17056'
+- '17088'
+- '17152'
+- '17153'
+- '17154'
+- '17156'
+- '17160'
+- '17168'
+- '17184'
+- '17216'
+- '17280'
+- '17408'
+- '17409'
+- '17410'
+- '17411'
+- '17412'
+- '17413'
+- '17414'
+- '17416'
+- '17417'
+- '17418'
+- '17420'
+- '17424'
+- '17425'
+- '17426'
+- '17428'
+- '17432'
+- '17440'
+- '17441'
+- '17442'
+- '17444'
+- '17448'
+- '17456'
+- '17472'
+- '17473'
+- '17474'
+- '17476'
+- '17480'
+- '17488'
+- '17504'
+- '17536'
+- '17537'
+- '17538'
+- '17540'
+- '17544'
+- '17552'
+- '17568'
+- '17600'
+- '17664'
+- '17665'
+- '17666'
+- '17668'
+- '17672'
+- '17680'
+- '17696'
+- '17728'
+- '17792'
+- '17920'
+- '17921'
+- '17922'
+- '17924'
+- '17928'
+- '17936'
+- '17952'
+- '17984'
+- '18048'
+- '18176'
+- '18432'
+- '18433'
+- '18434'
+- '18435'
+- '18436'
+- '18437'
+- '18438'
+- '18440'
+- '18441'
+- '18442'
+- '18444'
+- '18448'
+- '18449'
+- '18450'
+- '18452'
+- '18456'
+- '18464'
+- '18465'
+- '18466'
+- '18468'
+- '18472'
+- '18480'
+- '18496'
+- '18497'
+- '18498'
+- '18500'
+- '18504'
+- '18512'
+- '18528'
+- '18560'
+- '18561'
+- '18562'
+- '18564'
+- '18568'
+- '18576'
+- '18592'
+- '18624'
+- '18688'
+- '18689'
+- '18690'
+- '18692'
+- '18696'
+- '18704'
+- '18720'
+- '18752'
+- '18816'
+- '18944'
+- '18945'
+- '18946'
+- '18948'
+- '18952'
+- '18960'
+- '18976'
+- '19008'
+- '19072'
+- '19200'
+- '19456'
+- '19457'
+- '19458'
+- '19460'
+- '19464'
+- '19472'
+- '19488'
+- '19520'
+- '19584'
+- '19712'
+- '19968'
+- '20480'
+- '20481'
+- '20482'
+- '20483'
+- '20484'
+- '20485'
+- '20486'
+- '20488'
+- '20489'
+- '20490'
+- '20492'
+- '20496'
+- '20497'
+- '20498'
+- '20500'
+- '20504'
+- '20512'
+- '20513'
+- '20514'
+- '20516'
+- '20520'
+- '20528'
+- '20544'
+- '20545'
+- '20546'
+- '20548'
+- '20552'
+- '20560'
+- '20576'
+- '20608'
+- '20609'
+- '20610'
+- '20612'
+- '20616'
+- '20624'
+- '20640'
+- '20672'
+- '20736'
+- '20737'
+- '20738'
+- '20740'
+- '20744'
+- '20752'
+- '20768'
+- '20800'
+- '20864'
+- '20992'
+- '20993'
+- '20994'
+- '20996'
+- '21000'
+- '21008'
+- '21024'
+- '21056'
+- '21120'
+- '21248'
+- '21504'
+- '21505'
+- '21506'
+- '21508'
+- '21512'
+- '21520'
+- '21536'
+- '21568'
+- '21632'
+- '21760'
+- '22016'
+- '22528'
+- '22529'
+- '22530'
+- '22532'
+- '22536'
+- '22544'
+- '22560'
+- '22592'
+- '22656'
+- '22784'
+- '23040'
+- '23552'
+- '24576'
+- '24577'
+- '24578'
+- '24579'
+- '24580'
+- '24581'
+- '24582'
+- '24584'
+- '24585'
+- '24586'
+- '24588'
+- '24592'
+- '24593'
+- '24594'
+- '24596'
+- '24600'
+- '24608'
+- '24609'
+- '24610'
+- '24612'
+- '24616'
+- '24624'
+- '24640'
+- '24641'
+- '24642'
+- '24644'
+- '24648'
+- '24656'
+- '24672'
+- '24704'
+- '24705'
+- '24706'
+- '24708'
+- '24712'
+- '24720'
+- '24736'
+- '24768'
+- '24832'
+- '24833'
+- '24834'
+- '24836'
+- '24840'
+- '24848'
+- '24864'
+- '24896'
+- '24960'
+- '25088'
+- '25089'
+- '25090'
+- '25092'
+- '25096'
+- '25104'
+- '25120'
+- '25152'
+- '25216'
+- '25344'
+- '25600'
+- '25601'
+- '25602'
+- '25604'
+- '25608'
+- '25616'
+- '25632'
+- '25664'
+- '25728'
+- '25856'
+- '26112'
+- '26624'
+- '26625'
+- '26626'
+- '26628'
+- '26632'
+- '26640'
+- '26656'
+- '26688'
+- '26752'
+- '26880'
+- '27136'
+- '27648'
+- '28672'
+- '28673'
+- '28674'
+- '28676'
+- '28680'
+- '28688'
+- '28704'
+- '28736'
+- '28800'
+- '28928'
+- '29184'
+- '29696'
+- '30720'
+- '32768'
+- '32769'
+- '32770'
+- '32771'
+- '32772'
+- '32773'
+- '32774'
+- '32775'
+- '32776'
+- '32777'
+- '32778'
+- '32779'
+- '32780'
+- '32781'
+- '32782'
+- '32784'
+- '32785'
+- '32786'
+- '32787'
+- '32788'
+- '32789'
+- '32790'
+- '32792'
+- '32793'
+- '32794'
+- '32796'
+- '32800'
+- '32801'
+- '32802'
+- '32803'
+- '32804'
+- '32805'
+- '32806'
+- '32808'
+- '32809'
+- '32810'
+- '32812'
+- '32816'
+- '32817'
+- '32818'
+- '32820'
+- '32824'
+- '32832'
+- '32833'
+- '32834'
+- '32835'
+- '32836'
+- '32837'
+- '32838'
+- '32840'
+- '32841'
+- '32842'
+- '32844'
+- '32848'
+- '32849'
+- '32850'
+- '32852'
+- '32856'
+- '32864'
+- '32865'
+- '32866'
+- '32868'
+- '32872'
+- '32880'
+- '32896'
+- '32897'
+- '32898'
+- '32899'
+- '32900'
+- '32901'
+- '32902'
+- '32904'
+- '32905'
+- '32906'
+- '32908'
+- '32912'
+- '32913'
+- '32914'
+- '32916'
+- '32920'
+- '32928'
+- '32929'
+- '32930'
+- '32932'
+- '32936'
+- '32944'
+- '32960'
+- '32961'
+- '32962'
+- '32964'
+- '32968'
+- '32976'
+- '32992'
+- '33024'
+- '33025'
+- '33026'
+- '33027'
+- '33028'
+- '33029'
+- '33030'
+- '33032'
+- '33033'
+- '33034'
+- '33036'
+- '33040'
+- '33041'
+- '33042'
+- '33044'
+- '33048'
+- '33056'
+- '33057'
+- '33058'
+- '33060'
+- '33064'
+- '33072'
+- '33088'
+- '33089'
+- '33090'
+- '33092'
+- '33096'
+- '33104'
+- '33120'
+- '33152'
+- '33153'
+- '33154'
+- '33156'
+- '33160'
+- '33168'
+- '33184'
+- '33216'
+- '33280'
+- '33281'
+- '33282'
+- '33283'
+- '33284'
+- '33285'
+- '33286'
+- '33288'
+- '33289'
+- '33290'
+- '33292'
+- '33296'
+- '33297'
+- '33298'
+- '33300'
+- '33304'
+- '33312'
+- '33313'
+- '33314'
+- '33316'
+- '33320'
+- '33328'
+- '33344'
+- '33345'
+- '33346'
+- '33348'
+- '33352'
+- '33360'
+- '33376'
+- '33408'
+- '33409'
+- '33410'
+- '33412'
+- '33416'
+- '33424'
+- '33440'
+- '33472'
+- '33536'
+- '33537'
+- '33538'
+- '33540'
+- '33544'
+- '33552'
+- '33568'
+- '33600'
+- '33664'
+- '33792'
+- '33793'
+- '33794'
+- '33795'
+- '33796'
+- '33797'
+- '33798'
+- '33800'
+- '33801'
+- '33802'
+- '33804'
+- '33808'
+- '33809'
+- '33810'
+- '33812'
+- '33816'
+- '33824'
+- '33825'
+- '33826'
+- '33828'
+- '33832'
+- '33840'
+- '33856'
+- '33857'
+- '33858'
+- '33860'
+- '33864'
+- '33872'
+- '33888'
+- '33920'
+- '33921'
+- '33922'
+- '33924'
+- '33928'
+- '33936'
+- '33952'
+- '33984'
+- '34048'
+- '34049'
+- '34050'
+- '34052'
+- '34056'
+- '34064'
+- '34080'
+- '34112'
+- '34176'
+- '34304'
+- '34305'
+- '34306'
+- '34308'
+- '34312'
+- '34320'
+- '34336'
+- '34368'
+- '34432'
+- '34560'
+- '34816'
+- '34817'
+- '34818'
+- '34819'
+- '34820'
+- '34821'
+- '34822'
+- '34824'
+- '34825'
+- '34826'
+- '34828'
+- '34832'
+- '34833'
+- '34834'
+- '34836'
+- '34840'
+- '34848'
+- '34849'
+- '34850'
+- '34852'
+- '34856'
+- '34864'
+- '34880'
+- '34881'
+- '34882'
+- '34884'
+- '34888'
+- '34896'
+- '34912'
+- '34944'
+- '34945'
+- '34946'
+- '34948'
+- '34952'
+- '34960'
+- '34976'
+- '35008'
+- '35072'
+- '35073'
+- '35074'
+- '35076'
+- '35080'
+- '35088'
+- '35104'
+- '35136'
+- '35200'
+- '35328'
+- '35329'
+- '35330'
+- '35332'
+- '35336'
+- '35344'
+- '35360'
+- '35392'
+- '35456'
+- '35584'
+- '35840'
+- '35841'
+- '35842'
+- '35844'
+- '35848'
+- '35856'
+- '35872'
+- '35904'
+- '35968'
+- '36096'
+- '36352'
+- '36864'
+- '36865'
+- '36866'
+- '36867'
+- '36868'
+- '36869'
+- '36870'
+- '36872'
+- '36873'
+- '36874'
+- '36876'
+- '36880'
+- '36881'
+- '36882'
+- '36884'
+- '36888'
+- '36896'
+- '36897'
+- '36898'
+- '36900'
+- '36904'
+- '36912'
+- '36928'
+- '36929'
+- '36930'
+- '36932'
+- '36936'
+- '36944'
+- '36960'
+- '36992'
+- '36993'
+- '36994'
+- '36996'
+- '37000'
+- '37008'
+- '37024'
+- '37056'
+- '37120'
+- '37121'
+- '37122'
+- '37124'
+- '37128'
+- '37136'
+- '37152'
+- '37184'
+- '37248'
+- '37376'
+- '37377'
+- '37378'
+- '37380'
+- '37384'
+- '37392'
+- '37408'
+- '37440'
+- '37504'
+- '37632'
+- '37888'
+- '37889'
+- '37890'
+- '37892'
+- '37896'
+- '37904'
+- '37920'
+- '37952'
+- '38016'
+- '38144'
+- '38400'
+- '38912'
+- '38913'
+- '38914'
+- '38916'
+- '38920'
+- '38928'
+- '38944'
+- '38976'
+- '39040'
+- '39168'
+- '39424'
+- '39936'
+- '40960'
+- '40961'
+- '40962'
+- '40963'
+- '40964'
+- '40965'
+- '40966'
+- '40968'
+- '40969'
+- '40970'
+- '40972'
+- '40976'
+- '40977'
+- '40978'
+- '40980'
+- '40984'
+- '40992'
+- '40993'
+- '40994'
+- '40996'
+- '41000'
+- '41008'
+- '41024'
+- '41025'
+- '41026'
+- '41028'
+- '41032'
+- '41040'
+- '41056'
+- '41088'
+- '41089'
+- '41090'
+- '41092'
+- '41096'
+- '41104'
+- '41120'
+- '41152'
+- '41216'
+- '41217'
+- '41218'
+- '41220'
+- '41224'
+- '41232'
+- '41248'
+- '41280'
+- '41344'
+- '41472'
+- '41473'
+- '41474'
+- '41476'
+- '41480'
+- '41488'
+- '41504'
+- '41536'
+- '41600'
+- '41728'
+- '41984'
+- '41985'
+- '41986'
+- '41988'
+- '41992'
+- '42000'
+- '42016'
+- '42048'
+- '42112'
+- '42240'
+- '42496'
+- '43008'
+- '43009'
+- '43010'
+- '43012'
+- '43016'
+- '43024'
+- '43040'
+- '43072'
+- '43136'
+- '43264'
+- '43520'
+- '44032'
+- '45056'
+- '45057'
+- '45058'
+- '45060'
+- '45064'
+- '45072'
+- '45088'
+- '45120'
+- '45184'
+- '45312'
+- '45568'
+- '46080'
+- '47104'
+- '49152'
+- '49153'
+- '49154'
+- '49155'
+- '49156'
+- '49157'
+- '49158'
+- '49160'
+- '49161'
+- '49162'
+- '49164'
+- '49168'
+- '49169'
+- '49170'
+- '49172'
+- '49176'
+- '49184'
+- '49185'
+- '49186'
+- '49188'
+- '49192'
+- '49200'
+- '49216'
+- '49217'
+- '49218'
+- '49220'
+- '49224'
+- '49232'
+- '49248'
+- '49280'
+- '49281'
+- '49282'
+- '49284'
+- '49288'
+- '49296'
+- '49312'
+- '49344'
+- '49408'
+- '49409'
+- '49410'
+- '49412'
+- '49416'
+- '49424'
+- '49440'
+- '49472'
+- '49536'
+- '49664'
+- '49665'
+- '49666'
+- '49668'
+- '49672'
+- '49680'
+- '49696'
+- '49728'
+- '49792'
+- '49920'
+- '50176'
+- '50177'
+- '50178'
+- '50180'
+- '50184'
+- '50192'
+- '50208'
+- '50240'
+- '50304'
+- '50432'
+- '50688'
+- '51200'
+- '51201'
+- '51202'
+- '51204'
+- '51208'
+- '51216'
+- '51232'
+- '51264'
+- '51328'
+- '51456'
+- '51712'
+- '52224'
+- '53248'
+- '53249'
+- '53250'
+- '53252'
+- '53256'
+- '53264'
+- '53280'
+- '53312'
+- '53376'
+- '53504'
+- '53760'
+- '54272'
+- '55296'
+- '57344'
+- '57345'
+- '57346'
+- '57348'
+- '57352'
+- '57360'
+- '57376'
+- '57408'
+- '57472'
+- '57600'
+- '57856'
+- '58368'
+- '59392'
+- '61440'
+init: null
+input_size: 80
+cmvn_file: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: true
+joint_net_conf: null
+use_preprocessor: true
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+specaug: null
+specaug_conf: {}
+normalize: null
+normalize_conf: {}
+label_aggregator: null
+label_aggregator_conf: {}
+model: sond
+model_conf:
+ lsm_weight: 0.1
+ length_normalized_loss: true
+ max_spk_num: 16
+ normalize_speech_speaker: true
+# speech encoder
+encoder: resnet34_sp_l2reg
+encoder_conf:
+ # passed in by the model; equals the feature dimension
+ # input_size: 80
+ batchnorm_momentum: 0.01
+ pooling_type: "window_shift"
+ pool_size: 20
+ stride: 1
+ tf2torch_tensor_name_prefix_torch: encoder
+ tf2torch_tensor_name_prefix_tf: EAND/speech_encoder
+speaker_encoder: null
+speaker_encoder_conf: {}
+ci_scorer: conv
+ci_scorer_conf:
+ input_units: 512
+ num_layers: 3
+ num_units: 512
+ kernel_size: 1
+ dropout_rate: 0.0
+ position_encoder: null
+ out_units: 1
+ out_norm: false
+ auxiliary_states: false
+ tf2torch_tensor_name_prefix_torch: ci_scorer
+ tf2torch_tensor_name_prefix_tf: EAND/compute_distance_layer/ci_scorer
+cd_scorer: san
+cd_scorer_conf:
+ input_size: 512
+ output_size: 512
+ out_units: 1
+ attention_heads: 4
+ linear_units: 1024
+ num_blocks: 4
+ dropout_rate: 0.0
+ positional_dropout_rate: 0.0
+ attention_dropout_rate: 0.0
+ # use string "null" to remove input layer
+ input_layer: "null"
+ pos_enc_class: null
+ normalize_before: true
+ tf2torch_tensor_name_prefix_torch: cd_scorer
+ tf2torch_tensor_name_prefix_tf: EAND/compute_distance_layer/cd_scorer
+# post net
+decoder: fsmn
+decoder_conf:
+ in_units: 32
+ out_units: 2517
+ filter_size: 31
+ fsmn_num_layers: 6
+ dnn_num_layers: 1
+ num_memory_units: 16
+ ffn_inner_dim: 512
+ dropout_rate: 0.0
+ tf2torch_tensor_name_prefix_torch: decoder
+ tf2torch_tensor_name_prefix_tf: EAND/post_net
+frontend: null
+frontend_conf:
+ fs: 8000
+ window: povey
+ n_mels: 80
+ frame_length: 25
+ frame_shift: 10
+ filter_length_min: -1
+ filter_length_max: -1
+ lfr_m: 1
+ lfr_n: 1
+ dither: 0.0
+ snip_edges: false
+ upsacle_samples: false
+num_worker_count: 0
+required:
+- output_dir
+- token_list
+oss_bucket: 'null'
+version: 0.1.4
diff --git a/egs/callhome/diarization/sond/unit_test.py b/egs/callhome/diarization/sond/unit_test.py
new file mode 100644
index 0000000..519ac56
--- /dev/null
+++ b/egs/callhome/diarization/sond/unit_test.py
@@ -0,0 +1,97 @@
+from funasr.bin.diar_inference_launch import inference_launch
+import os
+
+
+def test_fbank_cpu_infer():
+    """Run the ``sond`` pipeline on pre-computed fbank features.
+
+    No ``ngpu`` is passed, so the launcher's default device is used
+    (presumably CPU, as the test name suggests). The result is printed,
+    not asserted.
+    """
+    pipeline = inference_launch(
+        mode="sond",
+        diar_train_config="sond_fbank.yaml",
+        diar_model_file="sond.pth",
+        output_dir="./outputs",
+        num_workers=0,
+        log_level="INFO",
+    )
+    # (path, name, type) triples: speech features plus speaker profiles.
+    inputs = [
+        ("data/unit_test/test_feats.scp", "speech", "kaldi_ark"),
+        ("data/unit_test/test_profile.scp", "profile", "kaldi_ark"),
+    ]
+    print(pipeline(inputs))
+
+
+def test_fbank_gpu_infer():
+    """Run the ``sond`` pipeline on pre-computed fbank features with ``ngpu=1``.
+
+    Uses the same inputs as the CPU variant but requests one GPU and one
+    data-loading worker. The result is printed, not asserted.
+    """
+    pipeline = inference_launch(
+        mode="sond",
+        diar_train_config="sond_fbank.yaml",
+        diar_model_file="sond.pth",
+        output_dir="./outputs",
+        ngpu=1,
+        num_workers=1,
+        log_level="INFO",
+    )
+    # (path, name, type) triples: speech features plus speaker profiles.
+    inputs = [
+        ("data/unit_test/test_feats.scp", "speech", "kaldi_ark"),
+        ("data/unit_test/test_profile.scp", "profile", "kaldi_ark"),
+    ]
+    print(pipeline(inputs))
+
+
+def test_wav_gpu_infer():
+    """Run the ``sond`` pipeline on raw audio with ``ngpu=1``.
+
+    Speech is read as ``sound`` entries from ``test_wav.scp`` while the
+    speaker profiles stay in kaldi-ark form; ``config.yaml`` is used instead
+    of the fbank config. The result is printed, not asserted.
+    """
+    pipeline = inference_launch(
+        mode="sond",
+        diar_train_config="config.yaml",
+        diar_model_file="sond.pth",
+        output_dir="./outputs",
+        ngpu=1,
+        num_workers=1,
+        log_level="WARNING",
+    )
+    # (path, name, type) triples: waveform list plus speaker profiles.
+    inputs = [
+        ("data/unit_test/test_wav.scp", "speech", "sound"),
+        ("data/unit_test/test_profile.scp", "profile", "kaldi_ark"),
+    ]
+    print(pipeline(inputs))
+
+
+def test_without_profile_gpu_infer():
+    """Run the ``sond_demo`` pipeline directly on wav files with ``ngpu=1``.
+
+    Instead of scp lists, a single nested list of wav paths is passed through
+    the ``raw_inputs`` keyword: one recording followed by four per-speaker
+    files. The result is printed, not asserted.
+    """
+    pipeline = inference_launch(
+        mode="sond_demo",
+        diar_train_config="config.yaml",
+        diar_model_file="sond.pth",
+        output_dir="./outputs",
+        ngpu=1,
+        num_workers=1,
+        log_level="WARNING",
+        param_dict={},
+    )
+    # One session: the recording first, then the speaker wavs.
+    session = [
+        "data/unit_test/raw_inputs/record.wav",
+        "data/unit_test/raw_inputs/spk1.wav",
+        "data/unit_test/raw_inputs/spk2.wav",
+        "data/unit_test/raw_inputs/spk3.wav",
+        "data/unit_test/raw_inputs/spk4.wav"
+    ]
+    print(pipeline(raw_inputs=[session]))
+
+
+if __name__ == '__main__':
+    # Restrict CUDA to device index 7 before any test runs. Only the GPU
+    # tests (commented out below) pass ngpu=1; enable them by uncommenting.
+    os.environ["CUDA_VISIBLE_DEVICES"] = "7"
+    test_fbank_cpu_infer()
+    # test_fbank_gpu_infer()
+    # test_wav_gpu_infer()
+    # test_without_profile_gpu_infer()
diff --git a/egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/finetune.py b/egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/finetune.py
index bf8176e..7db085a 100755
--- a/egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/finetune.py
+++ b/egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/finetune.py
@@ -31,5 +31,5 @@
params.batch_bins = 1000 # batch size锛屽鏋渄ataset_type="small"锛宐atch_bins鍗曚綅涓篺bank鐗瑰緛甯ф暟锛屽鏋渄ataset_type="large"锛宐atch_bins鍗曚綅涓烘绉掞紝
params.max_epoch = 10 # 鏈�澶ц缁冭疆鏁�
params.lr = 0.0001 # 璁剧疆瀛︿範鐜�
- params.model_revision = 'v1.0.0'
+ params.model_revision = 'v3.0.0'
modelscope_finetune(params)
diff --git a/egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/infer.py b/egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/infer.py
index fa22aad..b3bfe8e 100755
--- a/egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/infer.py
+++ b/egs_modelscope/asr/mfcca/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/infer.py
@@ -19,7 +19,7 @@
inference_pipline = pipeline(
task=Tasks.auto_speech_recognition,
model='NPU-ASLP/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950',
- model_revision='v1.0.0',
+ model_revision='v3.0.0',
output_dir=output_dir_job,
batch_size=1,
)
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/finetune.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/finetune.py
index 4a5efdb..5485ff5 100644
--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/finetune.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/finetune.py
@@ -30,6 +30,6 @@
params["dataset_type"] = "small"
params["max_epoch"] = 50
params["lr"] = 0.00005
- params["model"] = "damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-online"
+ params["model"] = "damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline"
params["model_revision"] = None
modelscope_finetune(params)
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/infer.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/infer.py
index a053957..1a174bb 100644
--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/infer.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline/infer.py
@@ -6,7 +6,7 @@
output_dir = "./results"
inference_pipline = pipeline(
task=Tasks.auto_speech_recognition,
- model="damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-online",
+ model="damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline",
output_dir=output_dir,
)
rec_result = inference_pipline(audio_in=audio_in, param_dict={"decoding_model":"offline"})
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/finetune.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/finetune.py
index 60f3c82..512b844 100644
--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/finetune.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/finetune.py
@@ -30,6 +30,6 @@
params["dataset_type"] = "small"
params["max_epoch"] = 50
params["lr"] = 0.00005
- params["model"] = "damo/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-online"
+ params["model"] = "damo/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline"
params["model_revision"] = None
modelscope_finetune(params)
diff --git a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/infer.py b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/infer.py
index 30a11ff..2dcb663 100644
--- a/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/infer.py
+++ b/egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline/infer.py
@@ -6,7 +6,7 @@
output_dir = "./results"
inference_pipline = pipeline(
task=Tasks.auto_speech_recognition,
- model="damo/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-online",
+ model="damo/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline",
output_dir=output_dir,
)
rec_result = inference_pipline(audio_in=audio_in, param_dict={"decoding_model":"offline"})
diff --git a/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/infer.py b/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/infer.py
new file mode 100644
index 0000000..02859c2
--- /dev/null
+++ b/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vadrealtime-vocab272727/infer.py
@@ -0,0 +1,26 @@
+
+##################text binary data#####################
+inputs = "璺ㄥ娌虫祦鏄吇鑲叉部宀竱浜烘皯鐨勭敓鍛戒箣婧愰暱鏈熶互鏉ヤ负甯姪涓嬫父鍦板尯闃茬伨鍑忕伨涓柟鎶�鏈汉鍛榺鍦ㄤ笂娓稿湴鍖烘瀬涓烘伓鍔g殑鑷劧鏉′欢涓嬪厠鏈嶅法澶у洶闅剧敋鑷冲啋鐫�鐢熷懡鍗遍櫓|鍚戝嵃鏂规彁渚涙睕鏈熸按鏂囪祫鏂欏鐞嗙揣鎬ヤ簨浠朵腑鏂归噸瑙嗗嵃鏂瑰湪璺ㄥ娌虫祦闂涓婄殑鍏冲垏|鎰挎剰杩涗竴姝ュ畬鍠勫弻鏂硅仈鍚堝伐浣滄満鍒秥鍑℃槸|涓柟鑳藉仛鐨勬垜浠瑋閮戒細鍘诲仛鑰屼笖浼氬仛寰楁洿濂芥垜璇峰嵃搴︽湅鍙嬩滑鏀惧績涓浗鍦ㄤ笂娓哥殑|浠讳綍寮�鍙戝埄鐢ㄩ兘浼氱粡杩囩瀛瑙勫垝鍜岃璇佸吋椤句笂涓嬫父鐨勫埄鐩�"
+
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+# Build the punctuation pipeline for the VAD-realtime model from ModelScope.
+inference_pipline = pipeline(
+    task=Tasks.punctuation,
+    model='damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727',
+    model_revision="v1.0.0",
+    output_dir="./tmp/"
+)
+
+# "|" separates the segments of `inputs`; each one is punctuated in turn.
+segments = inputs.split("|")
+
+# The cache returned for one segment is passed back in with the next one.
+cache_out = []
+punctuated_parts = ["outputs:"]
+for segment in segments:
+    segment_result = inference_pipline(text_in=segment, cache=cache_out)
+    cache_out = segment_result['cache']
+    punctuated_parts.append(segment_result['text'])
+rec_result_all = "".join(punctuated_parts)
+
+print(rec_result_all)
+
diff --git a/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/infer.py b/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/infer.py
index 8dac292..0da8d25 100644
--- a/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/infer.py
+++ b/egs_modelscope/punctuation/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/infer.py
@@ -15,7 +15,7 @@
inference_pipline = pipeline(
task=Tasks.punctuation,
model='damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch',
- model_revision="v1.1.6",
+ model_revision="v1.1.7",
output_dir="./tmp/"
)
diff --git a/egs_modelscope/speaker_verification/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch/infer.py b/egs_modelscope/speaker_verification/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch/infer.py
new file mode 100644
index 0000000..1fd9dc6
--- /dev/null
+++ b/egs_modelscope/speaker_verification/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch/infer.py
@@ -0,0 +1,39 @@
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+import numpy as np
+
+if __name__ == '__main__':
+    def _scaled_similarity(emb_a, emb_b, threshold):
+        """Return cosine similarity above `threshold`, rescaled to 0-100."""
+        cosine = np.sum(emb_a * emb_b) / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b))
+        return max(cosine - threshold, 0.0) / (1.0 - threshold) * 100.0
+
+    inference_sv_pipline = pipeline(
+        task=Tasks.speaker_verification,
+        model='damo/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch'
+    )
+
+    # Audio given as a URL: the embedding is under the "spk_embedding" key.
+    enroll = inference_sv_pipline(
+        audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/sv_example_enroll.wav'
+    )["spk_embedding"]
+
+    # Audio given as a local file path: same key.
+    same = inference_sv_pipline(audio_in='example/sv_example_same.wav')["spk_embedding"]
+
+    # Audio given as raw samples read with soundfile: same key.
+    import soundfile
+    wav = soundfile.read('example/sv_example_enroll.wav')[0]
+    spk_embedding = inference_sv_pipline(audio_in=wav)["spk_embedding"]
+
+    different = inference_sv_pipline(
+        audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/sv_example_different.wav'
+    )["spk_embedding"]
+
+    sv_threshold = 0.80
+
+    # Score of the enrollment utterance against the same speaker.
+    same_cos = _scaled_similarity(enroll, same, sv_threshold)
+    print("Similarity:", same_cos)
+
+    # Score of the enrollment utterance against a different speaker.
+    diff_cos = _scaled_similarity(enroll, different, sv_threshold)
+    print("Similarity:", diff_cos)
diff --git a/egs_modelscope/speaker_verification/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch/infer_sv.py b/egs_modelscope/speaker_verification/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch/infer_sv.py
new file mode 100644
index 0000000..880b2d3
--- /dev/null
+++ b/egs_modelscope/speaker_verification/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch/infer_sv.py
@@ -0,0 +1,21 @@
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+
+if __name__ == '__main__':
+    inference_sv_pipline = pipeline(
+        task=Tasks.speaker_verification,
+        model='damo/speech_xvector_sv-en-us-callhome-8k-spk6135-pytorch'
+    )
+
+    enroll_wav = 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/sv_example_enroll.wav'
+    # Utterances compared against the enrollment audio: first the same
+    # speaker, then a different one.
+    trial_wavs = (
+        'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/sv_example_same.wav',
+        'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/sv_example_different.wav',
+    )
+
+    # Passing a pair as audio_in makes the pipeline return a "scores" entry.
+    for trial_wav in trial_wavs:
+        rec_result = inference_sv_pipline(audio_in=(enroll_wav, trial_wav))
+        print("Similarity", rec_result["scores"])
diff --git a/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer.py b/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer.py
index e81297a..87f3801 100644
--- a/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer.py
+++ b/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer.py
@@ -9,14 +9,20 @@
)
# 鎻愬彇涓嶅悓鍙ュ瓙鐨勮璇濅汉宓屽叆鐮�
+ # for url use "utt_id" as key
rec_result = inference_sv_pipline(
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav')
enroll = rec_result["spk_embedding"]
- rec_result = inference_sv_pipline(
- audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav')
+ # for local file use "utt_id" as key
+ rec_result = inference_sv_pipline(audio_in='sv_example_same.wav')["test1"]
same = rec_result["spk_embedding"]
+ import soundfile
+ wav = soundfile.read('sv_example_enroll.wav')[0]
+ # for raw inputs use "utt_id" as key
+ spk_embedding = inference_sv_pipline(audio_in=wav)["spk_embedding"]
+
rec_result = inference_sv_pipline(
audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav')
different = rec_result["spk_embedding"]
diff --git a/funasr/bin/asr_inference_mfcca.py b/funasr/bin/asr_inference_mfcca.py
index e25b2a9..4176ba6 100644
--- a/funasr/bin/asr_inference_mfcca.py
+++ b/funasr/bin/asr_inference_mfcca.py
@@ -194,8 +194,8 @@
# Input as audio signal
if isinstance(speech, np.ndarray):
speech = torch.tensor(speech)
-
-
+ if(speech.dim()==3):
+ speech = torch.squeeze(speech, 2)
#speech = speech.unsqueeze(0).to(getattr(torch, self.dtype))
speech = speech.to(getattr(torch, self.dtype))
# lenghts: (1,)
@@ -534,6 +534,8 @@
data_path_and_name_and_type,
dtype=dtype,
batch_size=batch_size,
+ fs=fs,
+ mc=True,
key_file=key_file,
num_workers=num_workers,
preprocess_fn=ASRTask.build_preprocess_fn(speech2text.asr_train_args, False),
diff --git a/funasr/bin/punc_inference_launch.py b/funasr/bin/punc_inference_launch.py
index 53db1df..e7e3f15 100755
--- a/funasr/bin/punc_inference_launch.py
+++ b/funasr/bin/punc_inference_launch.py
@@ -75,6 +75,9 @@
if mode == "punc":
from funasr.bin.punctuation_infer import inference_modelscope
return inference_modelscope(**kwargs)
+ if mode == "punc_VadRealtime":
+ from funasr.bin.punctuation_infer_vadrealtime import inference_modelscope
+ return inference_modelscope(**kwargs)
else:
logging.info("Unknown decoding mode: {}".format(mode))
return None
diff --git a/funasr/bin/punctuation_infer_vadrealtime.py b/funasr/bin/punctuation_infer_vadrealtime.py
new file mode 100644
index 0000000..d6cc153
--- /dev/null
+++ b/funasr/bin/punctuation_infer_vadrealtime.py
@@ -0,0 +1,335 @@
+#!/usr/bin/env python3
+import argparse
+import logging
+from pathlib import Path
+import sys
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
+from typing import Union
+from typing import Any
+from typing import List
+
+import numpy as np
+import torch
+from typeguard import check_argument_types
+
+from funasr.datasets.preprocessor import CodeMixTokenizerCommonPreprocessor
+from funasr.utils.cli_utils import get_commandline_args
+from funasr.tasks.punctuation import PunctuationTask
+from funasr.torch_utils.device_funcs import to_device
+from funasr.torch_utils.forward_adaptor import ForwardAdaptor
+from funasr.torch_utils.set_all_random_seed import set_all_random_seed
+from funasr.utils import config_argparse
+from funasr.utils.types import str2triple_str
+from funasr.utils.types import str_or_none
+from funasr.punctuation.text_preprocessor import split_to_mini_sentence
+
+
+class Text2Punc:
+    """Punctuation restorer for text that arrives in consecutive chunks.
+
+    The model is loaded with ``PunctuationTask.build_model_from_file`` and
+    wrapped as ``ForwardAdaptor(model, "inference")``. Calling the instance
+    with a chunk of text plus the cache returned by the previous call yields
+    the punctuated chunk, its per-token punctuation labels and the cache to
+    hand to the next call.
+    """
+
+    # Fullwidth punctuation marks, written as escapes so the source stays
+    # ASCII and cannot be corrupted by a wrong file encoding.
+    _COMMA = "\uff0c"
+    _QUESTION_MARK = "\uff1f"
+    _PERIOD = "\u3002"
+
+    def __init__(
+        self,
+        train_config: Optional[str],
+        model_file: Optional[str],
+        device: str = "cpu",
+        dtype: str = "float32",
+    ):
+        """Load the model and prepare the label list and text preprocessor.
+
+        Args:
+            train_config: Training config passed to ``build_model_from_file``.
+            model_file: Model checkpoint passed to ``build_model_from_file``.
+            device: Device the wrapped model and the inputs are moved to.
+            dtype: Name of the ``torch`` dtype the wrapped model is cast to.
+        """
+        # Build Model
+        model, train_args = PunctuationTask.build_model_from_file(train_config, model_file, device)
+        self.device = device
+        # Route forward() calls to the model's "inference" method.
+        self.wrapped_model = ForwardAdaptor(model, "inference")
+        self.wrapped_model.to(dtype=getattr(torch, dtype)).to(device=device).eval()
+        # logging.info(f"Model:\n{model}")
+        self.punc_list = train_args.punc_list
+        # Index of the fullwidth period in punc_list; stays 0 if it is absent.
+        self.period = 0
+        # Replace ASCII "," and "?" with their fullwidth forms (in place) and
+        # remember where the fullwidth period sits.
+        for i, punc in enumerate(self.punc_list):
+            if punc == ",":
+                self.punc_list[i] = self._COMMA
+            elif punc == "?":
+                self.punc_list[i] = self._QUESTION_MARK
+            elif punc == self._PERIOD:
+                self.period = i
+        self.preprocessor = CodeMixTokenizerCommonPreprocessor(
+            train=False,
+            token_type=train_args.token_type,
+            token_list=train_args.token_list,
+            bpemodel=train_args.bpemodel,
+            text_cleaner=train_args.cleaner,
+            g2p_type=train_args.g2p,
+            text_name="text",
+            non_linguistic_symbols=train_args.non_linguistic_symbols,
+        )
+        print("start decoding!!!")
+
+    @torch.no_grad()
+    def __call__(self, text: Union[list, str], cache: list, split_size=20):
+        """Punctuate ``text`` using ``cache`` as left context.
+
+        Args:
+            text: New chunk of text; it is appended to the joined cache.
+            cache: Words carried over from the previous call (the third
+                return value), or ``None`` / ``[]`` for the first chunk.
+            split_size: Size passed to ``split_to_mini_sentence``.
+
+        Returns:
+            A tuple ``(sentence_out, sentence_punc_list_out, cache_out)``:
+            the new text with punctuation inserted (a trailing punctuation
+            mark is dropped), the punctuation labels that were emitted, and
+            the words after the last period/question mark to pass back in
+            with the next chunk.
+        """
+        # A missing cache means "no carried-over context"; normalising it
+        # here keeps the later len(cache) uses from raising on None.
+        if cache is None:
+            cache = []
+        precache = "".join(cache)
+        data = {"text": precache + text}
+        result = self.preprocessor(data=data, uid="12938712838719")
+        split_text = self.preprocessor.pop_split_text_data(result)
+        mini_sentences = split_to_mini_sentence(split_text, split_size)
+        # NOTE(review): data["text"] is re-read after the preprocessor call;
+        # presumably it now holds the token ids - confirm.
+        mini_sentences_id = split_to_mini_sentence(data["text"], split_size)
+        assert len(mini_sentences) == len(mini_sentences_id)
+        cache_sent = []
+        cache_sent_id = torch.from_numpy(np.array([], dtype='int32'))
+        sentence_punc_list = []
+        sentence_words_list = []
+        cache_pop_trigger_limit = 200
+        skip_num = 0
+        for mini_sentence_i, (mini_sentence, mini_sentence_id) in enumerate(
+            zip(mini_sentences, mini_sentences_id)
+        ):
+            # Prepend whatever the previous mini-sentence left unfinished.
+            mini_sentence = cache_sent + mini_sentence
+            mini_sentence_id = np.concatenate((cache_sent_id, mini_sentence_id), axis=0)
+            data = {
+                "text": torch.unsqueeze(torch.from_numpy(mini_sentence_id), 0),
+                "text_lengths": torch.from_numpy(np.array([len(mini_sentence_id)], dtype='int32')),
+                "vad_indexes": torch.from_numpy(np.array([len(cache) - 1], dtype='int32')),
+            }
+            data = to_device(data, self.device)
+            y, _ = self.wrapped_model(**data)
+            # Best-scoring punctuation label per position.
+            _, indices = y.view(-1, y.shape[-1]).topk(1, dim=1)
+            punctuations = indices
+            if indices.size()[0] != 1:
+                punctuations = torch.squeeze(indices)
+            assert punctuations.size()[0] == len(mini_sentence)
+
+            # Search for the last Period/QuestionMark as cache
+            if mini_sentence_i < len(mini_sentences) - 1:
+                sentence_end = -1
+                last_comma_index = -1
+                for i in range(len(punctuations) - 2, 1, -1):
+                    punc = self.punc_list[punctuations[i]]
+                    if punc in (self._PERIOD, self._QUESTION_MARK):
+                        sentence_end = i
+                        break
+                    if last_comma_index < 0 and punc == self._COMMA:
+                        last_comma_index = i
+
+                if sentence_end < 0 and len(mini_sentence) > cache_pop_trigger_limit and last_comma_index >= 0:
+                    # The sentence is too long, cut off at a comma.
+                    sentence_end = last_comma_index
+                    punctuations[sentence_end] = self.period
+                # Everything after the cut is re-decoded with the next
+                # mini-sentence.
+                cache_sent = mini_sentence[sentence_end + 1:]
+                cache_sent_id = mini_sentence_id[sentence_end + 1:]
+                mini_sentence = mini_sentence[0:sentence_end + 1]
+                punctuations = punctuations[0:sentence_end + 1]
+
+            punctuations_np = punctuations.cpu().numpy()
+            sentence_punc_list += [self.punc_list[int(x)] for x in punctuations_np]
+            sentence_words_list += mini_sentence
+
+        assert len(sentence_punc_list) == len(sentence_words_list)
+        words_with_punc = []
+        sentence_punc_list_out = []
+        for i in range(len(sentence_words_list)):
+            if i > 0:
+                # Two adjacent single-byte (ASCII) characters across a word
+                # boundary get a separating space.
+                if len(sentence_words_list[i][0].encode()) == 1 and len(sentence_words_list[i - 1][-1].encode()) == 1:
+                    sentence_words_list[i] = " " + sentence_words_list[i]
+            # The first len(cache) words came from the cache and were already
+            # returned by an earlier call, so they are not emitted again.
+            if skip_num < len(cache):
+                skip_num += 1
+            else:
+                words_with_punc.append(sentence_words_list[i])
+            if skip_num >= len(cache):
+                sentence_punc_list_out.append(sentence_punc_list[i])
+                if sentence_punc_list[i] != "_":
+                    words_with_punc.append(sentence_punc_list[i])
+        sentence_out = "".join(words_with_punc)
+
+        # Words after the last period/question mark become the next cache.
+        sentence_end = -1
+        for i in range(len(sentence_punc_list) - 2, 1, -1):
+            if sentence_punc_list[i] in (self._PERIOD, self._QUESTION_MARK):
+                sentence_end = i
+                break
+        cache_out = sentence_words_list[sentence_end + 1:]
+        # Drop a trailing punctuation mark: more text may still follow. The
+        # emptiness check avoids an IndexError when nothing was emitted.
+        if sentence_out and sentence_out[-1] in self.punc_list:
+            sentence_out = sentence_out[:-1]
+            sentence_punc_list_out[-1] = "_"
+        return sentence_out, sentence_punc_list_out, cache_out
+
+
+def inference(
+    batch_size: int,
+    dtype: str,
+    ngpu: int,
+    seed: int,
+    num_workers: int,
+    output_dir: str,
+    log_level: Union[int, str],
+    train_config: Optional[str],
+    model_file: Optional[str],
+    key_file: Optional[str] = None,
+    data_path_and_name_and_type: Sequence[Tuple[str, str, str]] = None,
+    raw_inputs: Union[List[Any], bytes, str] = None,
+    cache: List[Any] = None,
+    param_dict: dict = None,
+    **kwargs,
+):
+    """Build the punctuation pipeline and run it once.
+
+    All model/runtime arguments are forwarded to ``inference_modelscope``;
+    the callable it returns is then invoked with the input arguments.
+
+    Args:
+        data_path_and_name_and_type: (path, name, type) triples of text files
+            to punctuate; used when ``raw_inputs`` is ``None``.
+        raw_inputs: Text to punctuate directly.
+        cache: Context carried over from a previous call, if any.
+
+    Returns:
+        The list of result dicts produced by the pipeline.
+    """
+    inference_pipeline = inference_modelscope(
+        output_dir=output_dir,
+        batch_size=batch_size,
+        dtype=dtype,
+        ngpu=ngpu,
+        seed=seed,
+        num_workers=num_workers,
+        log_level=log_level,
+        key_file=key_file,
+        train_config=train_config,
+        model_file=model_file,
+        param_dict=param_dict,
+        **kwargs,
+    )
+    # Pass by keyword: the third positional parameter of the returned
+    # callable is output_dir_v2, so a positional cache would be bound to it.
+    return inference_pipeline(
+        data_path_and_name_and_type,
+        raw_inputs=raw_inputs,
+        cache=cache,
+    )
+
+
+def inference_modelscope(
+    batch_size: int,
+    dtype: str,
+    ngpu: int,
+    seed: int,
+    num_workers: int,
+    log_level: Union[int, str],
+    key_file: Optional[str],
+    train_config: Optional[str],
+    model_file: Optional[str],
+    output_dir: Optional[str] = None,
+    param_dict: dict = None,
+    **kwargs,
+):
+    """Load the punctuation model once and return a callable that runs it.
+
+    Args:
+        ngpu: ``cuda`` is used when this is >= 1 and CUDA is available,
+            otherwise ``cpu``.
+        seed: Seed passed to ``set_all_random_seed``.
+        log_level: Level passed to ``logging.basicConfig``.
+        train_config: Training config used to build the model.
+        model_file: Model checkpoint used to build the model.
+        output_dir: Default directory for ``infer.out`` in file mode.
+        batch_size, dtype, num_workers, key_file, param_dict, kwargs:
+            Accepted for interface compatibility; not used in this function.
+
+    Returns:
+        The ``_forward`` callable defined below.
+    """
+    assert check_argument_types()
+    logging.basicConfig(
+        level=log_level,
+        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+    )
+
+    if ngpu >= 1 and torch.cuda.is_available():
+        device = "cuda"
+    else:
+        device = "cpu"
+
+    # 1. Set random-seed
+    set_all_random_seed(seed)
+    # NOTE(review): dtype is not forwarded, so Text2Punc uses its default.
+    text2punc = Text2Punc(train_config, model_file, device)
+
+    def _forward(
+        data_path_and_name_and_type,
+        raw_inputs: Union[List[Any], bytes, str] = None,
+        output_dir_v2: Optional[str] = None,
+        cache: List[Any] = None,
+        param_dict: dict = None,
+    ):
+        """Punctuate ``raw_inputs`` if given, else the listed text files.
+
+        With ``raw_inputs`` the result is a one-element list holding
+        ``{'key': 'demo', 'value': text, 'cache': new_cache}``. Otherwise
+        every ``key<TAB>text`` line of every file is punctuated, the
+        ``{'key', 'value'}`` items are returned and, when an output
+        directory is known, also written to ``infer.out``.
+        """
+        results = []
+        # Text2Punc takes len(cache); treat a missing cache as empty.
+        if cache is None:
+            cache = []
+
+        if raw_inputs is not None:
+            line = raw_inputs.strip()
+            key = "demo"
+            if line == "":
+                # Nothing to decode: hand the cache back unchanged so a
+                # caller chaining chunks does not lose its context.
+                results.append({'key': key, 'value': "", 'cache': cache})
+                return results
+            result, _, cache = text2punc(line, cache)
+            results.append({'key': key, 'value': result, 'cache': cache})
+            return results
+
+        for inference_text, _, _ in data_path_and_name_and_type:
+            with open(inference_text, "r", encoding="utf-8") as fin:
+                for line in fin:
+                    segs = line.strip().split("\t")
+                    # Skip lines that are not "key<TAB>text" or have no text.
+                    if len(segs) != 2 or len(segs[1]) == 0:
+                        continue
+                    key = segs[0]
+                    # Each line is decoded independently from the caller's
+                    # cache; the per-line cache is not carried over.
+                    result, _, _ = text2punc(segs[1], cache)
+                    results.append({'key': key, 'value': result})
+        output_path = output_dir_v2 if output_dir_v2 is not None else output_dir
+        if output_path is not None:
+            output_file_name = "infer.out"
+            Path(output_path).mkdir(parents=True, exist_ok=True)
+            output_file_path = (Path(output_path) / output_file_name).absolute()
+            with open(output_file_path, "w", encoding="utf-8") as fout:
+                for item_i in results:
+                    key_out = item_i["key"]
+                    value_out = item_i["value"]
+                    fout.write(f"{key_out}\t{value_out}\n")
+        return results
+
+    return _forward
+
+
+def get_parser():
+    """Create the command-line parser for punctuation inference.
+
+    Returns:
+        A ``config_argparse.ArgumentParser`` with the runtime options, the
+        "Input data related" group and the "The model configuration related"
+        group registered.
+    """
+    parser = config_argparse.ArgumentParser(
+        description="Punctuation inference",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    # Runtime options; --log_level is upper-cased before the choices check.
+    log_levels = ("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET")
+    parser.add_argument(
+        "--log_level",
+        type=lambda x: x.upper(),
+        default="INFO",
+        choices=log_levels,
+        help="The verbose level of logging",
+    )
+    parser.add_argument("--output_dir", type=str, required=False)
+    parser.add_argument("--ngpu", type=int, default=0, help="The number of gpus. 0 indicates CPU mode")
+    parser.add_argument("--seed", type=int, default=0, help="Random seed")
+    parser.add_argument(
+        "--dtype", default="float32", choices=["float16", "float32", "float64"], help="Data type"
+    )
+    parser.add_argument("--num_workers", type=int, default=1, help="The number of workers used for DataLoader")
+    parser.add_argument("--batch_size", type=int, default=1, help="The batch size for inference")
+
+    # NOTE(review): type=list turns a command-line string into a list of its
+    # characters and type=dict cannot convert a string at all - confirm that
+    # --cache / --param_dict are only ever supplied programmatically.
+    input_group = parser.add_argument_group("Input data related")
+    input_group.add_argument(
+        "--data_path_and_name_and_type", type=str2triple_str, action="append", required=False
+    )
+    input_group.add_argument("--raw_inputs", type=str, required=False)
+    input_group.add_argument("--cache", type=list, required=False)
+    input_group.add_argument("--param_dict", type=dict, required=False)
+    input_group.add_argument("--key_file", type=str_or_none)
+
+    model_group = parser.add_argument_group("The model configuration related")
+    model_group.add_argument("--train_config", type=str)
+    model_group.add_argument("--model_file", type=str)
+
+    return parser
+
+
+def main(cmd=None):
+    """Parse ``cmd`` (or ``sys.argv`` when ``None``) and run ``inference``.
+
+    The invoking command line is echoed to stderr first; every parsed option
+    is then passed to ``inference`` as a keyword argument.
+    """
+    print(get_commandline_args(), file=sys.stderr)
+    args = get_parser().parse_args(cmd)
+    inference(**vars(args))
+
+
+if __name__ == "__main__":
+ main()
diff --git a/funasr/bin/tp_inference.py b/funasr/bin/tp_inference.py
new file mode 100644
index 0000000..e7a1f1b
--- /dev/null
+++ b/funasr/bin/tp_inference.py
@@ -0,0 +1,432 @@
+import argparse
+import logging
+from optparse import Option
+import sys
+import json
+from pathlib import Path
+from typing import Any
+from typing import List
+from typing import Optional
+from typing import Sequence
+from typing import Tuple
+from typing import Union
+from typing import Dict
+
+import numpy as np
+import torch
+from typeguard import check_argument_types
+
+from funasr.fileio.datadir_writer import DatadirWriter
+from funasr.datasets.preprocessor import LMPreprocessor
+from funasr.tasks.asr import ASRTaskAligner as ASRTask
+from funasr.torch_utils.device_funcs import to_device
+from funasr.torch_utils.set_all_random_seed import set_all_random_seed
+from funasr.utils import config_argparse
+from funasr.utils.cli_utils import get_commandline_args
+from funasr.utils.types import str2bool
+from funasr.utils.types import str2triple_str
+from funasr.utils.types import str_or_none
+from funasr.models.frontend.wav_frontend import WavFrontend
+from funasr.text.token_id_converter import TokenIDConverter
+
+# ANSI escape sequences: "95" selects a bright-magenta foreground, "0" resets
+# all attributes. Not referenced in the visible part of this module.
+header_colors = '\033[95m'
+end_colors = '\033[0m'
+
+# Module-level defaults; not referenced in the visible part of this module.
+# NOTE(review): presumably kept for parity with the other inference scripts --
+# confirm whether anything imports them.
+global_asr_language: str = 'zh-cn'
+# Sample rates in Hz for the incoming audio ('audio_fs') and the model ('model_fs').
+global_sample_rate: Union[int, Dict[Any, int]] = {
+ 'audio_fs': 16000,
+ 'model_fs': 16000
+}
+
+def time_stamp_lfr6_advance(us_alphas, us_cif_peak, char_list):
+    """Convert upsampled CIF peaks into per-token timestamps.
+
+    Frames between two consecutive peaks are attributed to the former token.
+    Long gaps are split into the token plus a ``<sil>`` segment, and leading /
+    trailing silence is emitted when the first / last peak is far enough from
+    the utterance boundary.
+
+    Args:
+        us_alphas: Upsampled CIF weights. Accepted for interface compatibility;
+            the values are not used by this function.
+        us_cif_peak: 1-D tensor of upsampled CIF peak values, or a 2-D tensor
+            whose first row is used (batch size 1 only).
+        char_list: Token strings; a trailing ``'</s>'`` is ignored. May be
+            empty, in which case only silence segments are produced.
+
+    Returns:
+        ``(res_str, res)`` where ``res_str`` is ``"token start end;"``
+        concatenated for every segment (seconds, including ``<sil>``) and
+        ``res`` is a list of ``[start_ms, end_ms]`` for the non-silence tokens.
+
+    Raises:
+        AssertionError: If the number of peaks is not ``len(tokens) + 1``.
+    """
+    START_END_THRESHOLD = 5
+    MAX_TOKEN_DURATION = 12
+    TIME_RATE = 10.0 * 6 / 1000 / 3  # 3 times upsampled
+    # support inference batch_size=1 only
+    cif_peak = us_cif_peak[0] if len(us_cif_peak.shape) == 2 else us_cif_peak
+    num_frames = cif_peak.shape[0]
+    # Guard against an empty token list before looking at its last element.
+    if char_list and char_list[-1] == '</s>':
+        char_list = char_list[:-1]
+    timestamp_list = []
+    new_char_list = []
+    # for bicif model trained with large data, cif2 actually fires when a character starts
+    # so treat the frames between two peaks as the duration of the former token
+    fire_place = torch.where(cif_peak > 1.0 - 1e-4)[0].cpu().numpy() - 3.2  # total offset
+    num_peak = len(fire_place)
+    # number of peaks is supposed to be number of tokens + 1
+    assert num_peak == len(char_list) + 1
+    # begin silence
+    if fire_place[0] > START_END_THRESHOLD:
+        timestamp_list.append([0.0, fire_place[0] * TIME_RATE])
+        new_char_list.append('<sil>')
+    # tokens timestamp
+    for i in range(num_peak - 1):
+        new_char_list.append(char_list[i])
+        start, stop = fire_place[i], fire_place[i + 1]
+        if MAX_TOKEN_DURATION < 0 or stop - start < MAX_TOKEN_DURATION:
+            timestamp_list.append([start * TIME_RATE, stop * TIME_RATE])
+        else:
+            # cut the duration to token and sil of the 0-weight frames last long
+            _split = start + MAX_TOKEN_DURATION
+            timestamp_list.append([start * TIME_RATE, _split * TIME_RATE])
+            timestamp_list.append([_split * TIME_RATE, stop * TIME_RATE])
+            new_char_list.append('<sil>')
+    # tail token and end silence
+    if num_frames - fire_place[-1] > START_END_THRESHOLD:
+        _end = (num_frames + fire_place[-1]) * 0.5
+        # With no tokens and no leading silence there is no segment to extend.
+        if timestamp_list:
+            timestamp_list[-1][1] = _end * TIME_RATE
+        timestamp_list.append([_end * TIME_RATE, num_frames * TIME_RATE])
+        new_char_list.append("<sil>")
+    elif timestamp_list:
+        timestamp_list[-1][1] = num_frames * TIME_RATE
+    assert len(new_char_list) == len(timestamp_list)
+    # Adding 0.0005 before truncating to 5 characters rounds to milliseconds.
+    res_str = "".join(
+        "{} {} {};".format(char, str(ts[0] + 0.0005)[:5], str(ts[1] + 0.0005)[:5])
+        for char, ts in zip(new_char_list, timestamp_list)
+    )
+    res = [
+        [int(ts[0] * 1000), int(ts[1] * 1000)]
+        for char, ts in zip(new_char_list, timestamp_list)
+        if char != '<sil>'
+    ]
+    return res_str, res
+
+
+class SpeechText2Timestamp:
+    """Callable wrapper that runs a timestamp-prediction model on speech + text lengths.
+
+    The model is built with ``ASRTask.build_model_from_file``; when the training
+    arguments declare a frontend, a ``WavFrontend`` is created and applied to
+    the input before the encoder.
+    """
+
+    def __init__(
+        self,
+        timestamp_infer_config: Union[Path, str] = None,
+        timestamp_model_file: Union[Path, str] = None,
+        timestamp_cmvn_file: Union[Path, str] = None,
+        device: str = "cpu",
+        dtype: str = "float32",
+        **kwargs,
+    ):
+        """Load the model and optional frontend.
+
+        Args:
+            timestamp_infer_config: Model configuration file.
+            timestamp_model_file: Model parameter file.
+            timestamp_cmvn_file: CMVN file handed to ``WavFrontend``.
+            device: Device string; any value containing ``"cuda"`` moves the
+                model to CUDA.
+            dtype: Name of the torch dtype the model is cast to.
+            **kwargs: Accepted and ignored.
+        """
+        assert check_argument_types()
+        # 1. Build the timestamp-prediction model
+        tp_model, tp_train_args = ASRTask.build_model_from_file(
+            timestamp_infer_config, timestamp_model_file, device
+        )
+        if 'cuda' in device:
+            tp_model = tp_model.cuda()  # force model to cuda
+
+        frontend = None
+        if tp_train_args.frontend is not None:
+            frontend = WavFrontend(cmvn_file=timestamp_cmvn_file, **tp_train_args.frontend_conf)
+
+        logging.info("tp_model: {}".format(tp_model))
+        logging.info("tp_train_args: {}".format(tp_train_args))
+        tp_model.to(dtype=getattr(torch, dtype)).eval()
+
+        logging.info(f"Decoding device={device}, dtype={dtype}")
+
+        self.tp_model = tp_model
+        self.tp_train_args = tp_train_args
+
+        token_list = self.tp_model.token_list
+        self.converter = TokenIDConverter(token_list=token_list)
+
+        self.device = device
+        self.dtype = dtype
+        self.frontend = frontend
+        self.encoder_downsampling_factor = 1
+        if tp_train_args.encoder_conf["input_layer"] == "conv2d":
+            self.encoder_downsampling_factor = 4
+
+    @torch.no_grad()
+    def __call__(
+        self,
+        speech: Union[torch.Tensor, np.ndarray],
+        speech_lengths: Union[torch.Tensor, np.ndarray] = None,
+        text_lengths: Union[torch.Tensor, np.ndarray] = None
+    ):
+        """Run frontend, encoder and predictor.
+
+        Args:
+            speech: Input audio (or features when no frontend is configured).
+            speech_lengths: Lengths matching ``speech``.
+            text_lengths: Token counts of the reference text; required.
+
+        Returns:
+            ``(us_alphas, us_cif_peak)`` as returned by
+            ``tp_model.calc_predictor_timestamp``.
+
+        Raises:
+            ValueError: If ``text_lengths`` is ``None``.
+        """
+        assert check_argument_types()
+        if text_lengths is None:
+            raise ValueError("text_lengths is required for timestamp prediction")
+
+        # numpy inputs are allowed by the signature; normalise them to tensors
+        # so that the tensor-only calls below (``.to``, ``.int``) work.
+        if isinstance(speech, np.ndarray):
+            speech = torch.tensor(speech)
+        if isinstance(speech_lengths, np.ndarray):
+            speech_lengths = torch.tensor(speech_lengths)
+        if isinstance(text_lengths, np.ndarray):
+            text_lengths = torch.tensor(text_lengths)
+
+        # Input as audio signal
+        if self.frontend is not None:
+            feats, feats_len = self.frontend.forward(speech, speech_lengths)
+            feats = to_device(feats, device=self.device)
+            feats_len = feats_len.int()
+            # NOTE(review): presumably prevents the model from applying its own
+            # frontend a second time inside encode() -- confirm.
+            self.tp_model.frontend = None
+        else:
+            feats = speech
+            feats_len = speech_lengths
+
+        batch = {"speech": feats, "speech_lengths": feats_len}
+
+        # a. To device
+        batch = to_device(batch, device=self.device)
+
+        # b. Forward Encoder
+        enc, enc_len = self.tp_model.encode(**batch)
+        if isinstance(enc, tuple):
+            enc = enc[0]
+
+        # c. Forward Predictor (one more than the token count is requested)
+        _, _, us_alphas, us_cif_peak = self.tp_model.calc_predictor_timestamp(
+            enc, enc_len, text_lengths.to(self.device) + 1
+        )
+        return us_alphas, us_cif_peak
+
+
+def inference(
+    batch_size: int,
+    ngpu: int,
+    log_level: Union[int, str],
+    data_path_and_name_and_type,
+    timestamp_infer_config: Optional[str],
+    timestamp_model_file: Optional[str],
+    timestamp_cmvn_file: Optional[str] = None,
+    raw_inputs: Union[np.ndarray, torch.Tensor] = None,
+    key_file: Optional[str] = None,
+    allow_variable_data_keys: bool = False,
+    output_dir: Optional[str] = None,
+    dtype: str = "float32",
+    seed: int = 0,
+    num_workers: int = 1,
+    split_with_space: bool = True,
+    seg_dict_file: Optional[str] = None,
+    **kwargs,
+):
+    """Build the timestamp-prediction pipeline and run it once.
+
+    All options except the data inputs are handed to ``inference_modelscope``;
+    the returned callable is then invoked with ``data_path_and_name_and_type``
+    and ``raw_inputs``.
+
+    Returns:
+        Whatever the pipeline returns for the given inputs.
+    """
+    builder_options = dict(
+        batch_size=batch_size,
+        ngpu=ngpu,
+        log_level=log_level,
+        timestamp_infer_config=timestamp_infer_config,
+        timestamp_model_file=timestamp_model_file,
+        timestamp_cmvn_file=timestamp_cmvn_file,
+        key_file=key_file,
+        allow_variable_data_keys=allow_variable_data_keys,
+        output_dir=output_dir,
+        dtype=dtype,
+        seed=seed,
+        num_workers=num_workers,
+        split_with_space=split_with_space,
+        seg_dict_file=seg_dict_file,
+    )
+    run_pipeline = inference_modelscope(**builder_options, **kwargs)
+    return run_pipeline(data_path_and_name_and_type, raw_inputs)
+
+
+def inference_modelscope(
+    batch_size: int,
+    ngpu: int,
+    log_level: Union[int, str],
+    # data_path_and_name_and_type,
+    timestamp_infer_config: Optional[str],
+    timestamp_model_file: Optional[str],
+    timestamp_cmvn_file: Optional[str] = None,
+    # raw_inputs: Union[np.ndarray, torch.Tensor] = None,
+    key_file: Optional[str] = None,
+    allow_variable_data_keys: bool = False,
+    output_dir: Optional[str] = None,
+    dtype: str = "float32",
+    seed: int = 0,
+    num_workers: int = 1,
+    split_with_space: bool = True,
+    seg_dict_file: Optional[str] = None,
+    **kwargs,
+):
+    """Load the timestamp model once and return a reusable inference callable.
+
+    Args:
+        batch_size: Must be 1; larger values raise ``NotImplementedError``.
+        ngpu: 0 for CPU, 1 for a single GPU (used only if CUDA is available).
+        log_level: Level passed to ``logging.basicConfig``.
+        timestamp_infer_config: Model configuration file.
+        timestamp_model_file: Model parameter file.
+        timestamp_cmvn_file: CMVN file for the frontend.
+        key_file: Forwarded to the streaming iterator.
+        allow_variable_data_keys: Forwarded to the streaming iterator.
+        output_dir: Accepted but not used in this function.
+        dtype: Torch dtype name for the model and the data loader.
+        seed: Random seed.
+        num_workers: Number of data-loader workers.
+        split_with_space: Forwarded to the text preprocessor.
+        seg_dict_file: Forwarded to the text preprocessor.
+        **kwargs: Accepted and ignored.
+
+    Returns:
+        A function ``_forward(data_path_and_name_and_type, raw_inputs=None, ...)``
+        returning a list of ``{'key', 'value', 'timestamp'}`` dicts.
+
+    Raises:
+        NotImplementedError: If ``batch_size > 1`` or ``ngpu > 1``.
+    """
+    assert check_argument_types()
+    if batch_size > 1:
+        raise NotImplementedError("batch decoding is not implemented")
+    if ngpu > 1:
+        raise NotImplementedError("only single GPU decoding is supported")
+
+    logging.basicConfig(
+        level=log_level,
+        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+    )
+
+    if ngpu >= 1 and torch.cuda.is_available():
+        device = "cuda"
+    else:
+        device = "cpu"
+    # 1. Set random-seed
+    set_all_random_seed(seed)
+
+    # 2. Build the speech+text -> timestamp predictor
+    speechtext2timestamp_kwargs = dict(
+        timestamp_infer_config=timestamp_infer_config,
+        timestamp_model_file=timestamp_model_file,
+        timestamp_cmvn_file=timestamp_cmvn_file,
+        device=device,
+        dtype=dtype,
+    )
+    logging.info("speechtext2timestamp_kwargs: {}".format(speechtext2timestamp_kwargs))
+    speechtext2timestamp = SpeechText2Timestamp(**speechtext2timestamp_kwargs)
+
+    preprocessor = LMPreprocessor(
+        train=False,
+        token_type=speechtext2timestamp.tp_train_args.token_type,
+        token_list=speechtext2timestamp.tp_train_args.token_list,
+        bpemodel=None,
+        text_cleaner=None,
+        g2p_type=None,
+        text_name="text",
+        non_linguistic_symbols=speechtext2timestamp.tp_train_args.non_linguistic_symbols,
+        split_with_space=split_with_space,
+        seg_dict_file=seg_dict_file,
+    )
+
+    def _forward(
+        data_path_and_name_and_type,
+        raw_inputs: Union[np.ndarray, torch.Tensor] = None,
+        output_dir_v2: Optional[str] = None,
+        fs: dict = None,
+        param_dict: dict = None,
+        **kwargs
+    ):
+        """Predict timestamps for every utterance yielded by the data iterator.
+
+        ``output_dir_v2``, ``fs``, ``param_dict`` and ``**kwargs`` are accepted
+        but not used here.
+        """
+        # 3. Build data-iterator
+        if data_path_and_name_and_type is None and raw_inputs is not None:
+            if isinstance(raw_inputs, torch.Tensor):
+                # detach()/cpu() keep the conversion valid for CUDA tensors and
+                # tensors that track gradients; a plain .numpy() raises on both.
+                raw_inputs = raw_inputs.detach().cpu().numpy()
+            data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
+
+        loader = ASRTask.build_streaming_iterator(
+            data_path_and_name_and_type,
+            dtype=dtype,
+            batch_size=batch_size,
+            key_file=key_file,
+            num_workers=num_workers,
+            preprocess_fn=preprocessor,
+            collate_fn=ASRTask.build_collate_fn(speechtext2timestamp.tp_train_args, False),
+            allow_variable_data_keys=allow_variable_data_keys,
+            inference=True,
+        )
+
+        tp_result_list = []
+        for keys, batch in loader:
+            assert isinstance(batch, dict), type(batch)
+            assert all(isinstance(s, str) for s in keys), keys
+            _bs = len(next(iter(batch.values())))
+            assert len(keys) == _bs, f"{len(keys)} != {_bs}"
+
+            logging.info("timestamp predicting, utt_id: {}".format(keys))
+            _batch = {'speech': batch['speech'],
+                      'speech_lengths': batch['speech_lengths'],
+                      'text_lengths': batch['text_lengths']}
+            us_alphas, us_cif_peak = speechtext2timestamp(**_batch)
+
+            for batch_id in range(_bs):
+                key = keys[batch_id]
+                token = speechtext2timestamp.converter.ids2tokens(batch['text'][batch_id])
+                ts_str, ts_list = time_stamp_lfr6_advance(us_alphas[batch_id], us_cif_peak[batch_id], token)
+                logging.warning(ts_str)
+                item = {'key': key, 'value': ts_str, 'timestamp': ts_list}
+                tp_result_list.append(item)
+        return tp_result_list
+
+    return _forward
+
+
+def get_parser():
+    """Build the command-line parser for timestamp-prediction inference.
+
+    Returns:
+        The ``config_argparse.ArgumentParser`` with logging, device, input-data,
+        model and inference options registered.
+    """
+    parser = config_argparse.ArgumentParser(
+        description="Timestamp Prediction Inference",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    # Note(kamo): Use '_' instead of '-' as separator.
+    # '-' is confusing if written in yaml.
+    parser.add_argument(
+        "--log_level",
+        type=lambda x: x.upper(),
+        default="INFO",
+        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
+        help="The verbose level of logging",
+    )
+
+    parser.add_argument("--output_dir", type=str, required=False)
+    parser.add_argument(
+        "--ngpu",
+        type=int,
+        default=0,
+        help="The number of gpus. 0 indicates CPU mode",
+    )
+    parser.add_argument(
+        "--gpuid_list",
+        type=str,
+        default="",
+        help="The visible gpus",
+    )
+    parser.add_argument("--seed", type=int, default=0, help="Random seed")
+    parser.add_argument(
+        "--dtype",
+        default="float32",
+        choices=["float16", "float32", "float64"],
+        help="Data type",
+    )
+    parser.add_argument(
+        "--num_workers",
+        type=int,
+        default=0,
+        help="The number of workers used for DataLoader",
+    )
+
+    group = parser.add_argument_group("Input data related")
+    group.add_argument(
+        "--data_path_and_name_and_type",
+        type=str2triple_str,
+        required=False,
+        action="append",
+    )
+    # NOTE(review): ``type=list`` turns a command-line string into a list of
+    # characters; this option looks intended for programmatic callers only.
+    group.add_argument("--raw_inputs", type=list, default=None)
+    group.add_argument("--key_file", type=str_or_none)
+    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)
+
+    group = parser.add_argument_group("The model configuration related")
+    group.add_argument(
+        "--timestamp_infer_config",
+        type=str,
+        help="Timestamp prediction model configuration file",
+    )
+    group.add_argument(
+        "--timestamp_model_file",
+        type=str,
+        help="Timestamp prediction model parameter file",
+    )
+    group.add_argument(
+        "--timestamp_cmvn_file",
+        type=str,
+        help="Global cmvn file",
+    )
+
+    group = parser.add_argument_group("infer related")
+    group.add_argument(
+        "--batch_size",
+        type=int,
+        default=1,
+        help="The batch size for inference",
+    )
+    group.add_argument(
+        "--seg_dict_file",
+        type=str,
+        default=None,
+        help="Segmentation dictionary file passed to the text preprocessor",
+    )
+    # ``type=bool`` would treat every non-empty string (including "False") as
+    # True; str2bool parses the text like the other boolean option above.
+    group.add_argument(
+        "--split_with_space",
+        type=str2bool,
+        default=False,
+        help="Value of split_with_space passed to the text preprocessor",
+    )
+
+    return parser
+
+
+def main(cmd=None):
+    """Parse the command line and run timestamp-prediction inference.
+
+    Args:
+        cmd: Optional list of argument strings; ``None`` lets argparse read
+            ``sys.argv``.
+    """
+    # Echo the invoking command line to stderr.
+    print(get_commandline_args(), file=sys.stderr)
+    options = vars(get_parser().parse_args(cmd))
+    # "config" is dropped before the options are forwarded to inference().
+    options.pop("config", None)
+    inference(**options)
+
+
+# Run the command-line entry point only when this file is executed as a script.
+if __name__ == "__main__":
+ main()
diff --git a/funasr/bin/tp_inference_launch.py b/funasr/bin/tp_inference_launch.py
new file mode 100644
index 0000000..dd76df6
--- /dev/null
+++ b/funasr/bin/tp_inference_launch.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+# Copyright ESPnet (https://github.com/espnet/espnet). All Rights Reserved.
+# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+
+import argparse
+import logging
+import os
+import sys
+from typing import Union, Dict, Any
+
+from funasr.utils import config_argparse
+from funasr.utils.cli_utils import get_commandline_args
+from funasr.utils.types import str2bool
+from funasr.utils.types import str2triple_str
+from funasr.utils.types import str_or_none
+
+
+def get_parser():
+    """Build the command-line parser for the timestamp-prediction launcher.
+
+    Returns:
+        The ``config_argparse.ArgumentParser`` with logging, device/job,
+        input-data, model and inference options registered. ``main`` adds the
+        ``--mode`` option on top of these.
+    """
+    parser = config_argparse.ArgumentParser(
+        description="Timestamp Prediction Inference",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    # Note(kamo): Use '_' instead of '-' as separator.
+    # '-' is confusing if written in yaml.
+    parser.add_argument(
+        "--log_level",
+        type=lambda x: x.upper(),
+        default="INFO",
+        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
+        help="The verbose level of logging",
+    )
+
+    parser.add_argument("--output_dir", type=str, required=False)
+    parser.add_argument(
+        "--ngpu",
+        type=int,
+        default=0,
+        help="The number of gpus. 0 indicates CPU mode",
+    )
+    parser.add_argument(
+        "--njob",
+        type=int,
+        default=1,
+        help="The number of jobs for each gpu",
+    )
+    parser.add_argument(
+        "--gpuid_list",
+        type=str,
+        default="",
+        help="The visible gpus",
+    )
+    parser.add_argument("--seed", type=int, default=0, help="Random seed")
+    parser.add_argument(
+        "--dtype",
+        default="float32",
+        choices=["float16", "float32", "float64"],
+        help="Data type",
+    )
+    parser.add_argument(
+        "--num_workers",
+        type=int,
+        default=1,
+        help="The number of workers used for DataLoader",
+    )
+
+    group = parser.add_argument_group("Input data related")
+    group.add_argument(
+        "--data_path_and_name_and_type",
+        type=str2triple_str,
+        required=True,
+        action="append",
+    )
+    group.add_argument("--key_file", type=str_or_none)
+    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)
+
+    group = parser.add_argument_group("The model configuration related")
+    # The help strings below describe the timestamp model these options load.
+    group.add_argument(
+        "--timestamp_infer_config",
+        type=str,
+        help="Timestamp prediction model configuration file",
+    )
+    group.add_argument(
+        "--timestamp_model_file",
+        type=str,
+        help="Timestamp prediction model parameter file",
+    )
+    group.add_argument(
+        "--timestamp_cmvn_file",
+        type=str,
+        help="Global CMVN file",
+    )
+
+    group = parser.add_argument_group("The inference configuration related")
+    group.add_argument(
+        "--batch_size",
+        type=int,
+        default=1,
+        help="The batch size for inference",
+    )
+    return parser
+
+
+def inference_launch(mode, **kwargs):
+    """Return the inference callable for ``mode``.
+
+    Args:
+        mode: Decoding mode; only ``"tp_norm"`` is recognised.
+        **kwargs: Forwarded to ``inference_modelscope``.
+
+    Returns:
+        The result of ``inference_modelscope(**kwargs)`` for ``"tp_norm"``;
+        ``None`` (after logging the mode) for anything else.
+    """
+    if mode != "tp_norm":
+        logging.info("Unknown decoding mode: {}".format(mode))
+        return None
+
+    # Imported lazily, only once the mode is known to need it.
+    from funasr.bin.tp_inference import inference_modelscope
+    return inference_modelscope(**kwargs)
+
+def main(cmd=None):
+    """Parse the command line, select the GPU and run timestamp prediction.
+
+    ``inference_launch`` only builds and returns the inference callable, so the
+    callable is invoked here with the parsed ``--data_path_and_name_and_type``;
+    otherwise the model would be loaded and nothing decoded.
+
+    Args:
+        cmd: Optional list of argument strings; ``None`` lets argparse read
+            ``sys.argv``.
+    """
+    print(get_commandline_args(), file=sys.stderr)
+    parser = get_parser()
+    parser.add_argument(
+        "--mode",
+        type=str,
+        default="tp_norm",
+        help="The decoding mode",
+    )
+    args = parser.parse_args(cmd)
+    kwargs = vars(args)
+    kwargs.pop("config", None)
+
+    # set logging messages
+    logging.basicConfig(
+        level=args.log_level,
+        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
+    )
+    logging.info("Decoding args: {}".format(kwargs))
+
+    # gpu setting
+    if args.ngpu > 0:
+        # The job id is read from the suffix after the last "." of output_dir.
+        jobid = int(args.output_dir.split(".")[-1])
+        gpuid = args.gpuid_list.split(",")[(jobid - 1) // args.njob]
+        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+        os.environ["CUDA_VISIBLE_DEVICES"] = gpuid
+
+    inference_pipeline = inference_launch(**kwargs)
+    # None means the mode was not recognised (already logged by inference_launch).
+    if inference_pipeline is not None:
+        inference_pipeline(args.data_path_and_name_and_type)
+
+
+# Run the command-line entry point only when this file is executed as a script.
+if __name__ == "__main__":
+ main()
diff --git a/funasr/datasets/dataset.py b/funasr/datasets/dataset.py
index 2af93d0..1595224 100644
--- a/funasr/datasets/dataset.py
+++ b/funasr/datasets/dataset.py
@@ -107,7 +107,7 @@
return value[()]
-def sound_loader(path, float_dtype=None):
+def sound_loader(path, dest_sample_rate=16000, float_dtype=None):
# The file is as follows:
# utterance_id_A /some/where/a.wav
# utterance_id_B /some/where/a.flac
@@ -115,7 +115,7 @@
# NOTE(kamo): SoundScpReader doesn't support pipe-fashion
# like Kaldi e.g. "cat a.wav |".
# NOTE(kamo): The audio signal is normalized to [-1,1] range.
- loader = SoundScpReader(path, normalize=True, always_2d=False)
+ loader = SoundScpReader(path, dest_sample_rate=dest_sample_rate, normalize=True, always_2d=False)
# SoundScpReader.__getitem__() returns Tuple[int, ndarray],
# but ndarray is desired, so Adapter class is inserted here
@@ -139,7 +139,7 @@
DATA_TYPES = {
"sound": dict(
func=sound_loader,
- kwargs=["float_dtype"],
+ kwargs=["dest_sample_rate","float_dtype"],
help="Audio format types which supported by sndfile wav, flac, etc."
"\n\n"
" utterance_id_a a.wav\n"
@@ -282,6 +282,7 @@
int_dtype: str = "long",
max_cache_size: Union[float, int, str] = 0.0,
max_cache_fd: int = 0,
+ dest_sample_rate: int = 16000,
):
assert check_argument_types()
if len(path_name_type_list) == 0:
@@ -295,6 +296,7 @@
self.float_dtype = float_dtype
self.int_dtype = int_dtype
self.max_cache_fd = max_cache_fd
+ self.dest_sample_rate = dest_sample_rate
self.loader_dict = {}
self.debug_info = {}
@@ -335,6 +337,8 @@
for key2 in dic["kwargs"]:
if key2 == "loader_type":
kwargs["loader_type"] = loader_type
+ elif key2 == "dest_sample_rate" and loader_type=="sound":
+ kwargs["dest_sample_rate"] = self.dest_sample_rate
elif key2 == "float_dtype":
kwargs["float_dtype"] = self.float_dtype
elif key2 == "int_dtype":
diff --git a/funasr/datasets/iterable_dataset.py b/funasr/datasets/iterable_dataset.py
index 2f97e78..49c7068 100644
--- a/funasr/datasets/iterable_dataset.py
+++ b/funasr/datasets/iterable_dataset.py
@@ -66,7 +66,7 @@
return load_bytes(bytes)
DATA_TYPES = {
- "sound": lambda x: torchaudio.load(x)[0][0].numpy(),
+ "sound": lambda x: torchaudio.load(x)[0].numpy(),
"pcm": load_pcm,
"kaldi_ark": load_kaldi,
"bytes": load_bytes,
@@ -106,6 +106,7 @@
] = None,
float_dtype: str = "float32",
fs: dict = None,
+ mc: bool = False,
int_dtype: str = "long",
key_file: str = None,
):
@@ -122,6 +123,7 @@
self.int_dtype = int_dtype
self.key_file = key_file
self.fs = fs
+ self.mc = mc
self.debug_info = {}
non_iterable_list = []
@@ -192,6 +194,7 @@
array = torchaudio.transforms.Resample(orig_freq=audio_fs,
new_freq=model_fs)(array)
array = array.squeeze(0).numpy()
+
data[name] = array
if self.preprocess is not None:
@@ -238,11 +241,17 @@
model_fs = self.fs["model_fs"]
if audio_fs is not None and model_fs is not None:
array = torch.from_numpy(array)
- array = array.unsqueeze(0)
array = torchaudio.transforms.Resample(orig_freq=audio_fs,
new_freq=model_fs)(array)
- array = array.squeeze(0).numpy()
- data[name] = array
+ array = array.numpy()
+
+ if _type == "sound":
+ if self.mc:
+ data[name] = array.transpose((1, 0))
+ else:
+ data[name] = array[0]
+ else:
+ data[name] = array
if self.preprocess is not None:
data = self.preprocess(uid, data)
@@ -340,11 +349,16 @@
model_fs = self.fs["model_fs"]
if audio_fs is not None and model_fs is not None:
array = torch.from_numpy(array)
- array = array.unsqueeze(0)
array = torchaudio.transforms.Resample(orig_freq=audio_fs,
new_freq=model_fs)(array)
- array = array.squeeze(0).numpy()
- data[name] = array
+ array = array.numpy()
+ if _type == "sound":
+ if self.mc:
+ data[name] = array.transpose((1, 0))
+ else:
+ data[name] = array[0]
+ else:
+ data[name] = array
if self.non_iterable_dataset is not None:
# 2.b. Load data from non-iterable dataset
_, from_non_iterable = self.non_iterable_dataset[uid]
diff --git a/funasr/export/models/encoder/conformer_encoder.py b/funasr/export/models/encoder/conformer_encoder.py
index 9f22574..0a35653 100644
--- a/funasr/export/models/encoder/conformer_encoder.py
+++ b/funasr/export/models/encoder/conformer_encoder.py
@@ -61,7 +61,6 @@
speech: torch.Tensor,
speech_lengths: torch.Tensor,
):
- speech = speech * self._output_size ** 0.5
mask = self.make_pad_mask(speech_lengths)
mask = self.prepare_mask(mask)
if self.embed is None:
diff --git a/funasr/export/models/modules/decoder_layer.py b/funasr/export/models/modules/decoder_layer.py
index f539452..9a464a4 100644
--- a/funasr/export/models/modules/decoder_layer.py
+++ b/funasr/export/models/modules/decoder_layer.py
@@ -54,6 +54,7 @@
def forward(self, tgt, tgt_mask, memory, memory_mask, cache=None):
residual = tgt
+ tgt = self.norm1(tgt)
tgt_q = tgt
tgt_q_mask = tgt_mask
x = residual + self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)
diff --git a/funasr/export/models/modules/encoder_layer.py b/funasr/export/models/modules/encoder_layer.py
index 622b109..d132574 100644
--- a/funasr/export/models/modules/encoder_layer.py
+++ b/funasr/export/models/modules/encoder_layer.py
@@ -61,7 +61,7 @@
if self.feed_forward_macaron is not None:
residual = x
x = self.norm_ff_macaron(x)
- x = residual + self.feed_forward_macaron(x)
+ x = residual + self.feed_forward_macaron(x) * 0.5
residual = x
x = self.norm_mha(x)
@@ -81,7 +81,7 @@
residual = x
x = self.norm_ff(x)
- x = residual + self.feed_forward(x)
+ x = residual + self.feed_forward(x) * 0.5
x = self.norm_final(x)
diff --git a/funasr/fileio/sound_scp.py b/funasr/fileio/sound_scp.py
index 459369e..dc872b0 100644
--- a/funasr/fileio/sound_scp.py
+++ b/funasr/fileio/sound_scp.py
@@ -4,6 +4,7 @@
import numpy as np
import soundfile
+import librosa
from typeguard import check_argument_types
from funasr.fileio.read_text import read_2column_text
@@ -30,6 +31,7 @@
dtype=np.int16,
always_2d: bool = False,
normalize: bool = False,
+ dest_sample_rate: int = 16000,
):
assert check_argument_types()
self.fname = fname
@@ -37,15 +39,18 @@
self.always_2d = always_2d
self.normalize = normalize
self.data = read_2column_text(fname)
+ self.dest_sample_rate = dest_sample_rate
def __getitem__(self, key):
wav = self.data[key]
if self.normalize:
# soundfile.read normalizes data to [-1,1] if dtype is not given
- array, rate = soundfile.read(wav, always_2d=self.always_2d)
+ array, rate = librosa.load(
+ wav, sr=self.dest_sample_rate, mono=not self.always_2d
+ )
else:
- array, rate = soundfile.read(
- wav, dtype=self.dtype, always_2d=self.always_2d
+ array, rate = librosa.load(
+ wav, sr=self.dest_sample_rate, mono=not self.always_2d, dtype=self.dtype
)
return rate, array
diff --git a/funasr/models/e2e_asr_paraformer.py b/funasr/models/e2e_asr_paraformer.py
index 5786bc4..8439f40 100644
--- a/funasr/models/e2e_asr_paraformer.py
+++ b/funasr/models/e2e_asr_paraformer.py
@@ -978,6 +978,7 @@
loss, stats, weight = force_gatherable((loss, stats, batch_size), loss.device)
return loss, stats, weight
+
class ContextualParaformer(Paraformer):
"""
Paraformer model with contextual hotword
diff --git a/funasr/models/e2e_diar_sond.py b/funasr/models/e2e_diar_sond.py
index 419c813..258d780 100644
--- a/funasr/models/e2e_diar_sond.py
+++ b/funasr/models/e2e_diar_sond.py
@@ -342,6 +342,7 @@
if isinstance(self.ci_scorer, AbsEncoder):
ci_simi = self.ci_scorer(ge_in, ge_len)[0]
+ ci_simi = torch.reshape(ci_simi, [bb, self.max_spk_num, tt]).permute([0, 2, 1])
else:
ci_simi = self.ci_scorer(speech_encoder_outputs, speaker_encoder_outputs)
diff --git a/funasr/models/encoder/opennmt_encoders/conv_encoder.py b/funasr/models/encoder/opennmt_encoders/conv_encoder.py
index 4096743..a33e0b7 100644
--- a/funasr/models/encoder/opennmt_encoders/conv_encoder.py
+++ b/funasr/models/encoder/opennmt_encoders/conv_encoder.py
@@ -137,12 +137,12 @@
self.out_padding = nn.ConstantPad1d((left_padding, right_padding), 0.0)
self.conv_out = nn.Conv1d(
num_units,
- num_units,
+ out_units,
kernel_size,
)
if self.out_norm:
- self.after_norm = LayerNorm(num_units)
+ self.after_norm = LayerNorm(out_units)
def output_size(self) -> int:
return self.num_units
diff --git a/funasr/models/encoder/opennmt_encoders/self_attention_encoder.py b/funasr/models/encoder/opennmt_encoders/self_attention_encoder.py
index 443b37a..cf77bce 100644
--- a/funasr/models/encoder/opennmt_encoders/self_attention_encoder.py
+++ b/funasr/models/encoder/opennmt_encoders/self_attention_encoder.py
@@ -272,7 +272,7 @@
position embedded tensor and mask
"""
masks = (~make_pad_mask(ilens)[:, None, :]).to(xs_pad.device)
- xs_pad *= self.output_size()**0.5
+ xs_pad = xs_pad * self.output_size()**0.5
if self.embed is None:
xs_pad = xs_pad
elif (
diff --git a/funasr/models/encoder/resnet34_encoder.py b/funasr/models/encoder/resnet34_encoder.py
index 952ce15..7d7179a 100644
--- a/funasr/models/encoder/resnet34_encoder.py
+++ b/funasr/models/encoder/resnet34_encoder.py
@@ -387,7 +387,6 @@
return var_dict_torch_update
-
class ResNet34Diar(ResNet34):
def __init__(
self,
@@ -613,3 +612,230 @@
logging.warning("{} is missed from tf checkpoint".format(name))
return var_dict_torch_update
+
+
+class ResNet34SpL2RegDiar(ResNet34_SP_L2Reg):
+ """ResNet34_SP_L2Reg backbone followed by statistic pooling and two dense+BN layers.
+
+ ``forward`` records every intermediate tensor under a name and returns the one
+ selected by ``embedding_node``. The class also provides the name mapping used
+ to load weights from a TensorFlow checkpoint.
+ """
+ def __init__(
+ self,
+ input_size,
+ embedding_node="resnet1_dense",
+ use_head_conv=True,
+ batchnorm_momentum=0.5,
+ use_head_maxpool=False,
+ num_nodes_pooling_layer=256,
+ layers_in_block=(3, 4, 6, 3),
+ filters_in_block=(32, 64, 128, 256),
+ num_nodes_resnet1=256,
+ num_nodes_last_layer=256,
+ pooling_type="window_shift",
+ pool_size=20,
+ stride=1,
+ tf2torch_tensor_name_prefix_torch="encoder",
+ tf2torch_tensor_name_prefix_tf="seq2seq/speech_encoder"
+ ):
+ # Backbone options are forwarded to the parent; the remaining arguments
+ # configure the pooling and the two dense heads created below.
+ super(ResNet34SpL2RegDiar, self).__init__(
+ input_size,
+ use_head_conv=use_head_conv,
+ batchnorm_momentum=batchnorm_momentum,
+ use_head_maxpool=use_head_maxpool,
+ num_nodes_pooling_layer=num_nodes_pooling_layer,
+ layers_in_block=layers_in_block,
+ filters_in_block=filters_in_block,
+ )
+
+ self.embedding_node = embedding_node
+ self.num_nodes_resnet1 = num_nodes_resnet1
+ self.num_nodes_last_layer = num_nodes_last_layer
+ self.pooling_type = pooling_type
+ self.pool_size = pool_size
+ self.stride = stride
+ self.tf2torch_tensor_name_prefix_torch = tf2torch_tensor_name_prefix_torch
+ self.tf2torch_tensor_name_prefix_tf = tf2torch_tensor_name_prefix_tf
+
+ # Input width is twice num_nodes_pooling_layer.
+ # NOTE(review): presumably because statistic pooling concatenates mean and
+ # standard deviation -- confirm against statistic_pooling.
+ self.resnet1_dense = torch.nn.Linear(num_nodes_pooling_layer * 2, num_nodes_resnet1)
+ self.resnet1_bn = torch.nn.BatchNorm1d(num_nodes_resnet1, eps=1e-3, momentum=batchnorm_momentum)
+
+ self.resnet2_dense = torch.nn.Linear(num_nodes_resnet1, num_nodes_last_layer)
+ self.resnet2_bn = torch.nn.BatchNorm1d(num_nodes_last_layer, eps=1e-3, momentum=batchnorm_momentum)
+
+ def output_size(self) -> int:
+ """Return the width of the endpoint selected by ``embedding_node``."""
+ if self.embedding_node.startswith("resnet1"):
+ return self.num_nodes_resnet1
+ elif self.embedding_node.startswith("resnet2"):
+ return self.num_nodes_last_layer
+
+ # Any other endpoint name falls back to the pooling-layer width.
+ # NOTE(review): num_nodes_pooling_layer is not assigned in this class;
+ # presumably set by the parent constructor -- confirm.
+ return self.num_nodes_pooling_layer
+
+ def forward(
+ self,
+ xs_pad: torch.Tensor,
+ ilens: torch.Tensor,
+ prev_states: torch.Tensor = None,
+ ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
+ """Run the backbone, pooling and dense heads.
+
+ ``prev_states`` is accepted but not used. Returns the tensor stored
+ under ``embedding_node``, the (possibly updated) lengths, and ``None``.
+ """
+
+ # Every intermediate result is kept by name so any of them can be returned.
+ endpoints = OrderedDict()
+ res_out, ilens = super().forward(xs_pad, ilens)
+ endpoints["resnet0_bn"] = res_out
+ # "frame_gsp" pools with statistic_pooling; any other pooling_type uses
+ # the windowed variant with pool_size / stride.
+ if self.pooling_type == "frame_gsp":
+ features = statistic_pooling(res_out, ilens, (2, ))
+ else:
+ features, ilens = windowed_statistic_pooling(res_out, ilens, (2, ), self.pool_size, self.stride)
+ features = features.transpose(1, 2)
+ endpoints["pooling"] = features
+
+ features = self.resnet1_dense(features)
+ endpoints["resnet1_dense"] = features
+ features = F.relu(features)
+ endpoints["resnet1_relu"] = features
+ # BatchNorm1d normalises over dim 1, so the feature axis is moved there
+ # and back. NOTE(review): assumes features is (batch, time, feature) here.
+ features = self.resnet1_bn(features.transpose(1, 2)).transpose(1, 2)
+ endpoints["resnet1_bn"] = features
+
+ features = self.resnet2_dense(features)
+ endpoints["resnet2_dense"] = features
+ features = F.relu(features)
+ endpoints["resnet2_relu"] = features
+ features = self.resnet2_bn(features.transpose(1, 2)).transpose(1, 2)
+ endpoints["resnet2_bn"] = features
+
+ return endpoints[self.embedding_node], ilens, None
+
+ def gen_tf2torch_map_dict(self):
+ """Build the torch-parameter-name -> TF-variable mapping.
+
+ Each value is either a dict with the TF variable ``name`` plus optional
+ ``squeeze`` / ``transpose`` instructions, or (for ``num_batches_tracked``
+ entries) the integer ``train_steps``.
+ """
+ tensor_name_prefix_torch = self.tf2torch_tensor_name_prefix_torch
+ tensor_name_prefix_tf = self.tf2torch_tensor_name_prefix_tf
+ # Constant assigned to every BatchNorm ``num_batches_tracked`` buffer.
+ train_steps = 720000
+ map_dict_local = {
+ # torch: conv1d.weight in "out_channel in_channel kernel_size"
+ # tf : conv1d.weight in "kernel_size in_channel out_channel"
+ # torch: linear.weight in "out_channel in_channel"
+ # tf : dense.weight in "in_channel out_channel"
+ "{}.pre_conv.weight".format(tensor_name_prefix_torch):
+ {"name": "{}/pre_conv/kernel".format(tensor_name_prefix_tf),
+ "squeeze": None,
+ "transpose": (3, 2, 0, 1),
+ },
+ "{}.pre_conv_bn.bias".format(tensor_name_prefix_torch):
+ {"name": "{}/pre_conv_bn/beta".format(tensor_name_prefix_tf),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.pre_conv_bn.weight".format(tensor_name_prefix_torch):
+ {"name": "{}/pre_conv_bn/gamma".format(tensor_name_prefix_tf),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.pre_conv_bn.running_mean".format(tensor_name_prefix_torch):
+ {"name": "{}/pre_conv_bn/moving_mean".format(tensor_name_prefix_tf),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.pre_conv_bn.running_var".format(tensor_name_prefix_torch):
+ {"name": "{}/pre_conv_bn/moving_variance".format(tensor_name_prefix_tf),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.pre_conv_bn.num_batches_tracked".format(tensor_name_prefix_torch): train_steps
+ }
+ # Dense / BN heads resnet0..resnet2. Only resnet1_* and resnet2_* modules
+ # are created in this class's __init__.
+ # NOTE(review): resnet0_* entries only matter if the parent defines such
+ # modules -- confirm.
+ for layer_idx in range(3):
+ map_dict_local.update({
+ "{}.resnet{}_dense.weight".format(tensor_name_prefix_torch, layer_idx):
+ {"name": "{}/resnet{}_dense/kernel".format(tensor_name_prefix_tf, layer_idx),
+ "squeeze": None,
+ "transpose": (2, 1, 0) if layer_idx == 0 else (1, 0),
+ },
+ "{}.resnet{}_dense.bias".format(tensor_name_prefix_torch, layer_idx):
+ {"name": "{}/resnet{}_dense/bias".format(tensor_name_prefix_tf, layer_idx),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.resnet{}_bn.weight".format(tensor_name_prefix_torch, layer_idx):
+ {"name": "{}/resnet{}_bn/gamma".format(tensor_name_prefix_tf, layer_idx),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.resnet{}_bn.bias".format(tensor_name_prefix_torch, layer_idx):
+ {"name": "{}/resnet{}_bn/beta".format(tensor_name_prefix_tf, layer_idx),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.resnet{}_bn.running_mean".format(tensor_name_prefix_torch, layer_idx):
+ {"name": "{}/resnet{}_bn/moving_mean".format(tensor_name_prefix_tf, layer_idx),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.resnet{}_bn.running_var".format(tensor_name_prefix_torch, layer_idx):
+ {"name": "{}/resnet{}_bn/moving_variance".format(tensor_name_prefix_tf, layer_idx),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.resnet{}_bn.num_batches_tracked".format(tensor_name_prefix_torch, layer_idx): train_steps
+ })
+
+ # Residual blocks: two main convolutions ("1", "2") plus the "_sc" one,
+ # each with its own batch norm.
+ for block_idx in range(len(self.layers_in_block)):
+ for layer_idx in range(self.layers_in_block[block_idx]):
+ for i in ["1", "2", "_sc"]:
+ map_dict_local.update({
+ "{}.block_{}.layer_{}.conv{}.weight".format(tensor_name_prefix_torch, block_idx, layer_idx, i):
+ {"name": "{}/block_{}/layer_{}/conv{}/kernel".format(tensor_name_prefix_tf, block_idx, layer_idx, i),
+ "squeeze": None,
+ "transpose": (3, 2, 0, 1),
+ },
+ "{}.block_{}.layer_{}.bn{}.weight".format(tensor_name_prefix_torch, block_idx, layer_idx, i):
+ {"name": "{}/block_{}/layer_{}/bn{}/gamma".format(tensor_name_prefix_tf, block_idx, layer_idx, i),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.block_{}.layer_{}.bn{}.bias".format(tensor_name_prefix_torch, block_idx, layer_idx, i):
+ {"name": "{}/block_{}/layer_{}/bn{}/beta".format(tensor_name_prefix_tf, block_idx, layer_idx, i),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.block_{}.layer_{}.bn{}.running_mean".format(tensor_name_prefix_torch, block_idx, layer_idx, i):
+ {"name": "{}/block_{}/layer_{}/bn{}/moving_mean".format(tensor_name_prefix_tf, block_idx, layer_idx, i),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.block_{}.layer_{}.bn{}.running_var".format(tensor_name_prefix_torch, block_idx, layer_idx, i):
+ {"name": "{}/block_{}/layer_{}/bn{}/moving_variance".format(tensor_name_prefix_tf, block_idx, layer_idx, i),
+ "squeeze": None,
+ "transpose": None,
+ },
+ "{}.block_{}.layer_{}.bn{}.num_batches_tracked".format(tensor_name_prefix_torch, block_idx, layer_idx, i): train_steps,
+ })
+
+ return map_dict_local
+
+ def convert_tf2torch(self,
+ var_dict_tf,
+ var_dict_torch,
+ ):
+ """Convert TF checkpoint arrays into torch tensors for this encoder.
+
+ Only torch names starting with ``tf2torch_tensor_name_prefix_torch`` are
+ considered. Returns a dict of torch name -> CPU tensor; names without a
+ mapping entry are logged as missing and left out.
+ """
+
+ map_dict = self.gen_tf2torch_map_dict()
+
+ var_dict_torch_update = dict()
+ for name in sorted(var_dict_torch.keys(), reverse=False):
+ if name.startswith(self.tf2torch_tensor_name_prefix_torch):
+ if name in map_dict:
+ if "num_batches_tracked" not in name:
+ # Regular parameter: fetch the TF array, then apply the
+ # squeeze / transpose recorded in the mapping.
+ name_tf = map_dict[name]["name"]
+ data_tf = var_dict_tf[name_tf]
+ if map_dict[name]["squeeze"] is not None:
+ data_tf = np.squeeze(data_tf, axis=map_dict[name]["squeeze"])
+ if map_dict[name]["transpose"] is not None:
+ data_tf = np.transpose(data_tf, map_dict[name]["transpose"])
+ data_tf = torch.from_numpy(data_tf).type(torch.float32).to("cpu")
+ # The converted tensor must match the torch parameter's shape.
+ assert var_dict_torch[name].size() == data_tf.size(), \
+ "{}, {}, {} != {}".format(name, name_tf,
+ var_dict_torch[name].size(), data_tf.size())
+ var_dict_torch_update[name] = data_tf
+ logging.info("torch tensor: {}, {}, loading from tf tensor: {}, {}".format(
+ name, data_tf.size(), name_tf, var_dict_tf[name_tf].shape
+ ))
+ else:
+ # num_batches_tracked is not read from TF; the mapping holds
+ # the integer to assign.
+ var_dict_torch_update[name] = torch.from_numpy(np.array(map_dict[name])).type(torch.int64).to("cpu")
+ logging.info("torch tensor: {}, manually assigning to: {}".format(
+ name, map_dict[name]
+ ))
+ else:
+ logging.warning("{} is missed from tf checkpoint".format(name))
+
+ return var_dict_torch_update
diff --git a/funasr/models/pooling/statistic_pooling.py b/funasr/models/pooling/statistic_pooling.py
index dc8c98f..97f8a24 100644
--- a/funasr/models/pooling/statistic_pooling.py
+++ b/funasr/models/pooling/statistic_pooling.py
@@ -82,13 +82,16 @@
tt = xs_pad.shape[2]
num_chunk = int(math.ceil(tt / pooling_stride))
pad = pooling_size // 2
- features = F.pad(xs_pad, (0, 0, pad, pad), "reflect")
+    if len(xs_pad.shape) == 4:  # test the rank: `xs_pad.shape == 4` compares a torch.Size with an int and is always False
+        features = F.pad(xs_pad, (0, 0, pad, pad), "reflect")  # 4-D: pad dim 2 only, leave the last dim untouched
+    else:
+        features = F.pad(xs_pad, (pad, pad), "reflect")  # otherwise pad the last dim on both sides
stat_list = []
for i in range(num_chunk):
# B x C
st, ed = i*pooling_stride, i*pooling_stride+pooling_size
- stat = statistic_pooling(features[:, :, st: ed, :], pooling_dim=pooling_dim)
+ stat = statistic_pooling(features[:, :, st: ed], pooling_dim=pooling_dim)
stat_list.append(stat.unsqueeze(2))
# B x C x T
diff --git a/funasr/modules/eend_ola/__init__.py b/funasr/modules/eend_ola/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/funasr/modules/eend_ola/__init__.py
diff --git a/funasr/modules/eend_ola/encoder.py b/funasr/modules/eend_ola/encoder.py
new file mode 100644
index 0000000..17d11ac
--- /dev/null
+++ b/funasr/modules/eend_ola/encoder.py
@@ -0,0 +1,127 @@
+import math
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+class MultiHeadSelfAttention(nn.Module):
+    """Multi-head scaled dot-product self-attention on flattened frames.
+
+    Args:
+        n_units: input/output feature size; split into ``h`` heads of
+            ``n_units // h`` features each.
+        h: number of attention heads.
+        dropout_rate: dropout probability applied to the attention weights.
+    """
+
+    def __init__(self, n_units, h=8, dropout_rate=0.1):
+        super(MultiHeadSelfAttention, self).__init__()
+        self.linearQ = nn.Linear(n_units, n_units)
+        self.linearK = nn.Linear(n_units, n_units)
+        self.linearV = nn.Linear(n_units, n_units)
+        self.linearO = nn.Linear(n_units, n_units)
+        self.d_k = n_units // h
+        self.h = h
+        self.dropout = nn.Dropout(dropout_rate)
+
+    def forward(self, x, batch_size, x_mask):
+        """Attend over the frames of each batch element.
+
+        Implemented as ``forward`` (not ``__call__``) so that
+        ``nn.Module.__call__`` runs registered hooks; ``module(x, ...)``
+        keeps working unchanged.
+
+        Args:
+            x: input whose projections are viewed as
+                ``(batch_size, -1, h, d_k)``.
+            batch_size: number of batch elements folded into ``x``.
+            x_mask: optional mask; it is unsqueezed at dim 1 and scores at
+                positions where it equals 0 are replaced by -1e9 before
+                the softmax. ``None`` disables masking.
+
+        Returns:
+            Output of the final linear layer, flattened to
+            ``(-1, h * d_k)``. The attention weights before dropout are
+            kept in ``self.att``.
+        """
+        q = self.linearQ(x).view(batch_size, -1, self.h, self.d_k)
+        k = self.linearK(x).view(batch_size, -1, self.h, self.d_k)
+        v = self.linearV(x).view(batch_size, -1, self.h, self.d_k)
+        scores = torch.matmul(
+            q.permute(0, 2, 1, 3), k.permute(0, 2, 3, 1)) / math.sqrt(self.d_k)
+        if x_mask is not None:
+            x_mask = x_mask.unsqueeze(1)
+            scores = scores.masked_fill(x_mask == 0, -1e9)
+        self.att = F.softmax(scores, dim=3)
+        p_att = self.dropout(self.att)
+        x = torch.matmul(p_att, v.permute(0, 2, 1, 3))
+        x = x.permute(0, 2, 1, 3).contiguous().view(-1, self.h * self.d_k)
+        return self.linearO(x)
+
+
+class PositionwiseFeedForward(nn.Module):
+    """Two-layer feed-forward block: linear -> ReLU -> dropout -> linear.
+
+    Args:
+        n_units: input and output feature size.
+        d_units: hidden feature size.
+        dropout_rate: dropout probability applied after the ReLU.
+    """
+
+    def __init__(self, n_units, d_units, dropout_rate):
+        super(PositionwiseFeedForward, self).__init__()
+        self.linear1 = nn.Linear(n_units, d_units)
+        self.linear2 = nn.Linear(d_units, n_units)
+        self.dropout = nn.Dropout(dropout_rate)
+
+    def forward(self, x):
+        """Apply the block to ``x``.
+
+        Implemented as ``forward`` (not ``__call__``) so that
+        ``nn.Module.__call__`` runs registered hooks; ``module(x)`` keeps
+        working unchanged.
+        """
+        return self.linear2(self.dropout(F.relu(self.linear1(x))))
+
+
+class PositionalEncoding(torch.nn.Module):
+    """Sinusoidal positional encoding added to a scaled input.
+
+    Args:
+        d_model: feature size of the encoding.
+        dropout_rate: dropout probability applied to the output.
+        max_len: number of positions precomputed at construction.
+        reverse: if True, positions count down from ``length - 1`` to 0.
+    """
+
+    def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False):
+        super(PositionalEncoding, self).__init__()
+        self.d_model = d_model
+        self.reverse = reverse
+        self.xscale = math.sqrt(self.d_model)
+        self.dropout = torch.nn.Dropout(p=dropout_rate)
+        self.pe = None
+        self.extend_pe(torch.tensor(0.0).expand(1, max_len))
+
+    def extend_pe(self, x):
+        """Make ``self.pe`` cover ``x.size(1)`` positions on x's dtype/device."""
+        length = x.size(1)
+        cached = self.pe
+        if cached is not None and cached.size(1) >= length:
+            # Long enough already: only follow x's dtype/device if needed.
+            if cached.dtype != x.dtype or cached.device != x.device:
+                self.pe = cached.to(dtype=x.dtype, device=x.device)
+            return
+
+        table = torch.zeros(length, self.d_model)
+        if self.reverse:
+            position = torch.arange(
+                length - 1, -1, -1.0, dtype=torch.float32
+            ).unsqueeze(1)
+        else:
+            position = torch.arange(0, length, dtype=torch.float32).unsqueeze(1)
+        div_term = torch.exp(
+            torch.arange(0, self.d_model, 2, dtype=torch.float32)
+            * -(math.log(10000.0) / self.d_model)
+        )
+        angles = position * div_term
+        table[:, 0::2] = torch.sin(angles)  # even feature indices
+        table[:, 1::2] = torch.cos(angles)  # odd feature indices
+        self.pe = table.unsqueeze(0).to(device=x.device, dtype=x.dtype)
+
+    def forward(self, x: torch.Tensor):
+        """Return ``dropout(x * sqrt(d_model) + pe[:, :x.size(1)])``."""
+        self.extend_pe(x)
+        encoded = x * self.xscale + self.pe[:, : x.size(1)]
+        return self.dropout(encoded)
+
+
+class TransformerEncoder(nn.Module):
+    """Stack of self-attention / feed-forward layers over flattened frames.
+
+    Per-layer sub-modules are registered as ``lnorm1_<i>``, ``self_att_<i>``,
+    ``lnorm2_<i>`` and ``ff_<i>``.
+
+    Args:
+        idim: input feature size.
+        n_layers: number of encoder layers.
+        n_units: model feature size.
+        e_units: hidden size of each feed-forward block.
+        h: number of attention heads.
+        dropout_rate: dropout probability for the residual branches and the
+            feed-forward blocks. The attention blocks are built with their
+            own default rate; this value is not passed to them.
+        use_pos_emb: if True, the input projection is a
+            Linear/LayerNorm/Dropout/ReLU/PositionalEncoding pipeline;
+            otherwise a single linear layer.
+    """
+
+    def __init__(self, idim, n_layers, n_units,
+                 e_units=2048, h=8, dropout_rate=0.1, use_pos_emb=False):
+        super(TransformerEncoder, self).__init__()
+        # NOTE(review): lnorm_in is registered but never applied in forward().
+        self.lnorm_in = nn.LayerNorm(n_units)
+        self.n_layers = n_layers
+        self.dropout = nn.Dropout(dropout_rate)
+        for i in range(n_layers):
+            setattr(self, '{}{:d}'.format("lnorm1_", i),
+                    nn.LayerNorm(n_units))
+            setattr(self, '{}{:d}'.format("self_att_", i),
+                    MultiHeadSelfAttention(n_units, h))
+            setattr(self, '{}{:d}'.format("lnorm2_", i),
+                    nn.LayerNorm(n_units))
+            setattr(self, '{}{:d}'.format("ff_", i),
+                    PositionwiseFeedForward(n_units, e_units, dropout_rate))
+        self.lnorm_out = nn.LayerNorm(n_units)
+        if use_pos_emb:
+            self.pos_enc = torch.nn.Sequential(
+                torch.nn.Linear(idim, n_units),
+                torch.nn.LayerNorm(n_units),
+                torch.nn.Dropout(dropout_rate),
+                torch.nn.ReLU(),
+                PositionalEncoding(n_units, dropout_rate),
+            )
+        else:
+            self.linear_in = nn.Linear(idim, n_units)
+            self.pos_enc = None
+
+    def forward(self, x, x_mask=None):
+        """Encode ``x`` and return a ``(B * T, n_units)`` tensor.
+
+        Implemented as ``forward`` (not ``__call__``) so that
+        ``nn.Module.__call__`` runs registered hooks; ``encoder(x, mask)``
+        keeps working unchanged.
+
+        Args:
+            x: input whose first two dims are treated as batch and time.
+            x_mask: optional mask forwarded to every attention layer.
+        """
+        BT_size = x.shape[0] * x.shape[1]
+        if self.pos_enc is not None:
+            e = self.pos_enc(x)
+            e = e.view(BT_size, -1)
+        else:
+            e = self.linear_in(x.reshape(BT_size, -1))
+        for i in range(self.n_layers):
+            # The residual is added to the layer-normalised activations.
+            e = getattr(self, '{}{:d}'.format("lnorm1_", i))(e)
+            s = getattr(self, '{}{:d}'.format("self_att_", i))(e, x.shape[0], x_mask)
+            e = e + self.dropout(s)
+            e = getattr(self, '{}{:d}'.format("lnorm2_", i))(e)
+            s = getattr(self, '{}{:d}'.format("ff_", i))(e)
+            e = e + self.dropout(s)
+        return self.lnorm_out(e)
diff --git a/funasr/modules/eend_ola/encoder_decoder_attractor.py b/funasr/modules/eend_ola/encoder_decoder_attractor.py
new file mode 100644
index 0000000..db01b00
--- /dev/null
+++ b/funasr/modules/eend_ola/encoder_decoder_attractor.py
@@ -0,0 +1,50 @@
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+
+class EncoderDecoderAttractor(nn.Module):
+    """LSTM encoder-decoder that produces attractor vectors from sequences.
+
+    The encoder LSTM consumes each input sequence; its final state
+    initialises the decoder LSTM, which is fed zero vectors and emits one
+    attractor per step. ``counter`` maps each attractor to one logit.
+
+    Args:
+        n_units: feature size of inputs, LSTM states and attractors.
+        encoder_dropout: dropout probability on encoder inputs.
+        decoder_dropout: dropout probability on decoder inputs.
+    """
+
+    def __init__(self, n_units, encoder_dropout=0.1, decoder_dropout=0.1):
+        super(EncoderDecoderAttractor, self).__init__()
+        self.enc0_dropout = nn.Dropout(encoder_dropout)
+        self.encoder = nn.LSTM(n_units, n_units, 1, batch_first=True, dropout=encoder_dropout)
+        self.dec0_dropout = nn.Dropout(decoder_dropout)
+        self.decoder = nn.LSTM(n_units, n_units, 1, batch_first=True, dropout=decoder_dropout)
+        self.counter = nn.Linear(n_units, 1)
+        self.n_units = n_units
+
+    def forward_core(self, xs, zeros):
+        """Run encoder then decoder.
+
+        Args:
+            xs: list of 2-D tensors, one per sequence (length on dim 0).
+            zeros: list of 2-D decoder inputs, one per sequence.
+
+        Returns:
+            List of attractor tensors, one per sequence, each cut to the
+            length of its decoder input.
+        """
+        # Keep lengths as Python ints: pack_padded_sequence requires lengths
+        # on the CPU, so a length tensor moved to a CUDA device is rejected.
+        ilens = [x.shape[0] for x in xs]
+        xs = [self.enc0_dropout(x) for x in xs]
+        xs = nn.utils.rnn.pad_sequence(xs, batch_first=True, padding_value=-1)
+        xs = nn.utils.rnn.pack_padded_sequence(xs, ilens, batch_first=True, enforce_sorted=False)
+        _, (hx, cx) = self.encoder(xs)
+
+        zlens = [z.shape[0] for z in zeros]
+        max_zlen = max(zlens)
+        # Decoder inputs go through the decoder dropout. The callers in this
+        # class pass all-zero inputs, for which either dropout yields zeros.
+        zeros = [self.dec0_dropout(z) for z in zeros]
+        zeros = nn.utils.rnn.pad_sequence(zeros, batch_first=True, padding_value=-1)
+        zeros = nn.utils.rnn.pack_padded_sequence(zeros, zlens, batch_first=True, enforce_sorted=False)
+        attractors, (_, _) = self.decoder(zeros, (hx, cx))
+        attractors = nn.utils.rnn.pad_packed_sequence(attractors, batch_first=True, padding_value=-1,
+                                                      total_length=max_zlen)[0]
+        return [att[:zlen] for att, zlen in zip(attractors, zlens)]
+
+    def forward(self, xs, n_speakers):
+        """Compute the attractor-existence loss and the attractors.
+
+        Args:
+            xs: list of 2-D tensors, one per sequence.
+            n_speakers: number of speakers of each sequence.
+
+        Returns:
+            ``(loss, attractors)``: binary cross-entropy between the
+            sigmoid of the counter logits and the labels
+            ``[1] * n_spk + [0]``, and per-sequence attractors with the
+            last (extra) one removed.
+        """
+        zeros = [torch.zeros(n_spk + 1, self.n_units).to(torch.float32).to(xs[0].device) for n_spk in n_speakers]
+        attractors = self.forward_core(xs, zeros)
+        labels = torch.cat([torch.from_numpy(np.array([[1] * n_spk + [0]], np.float32)) for n_spk in n_speakers], dim=1)
+        labels = labels.to(xs[0].device)
+        logit = torch.cat([self.counter(att).view(-1, n_spk + 1) for att, n_spk in zip(attractors, n_speakers)], dim=1)
+        loss = F.binary_cross_entropy(torch.sigmoid(logit), labels)
+
+        attractors = [att[slice(0, att.shape[0] - 1)] for att in attractors]
+        return loss, attractors
+
+    def estimate(self, xs, max_n_speakers=15):
+        """Decode ``max_n_speakers`` attractors per sequence.
+
+        Returns:
+            ``(attractors, probs)``: per-sequence attractor tensors and the
+            sigmoid of their counter logits, flattened.
+        """
+        zeros = [torch.zeros(max_n_speakers, self.n_units).to(torch.float32).to(xs[0].device) for _ in xs]
+        attractors = self.forward_core(xs, zeros)
+        probs = [torch.sigmoid(torch.flatten(self.counter(att))) for att in attractors]
+        return attractors, probs
\ No newline at end of file
diff --git a/funasr/modules/eend_ola/utils/losses.py b/funasr/modules/eend_ola/utils/losses.py
new file mode 100644
index 0000000..af0181d
--- /dev/null
+++ b/funasr/modules/eend_ola/utils/losses.py
@@ -0,0 +1,67 @@
+import numpy as np
+import torch
+import torch.nn.functional as F
+from itertools import permutations
+from torch import nn
+
+
+def standard_loss(ys, ts, label_delay=0):
+    """Frame-weighted binary cross-entropy over a list of utterances.
+
+    Each utterance's mean BCE (on ``sigmoid(y)``) is multiplied by
+    ``len(y)``; the sum is divided by the total of ``t.shape[0]`` over
+    ``ts``. ``label_delay`` is accepted but not used.
+    """
+    weighted = []
+    for logits, target in zip(ys, ts):
+        utt_loss = F.binary_cross_entropy(torch.sigmoid(logits), target)
+        weighted.append(utt_loss * len(logits))
+    total = torch.sum(torch.stack(weighted))
+    frame_count = np.array(np.sum([target.shape[0] for target in ts]))
+    n_frames = torch.from_numpy(frame_count).to(torch.float32).to(ys[0].device)
+    return total / n_frames
+
+
+def batch_pit_n_speaker_loss(ys, ts, n_speakers_list):
+    """Permutation-invariant BCE loss for a batch with per-utterance speaker counts.
+
+    Args:
+        ys: list of per-utterance logit tensors (frames on dim 0);
+            presumably (T_i, max_n_speakers) -- TODO confirm with caller.
+        ts: list of per-utterance label tensors; ``ts[0].shape[1]`` is taken
+            as the maximum number of speakers.
+        n_speakers_list: number of speakers of each utterance, indexed like ``ts``.
+
+    Returns:
+        ``(min_loss, labels_perm)``: the sum over utterances of the smallest
+        allowed permutation loss divided by the total number of label
+        frames, and each utterance's labels re-ordered by its best
+        permutation and cut to its own speaker count.
+    """
+    max_n_speakers = ts[0].shape[1]
+    olens = [y.shape[0] for y in ys]
+    ys = nn.utils.rnn.pad_sequence(ys, batch_first=True, padding_value=-1)
+    # 1 for real frames, 0 for padded frames; trailing dim added for broadcasting.
+    ys_mask = [torch.ones(olen).to(ys.device) for olen in olens]
+    ys_mask = torch.nn.utils.rnn.pad_sequence(ys_mask, batch_first=True, padding_value=0).unsqueeze(-1)
+
+    # One loss slice per column shift: slice `shift` pairs output column j
+    # with label column (j + shift) % max_n_speakers.
+    losses = []
+    for shift in range(max_n_speakers):
+        ts_roll = [torch.roll(t, -shift, dims=1) for t in ts]
+        ts_roll = nn.utils.rnn.pad_sequence(ts_roll, batch_first=True, padding_value=-1)
+        loss = F.binary_cross_entropy(torch.sigmoid(ys), ts_roll, reduction='none')
+        # ys_mask is always set above, so padded frames are always zeroed here.
+        if ys_mask is not None:
+            loss = loss * ys_mask
+        loss = torch.sum(loss, dim=1)  # sum over the frame dim
+        losses.append(loss)
+    losses = torch.stack(losses, dim=2)
+
+    # For every permutation, the shift needed at each output column.
+    # fmod keeps the sign of the difference, so some entries are negative;
+    # they are used as indices below, where negative values wrap around.
+    perms = np.array(list(permutations(range(max_n_speakers)))).astype(np.float32)
+    perms = torch.from_numpy(perms).to(losses.device)
+    y_ind = torch.arange(max_n_speakers, dtype=torch.float32, device=losses.device)
+    t_inds = torch.fmod(perms - y_ind, max_n_speakers).to(torch.long)
+
+    # Mean over output columns of the selected (column, shift) losses,
+    # one column of losses_perm per permutation.
+    losses_perm = []
+    for t_ind in t_inds:
+        losses_perm.append(
+            torch.mean(losses[:, y_ind.to(torch.long), t_ind], dim=1))
+    losses_perm = torch.stack(losses_perm, dim=1)
+
+    def select_perm_indices(num, max_num):
+        # For each permutation of the first `num` speakers, the index of the
+        # first full permutation whose leading `num` entries match it.
+        perms = list(permutations(range(max_num)))
+        sub_perms = list(permutations(range(num)))
+        return [
+            [x[:num] for x in perms].index(perm)
+            for perm in sub_perms]
+
+    # +inf everywhere except the permutations allowed for each utterance,
+    # so the min/argmin below can only pick an allowed permutation.
+    masks = torch.full_like(losses_perm, device=losses.device, fill_value=float('inf'))
+    for i, t in enumerate(ts):
+        n_speakers = n_speakers_list[i]
+        indices = select_perm_indices(n_speakers, max_n_speakers)
+        masks[i, indices] = 0
+    losses_perm += masks
+
+    min_loss = torch.sum(torch.min(losses_perm, dim=1)[0])
+    n_frames = torch.from_numpy(np.array(np.sum([t.shape[0] for t in ts]))).to(losses.device)
+    min_loss = min_loss / n_frames
+
+    min_indices = torch.argmin(losses_perm, dim=1)
+    labels_perm = [t[:, perms[idx].to(torch.long)] for t, idx in zip(ts, min_indices)]
+    labels_perm = [t[:, :n_speakers] for t, n_speakers in zip(labels_perm, n_speakers_list)]
+
+    return min_loss, labels_perm
diff --git a/funasr/modules/eend_ola/utils/power.py b/funasr/modules/eend_ola/utils/power.py
new file mode 100644
index 0000000..7144e24
--- /dev/null
+++ b/funasr/modules/eend_ola/utils/power.py
@@ -0,0 +1,95 @@
+import numpy as np
+import torch
+import torch.multiprocessing
+import torch.nn.functional as F
+from itertools import combinations
+from itertools import permutations
+
+
+def generate_mapping_dict(max_speaker_num=6, max_olp_speaker_num=3):
+    """Build the lookup tables between speaker-activity codes and class labels.
+
+    A code is the value ``raw_dec_trans`` gives for a 0/1 activity row
+    (speaker ``i`` weighs ``2 ** i``). Code 0 and the codes of every
+    combination of 1..``max_olp_speaker_num`` active speakers out of
+    ``max_speaker_num`` are sorted and numbered from 0.
+
+    Returns:
+        dict with ``'dec2label'`` (code -> label), ``'label2dec'``
+        (label -> code) and ``'oov'`` (the first unused label).
+    """
+    codes = [0]
+    for n_active in range(1, max_olp_speaker_num + 1):
+        combos = np.array(list(combinations(np.arange(max_speaker_num), n_active)))
+        for com in combos:
+            activity = np.zeros(max_speaker_num)
+            activity[com] = 1
+            codes.append(int(raw_dec_trans(activity.reshape(1, -1), max_speaker_num)[0]))
+    ordered = sorted(codes)
+
+    mapping_dict = {'dec2label': {}, 'label2dec': {}}
+    for label, dec in enumerate(ordered):
+        mapping_dict['dec2label'][dec] = label
+        mapping_dict['label2dec'][label] = dec
+    mapping_dict['oov'] = len(ordered)
+    return mapping_dict
+
+
+def raw_dec_trans(x, max_speaker_num):
+    """Encode each row of ``x`` as ``sum_i x[:, i] * 2 ** i``.
+
+    Args:
+        x: 2-D array; the first ``max_speaker_num`` columns are read.
+        max_speaker_num: number of columns to combine.
+
+    Returns:
+        1-D float array with one code per row of ``x``.
+    """
+    columns = [x[:, spk] for spk in range(max_speaker_num)]
+    codes = np.zeros((x.shape[0]))
+    for spk, column in enumerate(columns):
+        codes += column * (2 ** spk)
+    return codes
+
+
+def mapping_func(num, mapping_dict):
+    """Return the label of code ``num``, or the ``'oov'`` label if unknown."""
+    dec2label = mapping_dict['dec2label']
+    # 'oov' is only looked up when the code is missing.
+    return dec2label[num] if num in dec2label.keys() else mapping_dict['oov']
+
+
+def dec_trans(x, max_speaker_num, mapping_dict):
+    """Encode each row of ``x`` as a code and map the codes to labels.
+
+    The per-row code is the one computed by ``raw_dec_trans``; each code is
+    then translated with ``mapping_func`` (unknown codes become the
+    ``'oov'`` label).
+
+    Returns:
+        1-D numpy array with one label per row of ``x``.
+    """
+    codes = raw_dec_trans(x, max_speaker_num)
+    return np.array([mapping_func(code, mapping_dict) for code in codes])
+
+
+def create_powerlabel(label, mapping_dict, max_speaker_num=6, max_olp_speaker_num=3):
+    """Turn a (T, C) activity matrix into one label per frame.
+
+    ``label`` is copied into the first ``C`` columns of a zero matrix with
+    ``max_speaker_num`` columns, encoded with ``dec_trans`` and returned as
+    a torch tensor. ``max_olp_speaker_num`` is accepted but not used.
+    """
+    n_frames, n_columns = label.shape
+    padded = np.zeros((n_frames, max_speaker_num))
+    padded[:, :n_columns] = label
+    return torch.from_numpy(dec_trans(padded, max_speaker_num, mapping_dict))
+
+
+def generate_perm_pse(label, n_speaker, mapping_dict, max_speaker_num, max_olp_speaker_num=3):
+    """Build every column permutation of ``label`` and its frame labels.
+
+    Args:
+        label: 2-D torch tensor; its columns are permuted.
+        n_speaker: number of columns to permute.
+        mapping_dict: tables passed on to ``create_powerlabel``.
+        max_speaker_num: passed on to ``create_powerlabel``.
+        max_olp_speaker_num: accepted but not used here.
+
+    Returns:
+        ``(perm_labels, perm_pse_labels)``: the permuted label tensors and,
+        for each, the tensor from ``create_powerlabel`` moved to the same
+        device.
+    """
+    perm_array = np.array(list(permutations(range(n_speaker)))).astype(np.float32)
+    perms = torch.from_numpy(perm_array).to(label.device).to(torch.int64)
+
+    perm_labels = []
+    perm_pse_labels = []
+    for perm in perms:
+        permuted = label[:, perm]
+        pse = create_powerlabel(permuted.cpu().numpy(), mapping_dict, max_speaker_num)
+        perm_labels.append(permuted)
+        perm_pse_labels.append(pse.to(permuted.device, non_blocking=True))
+    return perm_labels, perm_pse_labels
+
+
+def generate_min_pse(label, n_speaker, mapping_dict, max_speaker_num, pse_logit, max_olp_speaker_num=3):
+    """Pick the label permutation with the lowest cross-entropy.
+
+    Every permutation from ``generate_perm_pse`` is scored with
+    ``F.cross_entropy(pse_logit, target) * len(pse_logit)``.
+
+    Returns:
+        ``(selected_perm_label, selected_pse_label)`` for the permutation
+        with the smallest score.
+    """
+    perm_labels, perm_pse_labels = generate_perm_pse(
+        label, n_speaker, mapping_dict, max_speaker_num,
+        max_olp_speaker_num=max_olp_speaker_num)
+    scores = []
+    for candidate in perm_pse_labels:
+        ce = F.cross_entropy(input=pse_logit, target=candidate.to(torch.long))
+        scores.append(ce * len(pse_logit))
+    best = torch.argmin(torch.stack(scores))
+    return perm_labels[best], perm_pse_labels[best]
diff --git a/funasr/modules/eend_ola/utils/report.py b/funasr/modules/eend_ola/utils/report.py
new file mode 100644
index 0000000..bfccedf
--- /dev/null
+++ b/funasr/modules/eend_ola/utils/report.py
@@ -0,0 +1,159 @@
+import copy
+import numpy as np
+import time
+import torch
+from eend.utils.power import create_powerlabel
+from itertools import combinations
+
+metrics = [
+ ('diarization_error', 'speaker_scored', 'DER'),
+ ('speech_miss', 'speech_scored', 'SAD_MR'),
+ ('speech_falarm', 'speech_scored', 'SAD_FR'),
+ ('speaker_miss', 'speaker_scored', 'MI'),
+ ('speaker_falarm', 'speaker_scored', 'FA'),
+ ('speaker_error', 'speaker_scored', 'CF'),
+ ('correct', 'frames', 'accuracy')
+]
+
+
+def recover_prediction(y, n_speaker):
+    """Fold overlap columns of ``y`` back into per-speaker columns.
+
+    The leading columns of ``y`` are per-speaker values and the trailing
+    columns stand for speaker combinations (pairs, and triples when
+    ``n_speaker`` > 2), in ``itertools.combinations`` order. Wherever a
+    combination column exceeds 0.5, that frame's per-speaker row is cleared
+    and the speakers of the combination are set to 1.
+
+    Args:
+        y: 2-D torch tensor, frames on dim 0.
+        n_speaker: number of speakers; ``y`` is returned unchanged if <= 1.
+
+    Returns:
+        The per-speaker columns of ``y``.
+
+    NOTE(review): the per-speaker result is taken from ``y`` by slicing and
+    then written in place, so the caller's ``y`` is presumably modified as
+    well -- confirm that callers expect this.
+    NOTE(review): ``np.array(...)`` is applied to torch tensors below, which
+    presumably requires ``y`` to live on the CPU -- confirm.
+    """
+    if n_speaker <= 1:
+        return y
+    elif n_speaker == 2:
+        # Rows of com_index list the speaker indices of each pair.
+        com_index = torch.from_numpy(
+            np.array(list(combinations(np.arange(n_speaker), 2)))).to(
+            y.dtype)
+        num_coms = com_index.shape[0]
+        y_single = y[:, :-num_coms]
+        y_olp = y[:, -num_coms:]
+        # (frame, combination) index pairs of the active overlap entries.
+        olp_map_index = torch.where(y_olp > 0.5)
+        olp_map_index = torch.stack(olp_map_index, dim=1)
+        com_map_index = com_index[olp_map_index[:, -1]]
+        speaker_map_index = torch.from_numpy(np.array(com_map_index)).view(-1).to(torch.int64)
+        # Each frame index is repeated once per speaker of its pair.
+        frame_map_index = olp_map_index[:, 0][:, None].repeat([1, 2]).view(-1).to(
+            torch.int64)
+        y_single[frame_map_index] = 0
+        y_single[frame_map_index, speaker_map_index] = 1
+        return y_single
+    else:
+        olp2_com_index = torch.from_numpy(np.array(list(combinations(np.arange(n_speaker), 2)))).to(y.dtype)
+        olp2_num_coms = olp2_com_index.shape[0]
+        olp3_com_index = torch.from_numpy(np.array(list(combinations(np.arange(n_speaker), 3)))).to(y.dtype)
+        olp3_num_coms = olp3_com_index.shape[0]
+        # Column layout: speakers, then pairs, with the triples last.
+        y_single = y[:, :n_speaker]
+        y_olp2 = y[:, n_speaker:n_speaker + olp2_num_coms]
+        y_olp3 = y[:, -olp3_num_coms:]
+
+        # Triples first; frames with an active triple also get their pair
+        # columns cleared before the pairs are processed.
+        olp3_map_index = torch.where(y_olp3 > 0.5)
+        olp3_map_index = torch.stack(olp3_map_index, dim=1)
+        olp3_com_map_index = olp3_com_index[olp3_map_index[:, -1]]
+        olp3_speaker_map_index = torch.from_numpy(np.array(olp3_com_map_index)).view(-1).to(torch.int64)
+        olp3_frame_map_index = olp3_map_index[:, 0][:, None].repeat([1, 3]).view(-1).to(torch.int64)
+        y_single[olp3_frame_map_index] = 0
+        y_single[olp3_frame_map_index, olp3_speaker_map_index] = 1
+        y_olp2[olp3_frame_map_index] = 0
+
+        # Then the pairs, same procedure as the two-speaker branch.
+        olp2_map_index = torch.where(y_olp2 > 0.5)
+        olp2_map_index = torch.stack(olp2_map_index, dim=1)
+        olp2_com_map_index = olp2_com_index[olp2_map_index[:, -1]]
+        olp2_speaker_map_index = torch.from_numpy(np.array(olp2_com_map_index)).view(-1).to(torch.int64)
+        olp2_frame_map_index = olp2_map_index[:, 0][:, None].repeat([1, 2]).view(-1).to(torch.int64)
+        y_single[olp2_frame_map_index] = 0
+        y_single[olp2_frame_map_index, olp2_speaker_map_index] = 1
+        return y_single
+
+
+class PowerReporter():
+    """Evaluates a model on a validation loader and prints summary metrics.
+
+    Args:
+        valid_data_loader: iterable yielding ``(xs, ts, orders)`` batches; a
+            deep copy is stored.
+        mapping_dict: tables with ``'label2dec'`` and ``'oov'`` entries.
+        max_n_speaker: width of the binary speaker-activity decisions.
+    """
+
+    def __init__(self, valid_data_loader, mapping_dict, max_n_speaker):
+        self.valid_data_loader = copy.deepcopy(valid_data_loader)
+        self.mapping_dict = mapping_dict
+        self.max_n_speaker = max_n_speaker
+
+    def report(self, model, eidx, device):
+        """Run the validation report for epoch index ``eidx``."""
+        self.report_val(model, eidx, device)
+
+    def report_val(self, model, eidx, device):
+        """Put ``model`` in eval mode, score the validation set and print the result."""
+        model.eval()
+        ud_valid_start = time.time()
+        valid_res, valid_loss, stats_keys, vad_valid_accuracy = self.report_core(model, self.valid_data_loader, device)
+
+        # With no scored frame the accumulators are still plain floats and
+        # the ratios below would divide by zero.
+        if not valid_res['frames']:
+            print('Epoch ', eidx + 1, 'Valid set is empty, nothing to report')
+            return
+
+        # Epoch Display
+        valid_der = valid_res['diarization_error'] / valid_res['speaker_scored']
+        valid_accuracy = valid_res['correct'].to(torch.float32) / valid_res['frames'] * 100
+        vad_valid_accuracy = vad_valid_accuracy * 100
+        print('Epoch ', eidx + 1, 'Valid Loss ', valid_loss, 'Valid_DER %.5f' % valid_der,
+              'Valid_Accuracy %.5f%% ' % valid_accuracy, 'VAD_Valid_Accuracy %.5f%% ' % vad_valid_accuracy)
+        ud_valid = (time.time() - ud_valid_start) / 60.
+        print('Valid cost time ... ', ud_valid)
+
+    def inv_mapping_func(self, label, mapping_dict):
+        """Return the code stored for ``label`` in ``'label2dec'``, or -1 if absent."""
+        if not isinstance(label, int):
+            label = int(label)
+        if label in mapping_dict['label2dec'].keys():
+            num = mapping_dict['label2dec'][label]
+        else:
+            num = -1
+        return num
+
+    def report_core(self, model, data_loader, device):
+        """Accumulate diarization statistics over ``data_loader``.
+
+        Returns:
+            ``(res, loss_s, stats_keys, vad_acc)``: summed statistics, mean
+            batch loss (0.0 for an empty loader), the statistic names and a
+            constant 0.
+        """
+        res = {}
+        for item in metrics:
+            res[item[0]] = 0.
+            res[item[1]] = 0.
+        stats = None
+        with torch.no_grad():
+            loss_s = 0.
+            uidx = 0
+            for xs, ts, orders in data_loader:
+                xs = [x.to(device) for x in xs]
+                ts = [t.to(device) for t in ts]
+                orders = [o.to(device) for o in orders]
+                loss, pit_loss, mpit_loss, att_loss, ys, logits, labels, attractors = model(xs, ts, orders)
+                loss_s += loss.item()
+                uidx += 1
+
+                for logit, t, att in zip(logits, labels, attractors):
+                    pred = torch.argmax(torch.softmax(logit, dim=-1), dim=-1)  # (T, )
+                    # An out-of-vocabulary frame repeats the previous
+                    # frame's class; the first frame falls back to class 0.
+                    oov_index = torch.where(pred == self.mapping_dict['oov'])[0]
+                    for i in oov_index:
+                        if i > 0:
+                            pred[i] = pred[i - 1]
+                        else:
+                            pred[i] = 0
+                    pred = [self.inv_mapping_func(i, self.mapping_dict) for i in pred]
+                    # Binary digits of each code, least-significant bit first.
+                    decisions = [bin(num)[2:].zfill(self.max_n_speaker)[::-1] for num in pred]
+                    decisions = torch.from_numpy(
+                        np.stack([np.array([int(i) for i in dec]) for dec in decisions], axis=0)).to(att.device).to(
+                        torch.float32)
+                    decisions = decisions[:, :att.shape[0]]
+
+                    stats = self.calc_diarization_error(decisions, t)
+                    res['speaker_scored'] += stats['speaker_scored']
+                    res['speech_scored'] += stats['speech_scored']
+                    res['frames'] += stats['frames']
+                    for item in metrics:
+                        res[item[0]] += stats[item[0]]
+            # An empty loader leaves the mean loss at 0 instead of dividing by zero.
+            if uidx:
+                loss_s /= uidx
+            vad_acc = 0
+
+        # Fall back to the accumulator's names when nothing was scored.
+        stats_keys = stats.keys() if stats is not None else res.keys()
+        return res, loss_s, stats_keys, vad_acc
+
+    def calc_diarization_error(self, decisions, label, label_delay=0):
+        """Compute frame-level diarization statistics for one utterance.
+
+        Args:
+            decisions: 0/1 system decisions, frames on dim 0.
+            label: 0/1 reference labels; the last ``label_delay`` frames
+                are dropped.
+
+        Returns:
+            dict with speech/speaker scored, miss, false-alarm and error
+            counts, ``'correct'``, ``'diarization_error'`` and ``'frames'``.
+        """
+        label = label[:len(label) - label_delay, ...]
+        n_ref = torch.sum(label, dim=-1)
+        n_sys = torch.sum(decisions, dim=-1)
+        res = {}
+        res['speech_scored'] = torch.sum(n_ref > 0)
+        res['speech_miss'] = torch.sum((n_ref > 0) & (n_sys == 0))
+        res['speech_falarm'] = torch.sum((n_ref == 0) & (n_sys > 0))
+        res['speaker_scored'] = torch.sum(n_ref)
+        res['speaker_miss'] = torch.sum(torch.max(n_ref - n_sys, torch.zeros_like(n_ref)))
+        res['speaker_falarm'] = torch.sum(torch.max(n_sys - n_ref, torch.zeros_like(n_ref)))
+        n_map = torch.sum(((label == 1) & (decisions == 1)), dim=-1).to(torch.float32)
+        res['speaker_error'] = torch.sum(torch.min(n_ref, n_sys) - n_map)
+        res['correct'] = torch.sum(label == decisions) / label.shape[1]
+        res['diarization_error'] = (
+            res['speaker_miss'] + res['speaker_falarm'] + res['speaker_error'])
+        res['frames'] = len(label)
+        return res
diff --git a/funasr/runtime/onnxruntime/CMakeSettings.json b/funasr/runtime/onnxruntime/CMakeSettings.json
index f515d1f..2eb6c5a 100644
--- a/funasr/runtime/onnxruntime/CMakeSettings.json
+++ b/funasr/runtime/onnxruntime/CMakeSettings.json
@@ -19,8 +19,26 @@
"cmakeCommandArgs": "",
"buildCommandArgs": "",
"ctestCommandArgs": "",
- "inheritEnvironments": [ "msvc_x64_x64" ],
- "variables": []
+ "inheritEnvironments": [ "msvc_x64_x64" ]
+ },
+ {
+ "name": "Linux-GCC-Debug",
+ "generator": "Unix Makefiles",
+ "configurationType": "Debug",
+ "cmakeExecutable": "cmake",
+ "remoteCopySourcesExclusionList": [ ".vs", ".git", "out" ],
+ "cmakeCommandArgs": "-DONNXRUNTIME_DIR=/data/linux/thirdpart/onnxruntime-linux-x64-1.14.1",
+ "buildCommandArgs": "",
+ "ctestCommandArgs": "",
+ "inheritEnvironments": [ "linux_x64" ],
+ "remoteMachineName": "${defaultRemoteMachineName}",
+ "remoteCMakeListsRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/src",
+ "remoteBuildRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/build/${name}",
+ "remoteInstallRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/install/${name}",
+ "remoteCopySources": true,
+ "rsyncCommandArgs": "-t --delete",
+ "remoteCopyBuildOutput": false,
+ "remoteCopySourcesMethod": "rsync"
}
]
}
\ No newline at end of file
diff --git a/funasr/runtime/onnxruntime/include/Audio.h b/funasr/runtime/onnxruntime/include/Audio.h
index af8d2a9..da5e82c 100644
--- a/funasr/runtime/onnxruntime/include/Audio.h
+++ b/funasr/runtime/onnxruntime/include/Audio.h
@@ -43,11 +43,17 @@
Audio(int data_type, int size);
~Audio();
void disp();
- bool loadwav(const char *filename);
+ bool loadwav(const char* filename);
+ bool loadwav(const char* buf, int nLen);
+ bool loadpcmwav(const char* buf, int nFileLen);
+ bool loadpcmwav(const char* filename);
int fetch_chunck(float *&dout, int len);
int fetch(float *&dout, int &len, int &flag);
void padding();
void split();
+ float get_time_len();
+
+ int get_queue_size() { return (int)frame_queue.size(); }
};
#endif
diff --git a/funasr/runtime/onnxruntime/include/librapidasrapi.h b/funasr/runtime/onnxruntime/include/librapidasrapi.h
new file mode 100644
index 0000000..a83098f
--- /dev/null
+++ b/funasr/runtime/onnxruntime/include/librapidasrapi.h
@@ -0,0 +1,96 @@
+#pragma once
+
+
+// Symbol visibility for the C API.
+// _WIN32 is the macro predefined by Windows compilers (and the one tested
+// further down in this header); WIN32 is still accepted for build setups
+// that define only that name.
+#if defined(_WIN32) || defined(WIN32)
+
+// Define _RPASR_API_EXPORT when building the library itself.
+#ifdef _RPASR_API_EXPORT
+
+#define _RAPIDASRAPI __declspec(dllexport)
+#else
+#define _RAPIDASRAPI __declspec(dllimport)
+#endif
+
+
+#else
+#define _RAPIDASRAPI
+#endif
+
+
+// Calling-convention prefix for callbacks.
+#ifndef _WIN32
+
+#define RPASR_CALLBCK_PREFIX __attribute__((__stdcall__))
+
+#else
+#define RPASR_CALLBCK_PREFIX __stdcall
+#endif
+
+
+#ifdef __cplusplus
+
+extern "C" {
+#endif
+
+// Opaque handle returned by RapidAsrInit.
+typedef void* RPASR_HANDLE;
+
+// Opaque recognition result returned by the RapidAsrRecog* functions.
+typedef void* RPASR_RESULT;
+
+typedef unsigned char RPASR_BOOL;
+
+#define RPASR_TRUE 1
+#define RPASR_FALSE 0
+#define QM_DEFAULT_THREAD_NUM 4
+
+
+// Decoding mode argument of the RapidAsrRecog* functions.
+typedef enum
+{
+    RASR_NONE=-1,
+    RASRM_CTC_GREEDY_SEARCH=0,
+    RASRM_CTC_RPEFIX_BEAM_SEARCH = 1,
+    RASRM_ATTENSION_RESCORING = 2,
+
+}RPASR_MODE;
+
+// Model family identifiers.
+typedef enum {
+
+    RPASR_MODEL_PADDLE = 0,
+    RPASR_MODEL_PADDLE_2 = 1,
+    RPASR_MODEL_K2 = 2,
+    RPASR_MODEL_PARAFORMER = 3,
+
+}RPASR_MODEL_TYPE;
+
+
+// Progress callback. nTotal: total steps; nCurStep: current step.
+typedef void (* QM_CALLBACK)(int nCurStep, int nTotal);
+
+    // APIs for qmasr
+
+// Create a recognizer from the model directory szModelDir using nThread threads.
+_RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread);
+
+
+
+// Recognition entry points. Pass NULL as fnCallback when no progress
+// callback is wanted.
+// szBuf/nLen: in-memory audio (the PCM variant takes raw samples).
+_RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
+_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
+
+// szFileName: path of a raw PCM file.
+_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback);
+
+// szWavfile: path of a WAV file.
+_RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback);
+
+// Accessors for a result; presumably nIndex selects one of
+// RapidAsrGetRetNumber() entries -- TODO confirm against the implementation.
+_RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex);
+
+_RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result);
+// Release a result returned by a RapidAsrRecog* function.
+_RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result);
+
+
+// Release a handle returned by RapidAsrInit.
+_RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE Handle);
+
+// Audio duration associated with a result; presumably in seconds -- TODO confirm.
+_RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result);
+
+#ifdef __cplusplus
+
+}
+#endif
\ No newline at end of file
diff --git a/funasr/runtime/onnxruntime/src/Audio.cpp b/funasr/runtime/onnxruntime/src/Audio.cpp
index f515a6d..43dfb6b 100644
--- a/funasr/runtime/onnxruntime/src/Audio.cpp
+++ b/funasr/runtime/onnxruntime/src/Audio.cpp
@@ -25,8 +25,7 @@
out_idx = 1;
sum = 0;
};
- ~AudioWindow()
- {
+ ~AudioWindow(){
free(window);
};
int put(int val)
@@ -102,6 +101,11 @@
{
if (speech_buff != NULL) {
free(speech_buff);
+
+ }
+
+ if (speech_data != NULL) {
+
free(speech_data);
}
}
@@ -112,12 +116,20 @@
speech_len);
}
+float Audio::get_time_len()
+{
+    // Length of the loaded audio: sample count divided by a fixed
+    // 16000 samples per second.
+    const float duration = (float)speech_len / 16000;
+    return duration;
+}
+
bool Audio::loadwav(const char *filename)
{
+ if (speech_data != NULL) {
+ free(speech_data);
+ }
if (speech_buff != NULL) {
free(speech_buff);
- free(speech_data);
}
offset = 0;
@@ -133,27 +145,190 @@
speech_len = (nFileLen - 44) / 2;
speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
- memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
- int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
- fclose(fp);
- speech_data = (float *)malloc(sizeof(float) * speech_align_len);
- memset(speech_data, 0, sizeof(float) * speech_align_len);
- int i;
- float scale = 1;
+ if (speech_buff)
+ {
+ memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+ int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
+ fclose(fp);
- if (data_type == 1) {
- scale = 32768;
+ speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+ memset(speech_data, 0, sizeof(float) * speech_align_len);
+ int i;
+ float scale = 1;
+
+ if (data_type == 1) {
+ scale = 32768;
+ }
+
+ for (i = 0; i < speech_len; i++) {
+ speech_data[i] = (float)speech_buff[i] / scale;
+ }
+
+ AudioFrame* frame = new AudioFrame(speech_len);
+ frame_queue.push(frame);
+
+
+ return true;
}
-
- for (i = 0; i < speech_len; i++) {
- speech_data[i] = (float)speech_buff[i] / scale;
- }
-
- AudioFrame *frame = new AudioFrame(speech_len);
- frame_queue.push(frame);
- return true;
+ else
+ return false;
}
+
+
+// Load a WAV image held in memory.
+// The first WAV_HEADER_SIZE bytes are skipped and the rest is read as
+// 16-bit samples, which are also converted to float (divided by 32768 when
+// data_type == 1). Returns false if the buffer is missing, shorter than the
+// header, or memory cannot be allocated.
+bool Audio::loadwav(const char* buf, int nFileLen)
+{
+#define WAV_HEADER_SIZE 44
+
+    // Drop the buffers of a previous load and clear the pointers, so that a
+    // failed load does not leave freed pointers behind to be freed again.
+    if (speech_data != NULL) {
+        free(speech_data);
+        speech_data = NULL;
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    offset = 0;
+
+    // A buffer shorter than the header would give a negative sample count.
+    if (buf == NULL || nFileLen < WAV_HEADER_SIZE) {
+        speech_len = 0;
+        speech_align_len = 0;
+        return false;
+    }
+
+    speech_len = (nFileLen - WAV_HEADER_SIZE) / 2;
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        free(speech_buff);
+        free(speech_data);
+        speech_buff = NULL;
+        speech_data = NULL;
+        return false;
+    }
+
+    // Zero first so the alignment padding after speech_len stays silent.
+    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+    memcpy((void*)speech_buff, (const void*)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t));
+    memset(speech_data, 0, sizeof(float) * speech_align_len);
+
+    float scale = 1;
+    if (data_type == 1) {
+        scale = 32768;
+    }
+    for (int i = 0; i < speech_len; i++) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    // Queue one frame covering the whole signal, as the file-based loaders do.
+    AudioFrame* frame = new AudioFrame(speech_len);
+    frame_queue.push(frame);
+
+    return true;
+}
+
+
+// Load headerless 16-bit samples held in memory.
+// The samples are copied and also converted to float (divided by 32768
+// when data_type == 1). Returns false if the buffer is missing, the length
+// is negative, or memory cannot be allocated.
+bool Audio::loadpcmwav(const char* buf, int nBufLen)
+{
+    // Drop the buffers of a previous load and clear the pointers, so that a
+    // failed load does not leave freed pointers behind to be freed again.
+    if (speech_data != NULL) {
+        free(speech_data);
+        speech_data = NULL;
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    offset = 0;
+
+    if (buf == NULL || nBufLen < 0) {
+        speech_len = 0;
+        speech_align_len = 0;
+        return false;
+    }
+
+    speech_len = nBufLen / 2;
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        free(speech_buff);
+        free(speech_data);
+        speech_buff = NULL;
+        speech_data = NULL;
+        return false;
+    }
+
+    // Zero first so the alignment padding after speech_len stays silent.
+    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+    memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
+    memset(speech_data, 0, sizeof(float) * speech_align_len);
+
+    float scale = 1;
+    if (data_type == 1) {
+        scale = 32768;
+    }
+    for (int i = 0; i < speech_len; i++) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    // Queue one frame covering the whole signal, as the file-based loaders do.
+    AudioFrame* frame = new AudioFrame(speech_len);
+    frame_queue.push(frame);
+
+    return true;
+}
+
+// Load a headerless file of 16-bit samples.
+// The whole file is read as samples, which are also converted to float
+// (divided by 32768 when data_type == 1), and one AudioFrame covering the
+// signal is queued. Returns false if the file cannot be opened or sized,
+// or memory cannot be allocated.
+bool Audio::loadpcmwav(const char* filename)
+{
+    // Drop the buffers of a previous load and clear the pointers, so that a
+    // failed load does not leave freed pointers behind to be freed again.
+    if (speech_data != NULL) {
+        free(speech_data);
+        speech_data = NULL;
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    offset = 0;
+
+    FILE* fp = fopen(filename, "rb");
+    if (fp == nullptr)
+        return false;
+
+    fseek(fp, 0, SEEK_END);
+    long nFileLen = ftell(fp);
+    fseek(fp, 0, SEEK_SET);
+    if (nFileLen < 0) {
+        // ftell failed; do not derive a sample count from the error value.
+        fclose(fp);
+        return false;
+    }
+
+    speech_len = (int)(nFileLen / 2);
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        free(speech_buff);
+        free(speech_data);
+        speech_buff = NULL;
+        speech_data = NULL;
+        fclose(fp);  // close the file on the failure path as well
+        return false;
+    }
+
+    // Zero first: padding, and any samples a short read leaves out, stay silent.
+    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+    (void)fread(speech_buff, sizeof(int16_t), speech_len, fp);
+    fclose(fp);
+
+    memset(speech_data, 0, sizeof(float) * speech_align_len);
+
+    float scale = 1;
+    if (data_type == 1) {
+        scale = 32768;
+    }
+    for (int i = 0; i < speech_len; i++) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    AudioFrame* frame = new AudioFrame(speech_len);
+    frame_queue.push(frame);
+
+    return true;
+}
+
int Audio::fetch_chunck(float *&dout, int len)
{
@@ -163,7 +338,7 @@
} else if (offset == speech_align_len - len) {
dout = speech_data + offset;
offset = speech_align_len;
- // 涓存椂瑙e喅
+        // temporary workaround
AudioFrame *frame = frame_queue.front();
frame_queue.pop();
delete frame;
diff --git a/funasr/runtime/onnxruntime/src/CMakeLists.txt b/funasr/runtime/onnxruntime/src/CMakeLists.txt
index 4842072..aea222b 100644
--- a/funasr/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt
@@ -18,7 +18,7 @@
endif()
target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
-
+ target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT)
else()
set(EXTRA_LIBS fftw3f webrtcvad pthread)
diff --git a/funasr/runtime/onnxruntime/src/commonfunc.h b/funasr/runtime/onnxruntime/src/commonfunc.h
index 3f3c53a..11c234e 100644
--- a/funasr/runtime/onnxruntime/src/commonfunc.h
+++ b/funasr/runtime/onnxruntime/src/commonfunc.h
@@ -1,7 +1,18 @@
#pragma once
+
+
+// Result record of one recognition call.
+typedef struct
+{
+    std::string msg;       // recognized text
+    float snippet_time;    // length of the recognized audio; presumably seconds -- TODO confirm
+}RPASR_RECOG_RESULT;
+
+
#ifdef _WIN32
#include <codecvt>
+
+
inline std::wstring string2wstring(const std::string& str, const std::string& locale)
{
typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
diff --git a/funasr/runtime/onnxruntime/src/librapidasrapi.cpp b/funasr/runtime/onnxruntime/src/librapidasrapi.cpp
new file mode 100644
index 0000000..1f8f7ca
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/librapidasrapi.cpp
@@ -0,0 +1,210 @@
+#include "precomp.h"
+#ifdef __cplusplus
+
+extern "C" {
+#endif
+
+
+ // APIs for qmasr
+ // Create a recognizer: szModelDir and nThreadNum are passed straight to
+ // create_model(), and whatever it returns is handed back as the handle
+ // (callers treat a null handle as "model could not be loaded").
+ // Release the handle with RapidAsrUninit().
+ _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThreadNum)
+ {
+     return create_model(szModelDir, nThreadNum);
+ }
+
+
+ // Recognize WAV data held in memory (szBuf, nLen bytes) with the model behind
+ // handle. Returns nullptr when handle is null or the buffer is null/empty;
+ // otherwise a result the caller releases with RapidAsrFreeResult().
+ // Mode is unused. fnCallback, if set, is called with (chunks done, chunks
+ // total) after each chunk. NOTE(review): loadwav's result is not checked.
+ _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
+ {
+     Model* pRecogObj = (Model*)handle;
+     // Reject unusable input at the API boundary instead of passing it on.
+     if (!pRecogObj || !szBuf || nLen <= 0)
+         return nullptr;
+
+     Audio audio(1);
+     audio.loadwav(szBuf, nLen);
+     audio.split();
+
+     float* buff;
+     int len;
+     int flag = 0;
+     RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
+     pResult->snippet_time = audio.get_time_len();
+     int nStep = 0;
+     int nTotal = audio.get_queue_size();
+     while (audio.fetch(buff, len, flag) > 0) {
+         pRecogObj->reset();  // reset() is called before every chunk
+         pResult->msg += pRecogObj->forward(buff, len, flag);
+         nStep++;
+         if (fnCallback)
+             fnCallback(nStep, nTotal);
+     }
+     return pResult;
+ }
+
+ // Recognize PCM audio held in memory (szBuf, nLen bytes), loaded through
+ // Audio::loadpcmwav(buf, len). Returns nullptr when handle is null or the
+ // buffer is null/empty; otherwise a result the caller releases with
+ // RapidAsrFreeResult(). Mode is unused. fnCallback, if set, is called with
+ // (chunks done, chunks total). NOTE(review): loadpcmwav's result is unchecked.
+ _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
+ {
+     Model* pRecogObj = (Model*)handle;
+     // Reject unusable input at the API boundary instead of passing it on.
+     if (!pRecogObj || !szBuf || nLen <= 0)
+         return nullptr;
+
+     Audio audio(1);
+     audio.loadpcmwav(szBuf, nLen);
+     audio.split();
+
+     float* buff;
+     int len;
+     int flag = 0;
+     RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
+     pResult->snippet_time = audio.get_time_len();
+     int nStep = 0;
+     int nTotal = audio.get_queue_size();
+     while (audio.fetch(buff, len, flag) > 0) {
+         pRecogObj->reset();  // reset() is called before every chunk
+         pResult->msg += pRecogObj->forward(buff, len, flag);
+         nStep++;
+         if (fnCallback)
+             fnCallback(nStep, nTotal);
+     }
+     return pResult;
+ }
+
+ // Recognize the PCM audio file szFileName with the model behind handle.
+ // Returns nullptr when handle or szFileName is null, or when
+ // Audio::loadpcmwav() reports failure (same contract as RapidAsrRecogFile);
+ // otherwise a result the caller releases with RapidAsrFreeResult(). Mode is
+ // unused. fnCallback, if set, is called with (chunks done, chunks total).
+ _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback)
+ {
+     Model* pRecogObj = (Model*)handle;
+     if (!pRecogObj || !szFileName)
+         return nullptr;
+
+     Audio audio(1);
+     if (!audio.loadpcmwav(szFileName))
+         return nullptr;
+     audio.split();
+
+     float* buff;
+     int len;
+     int flag = 0;
+     RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
+     pResult->snippet_time = audio.get_time_len();
+     int nStep = 0;
+     int nTotal = audio.get_queue_size();
+     while (audio.fetch(buff, len, flag) > 0) {
+         pRecogObj->reset();  // reset() is called before every chunk
+         pResult->msg += pRecogObj->forward(buff, len, flag);
+         nStep++;
+         if (fnCallback)
+             fnCallback(nStep, nTotal);
+     }
+     return pResult;
+ }
+
+ // Recognize the WAV file at szWavfile with the model behind handle.
+ // Yields nullptr if handle is null or Audio::loadwav() reports failure;
+ // otherwise the caller owns the result and frees it via RapidAsrFreeResult().
+ // Mode is not consulted. fnCallback (optional) gets (chunks done, chunks total).
+ _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback)
+ {
+     Model* pModel = (Model*)handle;
+     if (pModel == nullptr)
+         return nullptr;
+
+     Audio wav(1);
+     if (!wav.loadwav(szWavfile))
+         return nullptr;
+     wav.split();
+
+     float* pChunk;
+     int nChunkLen;
+     int nFlag = 0;
+     int nDone = 0;
+     int nTotal = wav.get_queue_size();
+     RPASR_RECOG_RESULT* pOut = new RPASR_RECOG_RESULT;
+     pOut->snippet_time = wav.get_time_len();
+     for (;;) {
+         if (wav.fetch(pChunk, nChunkLen, nFlag) <= 0)
+             break;
+         pModel->reset();
+         pOut->msg += pModel->forward(pChunk, nChunkLen, nFlag);
+         ++nDone;
+         if (fnCallback)
+             fnCallback(nDone, nTotal);
+     }
+     return pOut;
+ }
+
+ // Report how many recognition texts Result carries: 1 for any non-null
+ // Result and 0 for a null one (the value does not depend on the contents
+ // of the result).
+ // Pair with RapidAsrGetResult() to read the text.
+ _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result)
+ {
+     return Result ? 1 : 0;
+ }
+
+
+ // Read back the snippet_time stored in Result by the recognition call that
+ // produced it (the value Audio::get_time_len() returned at that point).
+ // A null Result reads as 0.0f.
+ _RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result)
+ {
+     const RPASR_RECOG_RESULT* pResult = (RPASR_RECOG_RESULT*)Result;
+     return pResult ? pResult->snippet_time : 0.0f;
+ }
+
+ // Return the recognized text of Result as a C string, or nullptr for a null
+ // Result. nIndex is accepted but ignored. The pointer refers to storage
+ // inside Result, so it must not be used after RapidAsrFreeResult(Result).
+ _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex)
+ {
+     if (!Result)
+         return nullptr;
+     return ((RPASR_RECOG_RESULT*)Result)->msg.c_str();
+ }
+
+ // Destroy a result returned by one of the RapidAsrRecog* functions.
+ // Result is cast back to RPASR_RECOG_RESULT* and deleted; a null Result is
+ // accepted and ignored.
+ _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result)
+ {
+     if (!Result)
+         return;
+     delete (RPASR_RECOG_RESULT*)Result;
+ }
+
+ // Tear down a recognizer created by RapidAsrInit().
+ //
+ // handle is cast back to Model* and deleted; passing a null handle is
+ // allowed and does nothing. The handle must not be used afterwards.
+ _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE handle)
+ {
+     Model* pModel = (Model*)handle;
+
+     if (pModel != nullptr) {
+         delete pModel;
+     }
+ }
+
+
+
+#ifdef __cplusplus
+
+}
+#endif
+
diff --git a/funasr/runtime/onnxruntime/src/precomp.h b/funasr/runtime/onnxruntime/src/precomp.h
index ec0766d..358844b 100644
--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@@ -41,9 +41,10 @@
#include "FeatureExtract.h"
#include "FeatureQueue.h"
#include "SpeechWrap.h"
+#include <Audio.h>
#include "Model.h"
#include "paraformer_onnx.h"
-
+#include "librapidasrapi.h"
using namespace paraformer;
diff --git a/funasr/runtime/onnxruntime/tester/CMakeLists.txt b/funasr/runtime/onnxruntime/tester/CMakeLists.txt
index 651b87f..d794271 100644
--- a/funasr/runtime/onnxruntime/tester/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/tester/CMakeLists.txt
@@ -15,6 +15,6 @@
set(EXECNAME "tester")
add_executable(${EXECNAME} "tester.cpp")
-target_link_libraries(${EXECNAME} PUBLIC onnxruntime ${EXTRA_LIBS})
+target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})
diff --git a/funasr/runtime/onnxruntime/tester/tester.cpp b/funasr/runtime/onnxruntime/tester/tester.cpp
index 7bfb4c0..b9a85b7 100644
--- a/funasr/runtime/onnxruntime/tester/tester.cpp
+++ b/funasr/runtime/onnxruntime/tester/tester.cpp
@@ -1,12 +1,13 @@
-#include <iostream>
+
#ifndef _WIN32
#include <sys/time.h>
#else
#include <win_func.h>
#endif
-#include <Audio.h>
-#include <Model.h>
+#include "librapidasrapi.h"
+
+#include <iostream>
using namespace std;
@@ -21,52 +22,54 @@
struct timeval start, end;
gettimeofday(&start, NULL);
int nThreadNum = 4;
- Model* mm = create_model(argv[1], nThreadNum);
- if (!mm)
+ RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum);
+
+ if (!AsrHanlde)
{
printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
exit(-1);
}
-
- Audio audio(0);
- if (!audio.loadwav(argv[2]))
- {
- printf("cannot load %s\n", argv[2]);
- return -1;
- }
- audio.disp();
-
+
gettimeofday(&end, NULL);
long seconds = (end.tv_sec - start.tv_sec);
- long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
- printf("Model initialization takes %lfs.\n", (double)micros / 1000000);
- audio.split();
+ long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+ printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
- setbuf(stdout, NULL);
- cout << "Result: \"";
+
+
gettimeofday(&start, NULL);
- float *buff;
- int len;
- int flag;
- while (audio.fetch(buff, len, flag) > 0) {
- mm->reset();
- string msg = mm->forward(buff, len, flag);
- cout << msg;
- }
+ RPASR_RESULT Result=RapidAsrRecogPCMFile(AsrHanlde, argv[2], RASR_NONE, NULL);
gettimeofday(&end, NULL);
-
- cout << "\"." << endl;
+ float snippet_time = 0.0f;
+ if (Result)
+ {
+ string msg = RapidAsrGetResult(Result, 0);
+ setbuf(stdout, NULL);
+ cout << "Result: \"";
+ cout << msg << endl;
+ cout << "\"." << endl;
+ snippet_time = RapidAsrGetRetSnippetTime(Result);
+ RapidAsrFreeResult(Result);
+ }
+ else
+ {
+ cout <<("no return data!");
+ }
+
+ printf("Audio length %lfs.\n", (double)snippet_time);
seconds = (end.tv_sec - start.tv_sec);
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
- printf("Model inference takes %lfs.\n", (double)micros / 1000000);
+ printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000);
- printf("Model inference RTF: %04lf.\n", (double)taking_micros/micros );
+ printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000));
- delete mm;
+ RapidAsrUninit(AsrHanlde);
return 0;
}
+
+
\ No newline at end of file
diff --git a/funasr/runtime/onnxruntime/wave/test.pcm.bytes b/funasr/runtime/onnxruntime/wave/test.pcm.bytes
new file mode 100644
index 0000000..f3962c6
--- /dev/null
+++ b/funasr/runtime/onnxruntime/wave/test.pcm.bytes
Binary files differ
diff --git a/funasr/runtime/onnxruntime/wave/test.pcm.wav b/funasr/runtime/onnxruntime/wave/test.pcm.wav
new file mode 100644
index 0000000..b83d56c
--- /dev/null
+++ b/funasr/runtime/onnxruntime/wave/test.pcm.wav
Binary files differ
diff --git a/funasr/runtime/python/grpc/grpc_main_client_mic.py b/funasr/runtime/python/grpc/grpc_main_client_mic.py
index acbe90b..220e8b5 100644
--- a/funasr/runtime/python/grpc/grpc_main_client_mic.py
+++ b/funasr/runtime/python/grpc/grpc_main_client_mic.py
@@ -1,7 +1,6 @@
import pyaudio
import grpc
import json
-import webrtcvad
import time
import asyncio
import argparse
@@ -11,24 +10,16 @@
async def deal_chunk(sig_mic):
global stub,SPEAKING,asr_user,language,sample_rate
- if vad.is_speech(sig_mic, sample_rate): #speaking
- SPEAKING = True
- response = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
- else: #silence
- begin_time = 0
- if SPEAKING: #means we have some audio recorded, send recognize order to server.
- SPEAKING = False
- begin_time = int(round(time.time() * 1000))
- response = transcribe_audio_bytes(stub, None, user=asr_user, language=language, speaking = False, isEnd = False) #speak end, call server for recognize one sentence
- resp = response.next()
- if "decoding" == resp.action:
- resp = response.next() #TODO, blocking operation may leads to miss some audio clips. C++ multi-threading is preferred.
- if "finish" == resp.action:
- end_time = int(round(time.time() * 1000))
- print (json.loads(resp.sentence))
- print ("delay in ms: %d " % (end_time - begin_time))
- else:
- pass
+ # Send every microphone chunk to the server, always flagged as speaking.
+ SPEAKING = True
+ resp = transcribe_audio_bytes(stub, sig_mic, user=asr_user, language=language, speaking = True, isEnd = False) #speaking, send audio to server.
+ # Print the JSON-decoded sentence for "decoding" and "finish"; any other action prints nothing.
+ if "decoding" == resp.action: #partial result
+ print(json.loads(resp.sentence))
+ elif "finish" == resp.action: #final result
+ print (json.loads(resp.sentence))
+
+
async def record(host,port,sample_rate,mic_chunk,record_seconds,asr_user,language):
@@ -88,8 +79,6 @@
language = 'zh-CN'
- vad = webrtcvad.Vad()
- vad.set_mode(1)
FORMAT = pyaudio.paInt16
CHANNELS = 1
diff --git a/funasr/tasks/abs_task.py b/funasr/tasks/abs_task.py
index a643acb..e0884ce 100644
--- a/funasr/tasks/abs_task.py
+++ b/funasr/tasks/abs_task.py
@@ -1576,6 +1576,7 @@
preprocess=iter_options.preprocess_fn,
max_cache_size=iter_options.max_cache_size,
max_cache_fd=iter_options.max_cache_fd,
+ dest_sample_rate=args.frontend_conf["fs"],
)
cls.check_task_requirements(
dataset, args.allow_variable_data_keys, train=iter_options.train
@@ -1847,6 +1848,7 @@
key_file: str = None,
batch_size: int = 1,
fs: dict = None,
+ mc: bool = False,
dtype: str = np.float32,
num_workers: int = 1,
allow_variable_data_keys: bool = False,
@@ -1865,6 +1867,7 @@
data_path_and_name_and_type,
float_dtype=dtype,
fs=fs,
+ mc=mc,
preprocess=preprocess_fn,
key_file=key_file,
)
diff --git a/funasr/tasks/asr.py b/funasr/tasks/asr.py
index 23ac976..bc89744 100644
--- a/funasr/tasks/asr.py
+++ b/funasr/tasks/asr.py
@@ -1244,3 +1244,10 @@
return model
+class ASRTaskAligner(ASRTaskParaformer):
+    @classmethod
+    def required_data_names(
+        cls, train: bool = True, inference: bool = False
+    ) -> Tuple[str, ...]:
+        """Always require "speech" and "text", whatever train/inference are."""
+        return "speech", "text"
\ No newline at end of file
diff --git a/funasr/tasks/diar.py b/funasr/tasks/diar.py
index 73c51e3..e699dcc 100644
--- a/funasr/tasks/diar.py
+++ b/funasr/tasks/diar.py
@@ -23,7 +23,7 @@
from funasr.layers.utterance_mvn import UtteranceMVN
from funasr.layers.label_aggregation import LabelAggregate
from funasr.models.ctc import CTC
-from funasr.models.encoder.resnet34_encoder import ResNet34Diar
+from funasr.models.encoder.resnet34_encoder import ResNet34Diar, ResNet34SpL2RegDiar
from funasr.models.encoder.ecapa_tdnn_encoder import ECAPA_TDNN
from funasr.models.encoder.opennmt_encoders.conv_encoder import ConvEncoder
from funasr.models.encoder.opennmt_encoders.fsmn_encoder import FsmnEncoder
@@ -122,6 +122,7 @@
fsmn=FsmnEncoder,
conv=ConvEncoder,
resnet34=ResNet34Diar,
+ resnet34_sp_l2reg=ResNet34SpL2RegDiar,
sanm_chunk_opt=SANMEncoderChunkOpt,
data2vec_encoder=Data2VecEncoder,
ecapa_tdnn=ECAPA_TDNN,
@@ -160,6 +161,7 @@
classes=dict(
dot=DotScorer,
cosine=CosScorer,
+ conv=ConvEncoder,
),
type_check=torch.nn.Module,
default=None,
@@ -571,19 +573,24 @@
var_dict_torch = model.state_dict()
var_dict_torch_update = dict()
# speech encoder
- var_dict_torch_update_local = model.encoder.convert_tf2torch(var_dict_tf, var_dict_torch)
- var_dict_torch_update.update(var_dict_torch_update_local)
+ if model.encoder is not None:
+ var_dict_torch_update_local = model.encoder.convert_tf2torch(var_dict_tf, var_dict_torch)
+ var_dict_torch_update.update(var_dict_torch_update_local)
# speaker encoder
- var_dict_torch_update_local = model.speaker_encoder.convert_tf2torch(var_dict_tf, var_dict_torch)
- var_dict_torch_update.update(var_dict_torch_update_local)
+ if model.speaker_encoder is not None:
+ var_dict_torch_update_local = model.speaker_encoder.convert_tf2torch(var_dict_tf, var_dict_torch)
+ var_dict_torch_update.update(var_dict_torch_update_local)
# cd scorer
- var_dict_torch_update_local = model.cd_scorer.convert_tf2torch(var_dict_tf, var_dict_torch)
- var_dict_torch_update.update(var_dict_torch_update_local)
+ if model.cd_scorer is not None:
+ var_dict_torch_update_local = model.cd_scorer.convert_tf2torch(var_dict_tf, var_dict_torch)
+ var_dict_torch_update.update(var_dict_torch_update_local)
# ci scorer
- var_dict_torch_update_local = model.ci_scorer.convert_tf2torch(var_dict_tf, var_dict_torch)
- var_dict_torch_update.update(var_dict_torch_update_local)
+ if model.ci_scorer is not None:
+ var_dict_torch_update_local = model.ci_scorer.convert_tf2torch(var_dict_tf, var_dict_torch)
+ var_dict_torch_update.update(var_dict_torch_update_local)
# decoder
- var_dict_torch_update_local = model.decoder.convert_tf2torch(var_dict_tf, var_dict_torch)
- var_dict_torch_update.update(var_dict_torch_update_local)
+ if model.decoder is not None:
+ var_dict_torch_update_local = model.decoder.convert_tf2torch(var_dict_tf, var_dict_torch)
+ var_dict_torch_update.update(var_dict_torch_update_local)
return var_dict_torch_update
diff --git a/funasr/tasks/sv.py b/funasr/tasks/sv.py
index 16384a7..1b08c4d 100644
--- a/funasr/tasks/sv.py
+++ b/funasr/tasks/sv.py
@@ -1,14 +1,18 @@
import argparse
import logging
+import os
+from pathlib import Path
from typing import Callable
from typing import Collection
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
+from typing import Union
import numpy as np
import torch
+import yaml
from typeguard import check_argument_types
from typeguard import check_return_type
@@ -21,7 +25,7 @@
from funasr.models.decoder.abs_decoder import AbsDecoder
from funasr.models.encoder.abs_encoder import AbsEncoder
from funasr.models.encoder.rnn_encoder import RNNEncoder
-from funasr.models.encoder.resnet34_encoder import ResNet34
+from funasr.models.encoder.resnet34_encoder import ResNet34, ResNet34_SP_L2Reg
from funasr.models.pooling.statistic_pooling import StatisticPooling
from funasr.models.decoder.sv_decoder import DenseDecoder
from funasr.models.e2e_sv import ESPnetSVModel
@@ -103,6 +107,7 @@
"encoder",
classes=dict(
resnet34=ResNet34,
+ resnet34_sp_l2reg=ResNet34_SP_L2Reg,
rnn=RNNEncoder,
),
type_check=AbsEncoder,
@@ -394,9 +399,16 @@
# 7. Pooling layer
pooling_class = pooling_choices.get_class(args.pooling_type)
+ pooling_dim = (2, 3)
+ eps = 1e-12
+ if hasattr(args, "pooling_type_conf"):
+ if "pooling_dim" in args.pooling_type_conf:
+ pooling_dim = args.pooling_type_conf["pooling_dim"]
+ if "eps" in args.pooling_type_conf:
+ eps = args.pooling_type_conf["eps"]
pooling_layer = pooling_class(
- pooling_dim=(2, 3),
- eps=1e-12,
+ pooling_dim=pooling_dim,
+ eps=eps,
)
if args.pooling_type == "statistic":
encoder_output_size *= 2
@@ -435,3 +447,95 @@
assert check_return_type(model)
return model
+
+ # ~~~~~~~~~ The methods below are mainly used for inference ~~~~~~~~~
+ @classmethod
+ def build_model_from_file(
+ cls,
+ config_file: Union[Path, str] = None,
+ model_file: Union[Path, str] = None,
+ cmvn_file: Union[Path, str] = None,
+ device: str = "cpu",
+ ):
+ """Build model from the files.
+
+ Used for inference or fine-tuning. A model_file whose name contains "model.ckpt-" or
+ ".bin" is converted by convert_tf2torch and cached as .pth/.pb unless that cache exists.
+ Args:
+ config_file: The yaml file saved when training (default: config.yaml beside model_file).
+ model_file: The model file saved when training.
+ cmvn_file: The cmvn file for front-end; overrides "cmvn_file" from the yaml.
+ device: Device type, "cpu", "cuda", or "cuda:N".
+ Returns: (model, args), args being the yaml config as an argparse.Namespace.
+ """
+ assert check_argument_types()
+ if config_file is None:
+ assert model_file is not None, (
+ "The argument 'model_file' must be provided "
+ "if the argument 'config_file' is not specified."
+ )
+ config_file = Path(model_file).parent / "config.yaml"
+ else:
+ config_file = Path(config_file)
+
+ with config_file.open("r", encoding="utf-8") as f:
+ args = yaml.safe_load(f)
+ if cmvn_file is not None:
+ args["cmvn_file"] = cmvn_file
+ args = argparse.Namespace(**args)
+ model = cls.build_model(args)
+ if not isinstance(model, AbsESPnetModel):
+ raise RuntimeError(
+ f"model must inherit {AbsESPnetModel.__name__}, but got {type(model)}"
+ )
+ model.to(device)
+ model_dict = dict()
+ model_name_pth = None  # cache path; only set for TensorFlow-style checkpoint names
+ if model_file is not None:
+ logging.info("model_file is {}".format(model_file))
+ if device == "cuda":  # bare "cuda" is pinned to the current device index for map_location
+ device = f"cuda:{torch.cuda.current_device()}"
+ model_dir = os.path.dirname(model_file)
+ model_name = os.path.basename(model_file)
+ if "model.ckpt-" in model_name or ".bin" in model_name:
+ if ".bin" in model_name:
+ model_name_pth = os.path.join(model_dir, model_name.replace('.bin', '.pb'))  # NOTE(review): torch weights cached under a .pb name - confirm intended
+ else:
+ model_name_pth = os.path.join(model_dir, "{}.pth".format(model_name))
+ if os.path.exists(model_name_pth):
+ logging.info("model_file is load from pth: {}".format(model_name_pth))
+ model_dict = torch.load(model_name_pth, map_location=device)
+ else:
+ model_dict = cls.convert_tf2torch(model, model_file)
+ model.load_state_dict(model_dict)
+ else:
+ model_dict = torch.load(model_file, map_location=device)
+ model.load_state_dict(model_dict)
+ if model_name_pth is not None and not os.path.exists(model_name_pth):  # write the cache only when it is missing
+ torch.save(model_dict, model_name_pth)
+ logging.info("model_file is saved to pth: {}".format(model_name_pth))
+
+ return model, args
+
+ @classmethod
+ def convert_tf2torch(
+     cls,
+     model,
+     ckpt,
+ ):
+     """Map a TensorFlow checkpoint onto ``model``'s PyTorch parameter names.
+
+     The variables read from ``ckpt`` by ``load_tf_dict`` are offered, together
+     with ``model.state_dict()``, to the ``convert_tf2torch`` method of the
+     encoder, the pooling layer and the decoder, in that order. The dicts
+     they return are merged (later entries win) and returned; nothing is
+     loaded into ``model`` here.
+     """
+     logging.info("start convert tf model to torch model")
+     from funasr.modules.streaming_utils.load_fr_tf import load_tf_dict
+     tf_vars = load_tf_dict(ckpt)
+     torch_vars = model.state_dict()
+     converted = dict()
+     for part in ("encoder", "pooling_layer", "decoder"):
+         converted.update(getattr(model, part).convert_tf2torch(tf_vars, torch_vars))
+     return converted
diff --git a/funasr/version.txt b/funasr/version.txt
index 7179039..0d91a54 100644
--- a/funasr/version.txt
+++ b/funasr/version.txt
@@ -1 +1 @@
-0.2.3
+0.3.0
diff --git a/tests/test_asr_inference_pipeline.py b/tests/test_asr_inference_pipeline.py
new file mode 100644
index 0000000..b3c5a24
--- /dev/null
+++ b/tests/test_asr_inference_pipeline.py
@@ -0,0 +1,473 @@
+import unittest
+
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+from modelscope.utils.logger import get_logger
+
+logger = get_logger()  # module-level logger (modelscope.utils.logger) used by every test case in this file
+
+
+class TestConformerInferencePipelines(unittest.TestCase):  # Smoke tests for Conformer ASR models: build a pipeline, run one remote wav, log the result (no assertions).
+ def test_funasr_path(self):  # logs the working directory and where the imported funasr package lives
+ import funasr
+ import os
+ logger.info("run_dir:{0} ; funasr_path: {1}".format(os.getcwd(), funasr.__file__))
+
+ def test_aishell1(self):  # aishell1 model id on the shared zh sample
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_conformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))  # result is only logged, never checked
+
+ def test_aishell2(self):  # aishell2 model id on the same zh sample
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_conformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+
+class TestData2vecInferencePipelines(unittest.TestCase):  # Smoke tests for data2vec-pretrained ASR models: run one remote wav and log the result (no assertions).
+ def test_funasr_path(self):  # logs the working directory and where the imported funasr package lives
+ import funasr
+ import os
+ logger.info("run_dir:{0} ; funasr_path: {1}".format(os.getcwd(), funasr.__file__))
+
+ def test_transformer(self):  # model id without "paraformer" in its name
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_data2vec_pretrain-zh-cn-aishell2-16k-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://modelscope.oss-cn-beijing.aliyuncs.com/test/audios/asr_example.wav')
+ logger.info("asr inference result: {0}".format(rec_result))  # result is only logged, never checked
+
+ def test_paraformer(self):  # paraformer variant of the data2vec-pretrained model, same sample
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_data2vec_pretrain-paraformer-zh-cn-aishell2-16k')
+ rec_result = inference_pipeline(
+ audio_in='https://modelscope.oss-cn-beijing.aliyuncs.com/test/audios/asr_example.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+
+class TestMfccaInferencePipelines(unittest.TestCase):  # Smoke test for the MFCCA model: run one remote wav and log the result (no assertions).
+ def test_funasr_path(self):  # logs the working directory and where the imported funasr package lives
+ import funasr
+ import os
+ logger.info("run_dir:{0} ; funasr_path: {1}".format(os.getcwd(), funasr.__file__))
+
+ def test_alimeeting(self):  # the only test in this file that pins model_revision
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='NPU-ASLP/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950',
+ model_revision='v3.0.0')  # NOTE(review): model pinned to v3.0.0 while the sample below is fetched with Revision=master — confirm
+ rec_result = inference_pipeline(
+ audio_in='https://pre.modelscope.cn/api/v1/models/NPU-ASLP/speech_mfcca_asr-zh-cn-16k-alimeeting-vocab4950/repo?Revision=master&FilePath=example/asr_example_mc.wav')
+ logger.info("asr inference result: {0}".format(rec_result))  # result is only logged, never checked
+
+
+class TestParaformerInferencePipelines(unittest.TestCase):  # Smoke tests for Paraformer ASR models: one pipeline per model id, one remote wav each, result logged (no assertions).
+ def test_funasr_path(self):  # logs the working directory and where the imported funasr package lives
+ import funasr
+ import os
+ logger.info("run_dir:{0} ; funasr_path: {1}".format(os.getcwd(), funasr.__file__))
+
+ def test_paraformer_large_contextual_common(self):  # contextual model driven with a hotword list
+ param_dict = dict()
+ param_dict['hotword'] = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/hotword.txt"  # hotword list supplied as a remote .txt URL
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404',
+ param_dict=param_dict)  # here param_dict is given at pipeline construction, not per call
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_hotword.wav')
+ logger.info("asr inference result: {0}".format(rec_result))  # result is only logged, never checked
+
+ def test_paraformer_large_aishell1(self):  # paraformer-large, aishell1 model id
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell1-vocab8404-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_paraformer_large_aishell2(self):  # paraformer-large, aishell2 model id
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-aishell2-vocab8404-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_paraformer_large_common(self):  # paraformer-large, common model id
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_paraformer_tiny_commandword(self):  # tiny command-word model with its own command sample
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer-tiny-commandword_asr_nat-zh-cn-16k-vocab544-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh_command.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_paraformer_8k(self):  # 8k model id paired with the dedicated asr_example_8K.wav sample
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_8K.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_paraformer_aishell1(self):  # base paraformer, aishell1 model id
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_paraformer_aishell2(self):  # base paraformer, aishell2 model id
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+
+class TestParaformerBertInferencePipelines(unittest.TestCase):  # Smoke tests for ParaformerBert ASR models: run one remote wav and log the result (no assertions).
+ def test_funasr_path(self):  # logs the working directory and where the imported funasr package lives
+ import funasr
+ import os
+ logger.info("run_dir:{0} ; funasr_path: {1}".format(os.getcwd(), funasr.__file__))
+
+ def test_aishell1(self):  # aishell1 model id
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformerbert_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://modelscope.oss-cn-beijing.aliyuncs.com/test/audios/asr_example.wav')
+ logger.info("asr inference result: {0}".format(rec_result))  # result is only logged, never checked
+
+ def test_aishell2(self):  # aishell2 model id, same sample
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_paraformerbert_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch')
+ rec_result = inference_pipeline(
+ audio_in='https://modelscope.oss-cn-beijing.aliyuncs.com/test/audios/asr_example.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+
+class TestUniasrInferencePipelines(unittest.TestCase):  # Smoke tests for UniASR 2-pass models: one pipeline per model id, one remote wav each, result logged (no assertions).
+ def test_funasr_path(self):  # logs the working directory and where the imported funasr package lives
+ import funasr
+ import os
+ logger.info("run_dir:{0} ; funasr_path: {1}".format(os.getcwd(), funasr.__file__))
+
+ def test_uniasr_2pass_cantonese_chs_16k_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online')  # NOTE(review): "_offline" test loads the "-online" model id — confirm this is intended
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_cantonese-CHS.wav',
+ param_dict={"decoding_model": "offline"})  # per-call option; "_offline" tests pass "offline", "_online" tests pass "normal"
+ logger.info("asr inference result: {0}".format(rec_result))  # result is only logged, never checked
+
+ def test_uniasr_2pass_cantonese_chs_16k_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-cantonese-CHS-16k-common-vocab1468-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_cantonese-CHS.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_cn_dialect_offline(self):  # NOTE(review): unlike most tests in this class, no param_dict is passed — confirm the default is wanted
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_cn_dialect_online(self):  # NOTE(review): no param_dict passed here either — confirm
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav')
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_de_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_de.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_de_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_de.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_en_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_en.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_en_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_en.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_es_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_es.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_es_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_es.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_fa_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_fa.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_fa_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_fa.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_fr_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_fr.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_fr_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_fr.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_id_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-online')  # NOTE(review): "_offline" test loads the "-online" model id — confirm this is intended
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_id.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_id_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_id.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_ja_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_ja.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_ja_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-ja-16k-common-vocab93-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_ja.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_ko_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_ko.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_ko_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_ko.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_minnan_common_offline(self):  # minnan has an offline test only; it reuses the generic asr_example_zh.wav sample
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-minnan-16k-common-vocab3825')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_pt_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_pt.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_pt_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-pt-16k-common-vocab1617-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_pt.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_ru_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_ru.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_ru_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_ru.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_vi_common_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_vi.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_vi_common_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-vi-16k-common-vocab1001-pytorch-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_vi.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_zhcn_8k_common_vocab3445_offline(self):  # NOTE(review): this and the three zh-cn-8k tests after it feed asr_example_zh.wav to an "8k" model id — confirm the sample suits 8k models
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_zhcn_8k_common_vocab3445_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_zhcn_8k_common_vocab8358_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_zhcn_8k_common_vocab8358_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_zhcn_16k_common_vocab8358_offline(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
+ param_dict={"decoding_model": "offline"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+ def test_uniasr_2pass_zhcn_16k_common_vocab8358_online(self):
+ inference_pipeline = pipeline(
+ task=Tasks.auto_speech_recognition,
+ model='damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-online')
+ rec_result = inference_pipeline(
+ audio_in='https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav',
+ param_dict={"decoding_model": "normal"})
+ logger.info("asr inference result: {0}".format(rec_result))
+
+
+
+if __name__ == '__main__':  # allow running this test module directly as a script
+ unittest.main()  # discovers and runs every TestCase defined in this module
--
Gitblit v1.9.1