From 8912e0696af069de47646fdb8a9d9c4e086e88b3 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期日, 14 一月 2024 23:42:11 +0800
Subject: [PATCH] Resolve merge conflict
---
runtime/onnxruntime/bin/funasr-onnx-2pass.cpp | 36
runtime/onnxruntime/src/paraformer.h | 9
runtime/websocket/bin/websocket-server.h | 1
runtime/onnxruntime/include/com-define.h | 2
runtime/triton_gpu/README_ONLINE.md | 64
runtime/docs/websocket_protocol_zh.md | 6
runtime/websocket/bin/websocket-server-2pass.cpp | 39
web-pages/readme.md | 6
runtime/onnxruntime/src/offline-stream.cpp | 10
funasr/models/uniasr/e2e_uni_asr.py | 3
runtime/onnxruntime/src/paraformer.cpp | 37
runtime/onnxruntime/src/vocab.h | 6
funasr/tokenizer/sentencepiece_tokenizer.py | 2
runtime/onnxruntime/bin/funasr-onnx-offline.cpp | 10
runtime/onnxruntime/src/fsmn-vad-online.cpp | 5
runtime/onnxruntime/src/funasrruntime.cpp | 70
runtime/onnxruntime/src/bias-lm.h | 30
runtime/python/websocket/funasr_wss_client.py | 23
runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/1/model.py | 221 +
runtime/websocket/readme.md | 4
runtime/onnxruntime/include/funasrruntime.h | 3
runtime/websocket/bin/funasr-wss-server.cpp | 6
runtime/onnxruntime/src/util.cpp | 415 ++
funasr/tokenizer/char_tokenizer.py | 2
runtime/onnxruntime/readme.md | 4
runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.yaml | 8639 +++++++++++++++++++++++++++++++++++++++++++
runtime/websocket/bin/funasr-wss-client.cpp | 53
README_zh.md | 5
runtime/docs/SDK_tutorial_online.md | 2
runtime/onnxruntime/include/model.h | 4
README.md | 9
runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp | 14
funasr/tokenizer/abs_tokenizer.py | 165
runtime/triton_gpu/model_repo_paraformer_large_online/streaming_paraformer/config.pbtxt | 122
funasr/tokenizer/phoneme_tokenizer.py | 1
runtime/run_server_2pass.sh | 2
runtime/docs/lm_train_tutorial.md | 1
funasr/metrics/compute_acc.py | 2
runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp | 45
runtime/docs/docker_offline_cpu_en_lists | 2
runtime/docs/websocket_protocol.md | 6
runtime/triton_gpu/model_repo_paraformer_large_online/cif_search/config.pbtxt | 111
runtime/websocket/bin/websocket-server.cpp | 32
runtime/readme_cn.md | 5
runtime/docs/SDK_advanced_guide_online.md | 11
runtime/websocket/bin/websocket-server-2pass.h | 6
runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp | 15
runtime/onnxruntime/src/vocab.cpp | 61
runtime/triton_gpu/model_repo_paraformer_large_online/encoder/config.pbtxt | 77
runtime/docs/docker_online_cpu_zh_lists | 2
runtime/docs/SDK_advanced_guide_offline.md | 9
web-pages/src/views/home/sstx.vue | 2
runtime/onnxruntime/src/audio.cpp | 101
funasr/quick_start_zh.md | 8
runtime/onnxruntime/src/paraformer-online.h | 3
runtime/onnxruntime/src/util.h | 14
runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.pbtxt | 109
runtime/websocket/bin/funasr-wss-client-2pass.cpp | 35
runtime/onnxruntime/src/paraformer-online.cpp | 8
web-pages/src/views/home/index.vue | 18
runtime/docs/SDK_advanced_guide_offline_en.md | 8
funasr/tokenizer/build_tokenizer.py | 19
docs/installation/docker.md | 2
funasr/tokenizer/word_tokenizer.py | 1
runtime/docs/SDK_advanced_guide_offline_en_zh.md | 8
runtime/websocket/bin/funasr-wss-server-2pass.cpp | 86
runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/am.mvn | 8
web-pages/src/views/home/lxwjzxfw.vue | 2
runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp | 6
runtime/docs/docker_offline_cpu_zh_lists | 2
runtime/onnxruntime/include/vad-model.h | 1
runtime/docs/SDK_advanced_guide_offline_zh.md | 24
runtime/onnxruntime/src/fsmn-vad-online.h | 2
runtime/readme.md | 12
runtime/websocket/readme_zh.md | 4
runtime/onnxruntime/include/audio.h | 10
docs/installation/docker_zh.md | 2
runtime/docs/SDK_tutorial_online_zh.md | 2
runtime/triton_gpu/model_repo_paraformer_large_online/decoder/config.pbtxt | 274 +
/dev/null | 75
funasr/schedulers/__init__.py | 2
runtime/docs/SDK_advanced_guide_online_zh.md | 13
funasr/models/rwkv_bat/rwkv_encoder.py | 12
runtime/triton_gpu/model_repo_paraformer_large_online/cif_search/1/model.py | 268 +
funasr/quick_start.md | 8
runtime/onnxruntime/src/tpass-stream.cpp | 14
runtime/onnxruntime/src/commonfunc.h | 1
runtime/onnxruntime/src/fsmn-vad.h | 2
runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/config.pbtxt | 85
funasr/datasets/audio_datasets/samplers.py | 29
runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp | 4
runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp | 7
runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/export_lfr_cmvn_pe_onnx.py | 131
93 files changed, 11,353 insertions(+), 484 deletions(-)
diff --git a/README.md b/README.md
index db0fdcb..23d197a 100644
--- a/README.md
+++ b/README.md
@@ -27,7 +27,12 @@
<a name="whats-new"></a>
-## What's new:
+## What's new:
+- 2024/01/09: The FunASR SDK for Windows version 2.0 has been released, featuring support for the offline file transcription service (CPU) of Mandarin 4.1, the offline file transcription service (CPU) of English 1.2, and the real-time transcription service (CPU) of Mandarin 1.6. For more details, please refer to the official documentation or release notes ([FunASR-Runtime-Windows](https://www.modelscope.cn/models/damo/funasr-runtime-win-cpu-x64/summary)).
+- 2024/01/03: File Transcription Service 4.0 released: added support for 8k models, optimized timestamp mismatch issues and added sentence-level timestamps, improved the effectiveness of English word FST hotwords, supported automated configuration of thread parameters, and fixed known crash issues as well as memory leak problems; refer to ([docs](runtime/readme.md#file-transcription-service-mandarin-cpu)).
+- 2024/01/03: Real-time Transcription Service 1.6 released. The 2pass-offline mode supports Ngram language model decoding and WFST hotwords, while also addressing known crash issues and memory leak problems; refer to ([docs](runtime/readme.md#the-real-time-transcription-service-mandarin-cpu)).
+- 2024/01/03: The offline file transcription service (CPU) of English 1.2 released, fixing known crash issues as well as memory leak problems; refer to ([docs](runtime/readme.md#file-transcription-service-english-cpu)).
+- 2023/12/04: The FunASR SDK for Windows version 1.0 has been released, featuring support for the offline file transcription service (CPU) of Mandarin, the offline file transcription service (CPU) of English, and the real-time transcription service (CPU) of Mandarin. For more details, please refer to the official documentation or release notes ([FunASR-Runtime-Windows](https://www.modelscope.cn/models/damo/funasr-runtime-win-cpu-x64/summary)).
- 2023/11/08: The offline file transcription service 3.0 (CPU) of Mandarin has been released, adding punctuation large model, Ngram language model, and wfst hot words. For detailed information, please refer to [docs](runtime#file-transcription-service-mandarin-cpu).
- 2023/10/17: The offline file transcription service (CPU) of English has been released. For more details, please refer to ([docs](runtime#file-transcription-service-english-cpu)).
- 2023/10/13: [SlideSpeech](https://slidespeech.github.io/): A large scale multi-modal audio-visual corpus with a significant amount of real-time synchronized slides.
@@ -50,7 +55,7 @@
(Note: 🤗 represents the Huggingface model zoo link, ⭐ represents the ModelScope model zoo link)
-| Model Name | Task Details | Training Date | Parameters |
+| Model Name | Task Details | Training Data | Parameters |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------:|:--------------------------------:|:----------:|
| paraformer-zh <br> ([⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) [🤗]() ) | speech recognition, with timestamps, non-streaming | 60000 hours, Mandarin | 220M |
| paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() ) | speech recognition with speaker diarization, with timestamps, non-streaming | 60000 hours, Mandarin | 220M |
diff --git a/README_zh.md b/README_zh.md
index 274e14b..6c75e42 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -31,6 +31,11 @@
<a name="最新动态"></a>
## 最新动态
+- 2024/01/09: funasr社区软件包windows 2.0版本发布，支持软件包中文离线文件转写4.1、英文离线文件转写1.2、中文实时听写服务1.6的最新功能，详细信息参阅([FunASR社区软件包windows版本](https://www.modelscope.cn/models/damo/funasr-runtime-win-cpu-x64/summary))
+- 2024/01/03: 中文离线文件转写服务 4.0 发布，新增支持8k模型、优化时间戳不匹配问题及增加句子级别时间戳、优化英文单词fst热词效果、支持自动化配置线程参数，同时修复已知的crash问题及内存泄漏问题，详细信息参阅([一键部署文档](runtime/readme_cn.md#中文离线文件转写服务cpu版本))
+- 2024/01/03: 中文实时语音听写服务 1.6 发布，2pass-offline模式支持Ngram语言模型解码、wfst热词，同时修复已知的crash问题及内存泄漏问题，详细信息参阅([一键部署文档](runtime/readme_cn.md#中文实时语音听写服务cpu版本))
+- 2024/01/03: 英文离线文件转写服务 1.2 发布，修复已知的crash问题及内存泄漏问题，详细信息参阅([一键部署文档](runtime/readme_cn.md#英文离线文件转写服务cpu版本))
+- 2023/12/04: funasr社区软件包windows 1.0版本发布，支持中文离线文件转写、英文离线文件转写、中文实时听写服务，详细信息参阅([FunASR社区软件包windows版本](https://www.modelscope.cn/models/damo/funasr-runtime-win-cpu-x64/summary))
- 2023/11/08：中文离线文件转写服务3.0 CPU版本发布，新增标点大模型、Ngram语言模型与wfst热词，详细信息参阅([一键部署文档](runtime/readme_cn.md#中文离线文件转写服务cpu版本))
- 2023/10/17: 英文离线文件转写服务一键部署的CPU版本发布，详细信息参阅([一键部署文档](runtime/readme_cn.md#英文离线文件转写服务cpu版本))
- 2023/10/13: [SlideSpeech](https://slidespeech.github.io/): 一个大规模的多模态音视频语料库，主要是在线会议或者在线课程场景，包含了大量与发言人讲话实时同步的幻灯片。
diff --git a/docs/installation/docker.md b/docs/installation/docker.md
index ee02650..63bf485 100644
--- a/docs/installation/docker.md
+++ b/docs/installation/docker.md
@@ -37,7 +37,7 @@
### Image Hub
#### CPU
-`registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0`
+`registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1`
#### GPU
diff --git a/docs/installation/docker_zh.md b/docs/installation/docker_zh.md
index e32022e..2fa6a90 100644
--- a/docs/installation/docker_zh.md
+++ b/docs/installation/docker_zh.md
@@ -37,7 +37,7 @@
### 镜像仓库
#### CPU
-`registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0`
+`registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1`
#### GPU
diff --git a/funasr/datasets/audio_datasets/samplers.py b/funasr/datasets/audio_datasets/samplers.py
index d34fdea..9c87245 100644
--- a/funasr/datasets/audio_datasets/samplers.py
+++ b/funasr/datasets/audio_datasets/samplers.py
@@ -4,15 +4,16 @@
from funasr.register import tables
+
@tables.register("batch_sampler_classes", "DynamicBatchLocalShuffleSampler")
class BatchSampler(torch.utils.data.BatchSampler):
def __init__(self, dataset,
- batch_type: str="example",
- batch_size: int=100,
- buffer_size: int=30,
- drop_last: bool=False,
- shuffle: bool=True,
+ batch_type: str = "example",
+ batch_size: int = 100,
+ buffer_size: int = 30,
+ drop_last: bool = False,
+ shuffle: bool = True,
**kwargs):
self.drop_last = drop_last
@@ -25,24 +26,23 @@
self.max_token_length = kwargs.get("max_token_length", 5000)
self.shuffle_idx = np.arange(self.total_samples)
self.shuffle = shuffle
-
def __len__(self):
return self.total_samples
def set_epoch(self, epoch):
np.random.seed(epoch)
-
+
def __iter__(self):
if self.shuffle:
np.random.shuffle(self.shuffle_idx)
-
+
batch = []
max_token = 0
num_sample = 0
-
- iter_num = (self.total_samples-1) // self.buffer_size + 1
+
+ iter_num = (self.total_samples - 1) // self.buffer_size + 1
# print("iter_num: ", iter_num)
for iter in range(self.pre_idx + 1, iter_num):
datalen_with_index = []
@@ -50,12 +50,12 @@
idx = iter * self.buffer_size + i
if idx >= self.total_samples:
continue
-
+
idx_map = self.shuffle_idx[idx]
# prompt = self.dataset.indexed_dataset[idx_map]["prompt"]
sample_len_cur = self.dataset.get_source_len(idx_map) + \
self.dataset.get_target_len(idx_map)
-
+
datalen_with_index.append([idx, sample_len_cur])
datalen_with_index_sort = sorted(datalen_with_index, key=lambda x: x[1])
@@ -63,7 +63,7 @@
idx, sample_len_cur_raw = item
if sample_len_cur_raw > self.max_token_length:
continue
-
+
max_token_cur = max(max_token, sample_len_cur_raw)
max_token_padding = 1 + num_sample
if self.batch_type == 'length':
@@ -77,5 +77,4 @@
batch = [idx]
max_token = sample_len_cur_raw
num_sample = 1
-
-
\ No newline at end of file
+
diff --git a/funasr/metrics/compute_acc.py b/funasr/metrics/compute_acc.py
index 2b45836..9d16e1f 100644
--- a/funasr/metrics/compute_acc.py
+++ b/funasr/metrics/compute_acc.py
@@ -5,7 +5,7 @@
Args:
pad_outputs (Tensor): Prediction tensors (B * Lmax, D).
- pad_targets (LongTensor): Target label tensors (B, Lmax).
+ pad_targets (LongTensor): Target label tensors (B, Lmax, D).
ignore_label (int): Ignore label id.
Returns:
diff --git a/funasr/models/rwkv_bat/rwkv_encoder.py b/funasr/models/rwkv_bat/rwkv_encoder.py
index 3454299..af702e9 100644
--- a/funasr/models/rwkv_bat/rwkv_encoder.py
+++ b/funasr/models/rwkv_bat/rwkv_encoder.py
@@ -113,12 +113,12 @@
x = self.embed_norm(x)
olens = mask.eq(0).sum(1)
- # for training
- # for block in self.rwkv_blocks:
- # x, _ = block(x)
-
- # for streaming inference
- x = self.rwkv_infer(x)
+ if self.training:
+ for block in self.rwkv_blocks:
+ x, _ = block(x)
+ else:
+ x = self.rwkv_infer(x)
+
x = self.final_norm(x)
if self.time_reduction_factor > 1:
diff --git a/funasr/models/uniasr/e2e_uni_asr.py b/funasr/models/uniasr/e2e_uni_asr.py
index 46c5832..de7ed29 100644
--- a/funasr/models/uniasr/e2e_uni_asr.py
+++ b/funasr/models/uniasr/e2e_uni_asr.py
@@ -443,7 +443,10 @@
# force_gatherable: to-device and to-tensor if scalar for DataParallel
if self.length_normalized_loss:
batch_size = int((text_lengths + 1).sum())
+
+
+
loss, stats, weight = force_gatherable((loss, stats, batch_size), loss.device)
return loss, stats, weight
diff --git a/funasr/quick_start.md b/funasr/quick_start.md
index 4566b87..44ce5c4 100644
--- a/funasr/quick_start.md
+++ b/funasr/quick_start.md
@@ -47,11 +47,11 @@
```shell
sudo docker pull \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.6
mkdir -p ./funasr-runtime-resources/models
sudo docker run -p 10096:10095 -it --privileged=true \
-v $PWD/funasr-runtime-resources/models:/workspace/models \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.6
```
###### Server Start
@@ -93,11 +93,11 @@
```shell
sudo docker pull \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1
mkdir -p ./funasr-runtime-resources/models
sudo docker run -p 10095:10095 -it --privileged=true \
-v $PWD/funasr-runtime-resources/models:/workspace/models \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1
```
###### Server Start
diff --git a/funasr/quick_start_zh.md b/funasr/quick_start_zh.md
index 64fe870..2fe756a 100644
--- a/funasr/quick_start_zh.md
+++ b/funasr/quick_start_zh.md
@@ -48,11 +48,11 @@
```shell
sudo docker pull \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.6
mkdir -p ./funasr-runtime-resources/models
sudo docker run -p 10096:10095 -it --privileged=true \
-v $PWD/funasr-runtime-resources/models:/workspace/models \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.6
```
###### 服务端启动
@@ -92,11 +92,11 @@
```shell
sudo docker pull \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1
mkdir -p ./funasr-runtime-resources/models
sudo docker run -p 10095:10095 -it --privileged=true \
-v $PWD/funasr-runtime-resources/models:/workspace/models \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1
```
###### 服务端启动
diff --git a/funasr/schedulers/__init__.py b/funasr/schedulers/__init__.py
index 2ee3a9e..cba286a 100644
--- a/funasr/schedulers/__init__.py
+++ b/funasr/schedulers/__init__.py
@@ -20,4 +20,4 @@
cycliclr=torch.optim.lr_scheduler.CyclicLR,
onecyclelr=torch.optim.lr_scheduler.OneCycleLR,
CosineAnnealingWarmRestarts=torch.optim.lr_scheduler.CosineAnnealingWarmRestarts,
-)
\ No newline at end of file
+)
diff --git a/funasr/tokenizer/abs_tokenizer.py b/funasr/tokenizer/abs_tokenizer.py
index d43d7b2..548bf06 100644
--- a/funasr/tokenizer/abs_tokenizer.py
+++ b/funasr/tokenizer/abs_tokenizer.py
@@ -11,89 +11,90 @@
import numpy as np
-class AbsTokenizer(ABC):
- @abstractmethod
- def text2tokens(self, line: str) -> List[str]:
- raise NotImplementedError
- @abstractmethod
- def tokens2text(self, tokens: Iterable[str]) -> str:
- raise NotImplementedError
+class AbsTokenizer(ABC):
+ @abstractmethod
+ def text2tokens(self, line: str) -> List[str]:
+ raise NotImplementedError
+
+ @abstractmethod
+ def tokens2text(self, tokens: Iterable[str]) -> str:
+ raise NotImplementedError
class BaseTokenizer(ABC):
- def __init__(self, token_list: Union[Path, str, Iterable[str]]=None,
- unk_symbol: str = "<unk>",
- **kwargs,
- ):
-
- if token_list is not None:
- if isinstance(token_list, (Path, str)) and token_list.endswith(".txt"):
- token_list = Path(token_list)
- self.token_list_repr = str(token_list)
- self.token_list: List[str] = []
-
- with token_list.open("r", encoding="utf-8") as f:
- for idx, line in enumerate(f):
- line = line.rstrip()
- self.token_list.append(line)
- elif isinstance(token_list, (Path, str)) and token_list.endswith(".json"):
- token_list = Path(token_list)
- self.token_list_repr = str(token_list)
- self.token_list: List[str] = []
-
- with open(token_list, 'r', encoding='utf-8') as f:
- self.token_list = json.load(f)
-
-
- else:
- self.token_list: List[str] = list(token_list)
- self.token_list_repr = ""
- for i, t in enumerate(self.token_list):
- if i == 3:
- break
- self.token_list_repr += f"{t}, "
- self.token_list_repr += f"... (NVocab={(len(self.token_list))})"
-
- self.token2id: Dict[str, int] = {}
- for i, t in enumerate(self.token_list):
- if t in self.token2id:
- raise RuntimeError(f'Symbol "{t}" is duplicated')
- self.token2id[t] = i
-
- self.unk_symbol = unk_symbol
- if self.unk_symbol not in self.token2id:
- raise RuntimeError(
- f"Unknown symbol '{unk_symbol}' doesn't exist in the token_list"
- )
- self.unk_id = self.token2id[self.unk_symbol]
-
- def encode(self, text):
- tokens = self.text2tokens(text)
- text_ints = self.tokens2ids(tokens)
-
- return text_ints
-
- def decode(self, text_ints):
- token = self.ids2tokens(text_ints)
- text = self.tokens2text(token)
- return text
-
- def get_num_vocabulary_size(self) -> int:
- return len(self.token_list)
-
- def ids2tokens(self, integers: Union[np.ndarray, Iterable[int]]) -> List[str]:
- if isinstance(integers, np.ndarray) and integers.ndim != 1:
- raise ValueError(f"Must be 1 dim ndarray, but got {integers.ndim}")
- return [self.token_list[i] for i in integers]
-
- def tokens2ids(self, tokens: Iterable[str]) -> List[int]:
- return [self.token2id.get(i, self.unk_id) for i in tokens]
-
- @abstractmethod
- def text2tokens(self, line: str) -> List[str]:
- raise NotImplementedError
-
- @abstractmethod
- def tokens2text(self, tokens: Iterable[str]) -> str:
- raise NotImplementedError
+ def __init__(self, token_list: Union[Path, str, Iterable[str]] = None,
+ unk_symbol: str = "<unk>",
+ **kwargs,
+ ):
+
+ if token_list is not None:
+ if isinstance(token_list, (Path, str)) and token_list.endswith(".txt"):
+ token_list = Path(token_list)
+ self.token_list_repr = str(token_list)
+ self.token_list: List[str] = []
+
+ with token_list.open("r", encoding="utf-8") as f:
+ for idx, line in enumerate(f):
+ line = line.rstrip()
+ self.token_list.append(line)
+ elif isinstance(token_list, (Path, str)) and token_list.endswith(".json"):
+ token_list = Path(token_list)
+ self.token_list_repr = str(token_list)
+ self.token_list: List[str] = []
+
+ with open(token_list, 'r', encoding='utf-8') as f:
+ self.token_list = json.load(f)
+
+
+ else:
+ self.token_list: List[str] = list(token_list)
+ self.token_list_repr = ""
+ for i, t in enumerate(self.token_list):
+ if i == 3:
+ break
+ self.token_list_repr += f"{t}, "
+ self.token_list_repr += f"... (NVocab={(len(self.token_list))})"
+
+ self.token2id: Dict[str, int] = {}
+ for i, t in enumerate(self.token_list):
+ if t in self.token2id:
+ raise RuntimeError(f'Symbol "{t}" is duplicated')
+ self.token2id[t] = i
+
+ self.unk_symbol = unk_symbol
+ if self.unk_symbol not in self.token2id:
+ raise RuntimeError(
+ f"Unknown symbol '{unk_symbol}' doesn't exist in the token_list"
+ )
+ self.unk_id = self.token2id[self.unk_symbol]
+
+ def encode(self, text):
+ tokens = self.text2tokens(text)
+ text_ints = self.tokens2ids(tokens)
+
+ return text_ints
+
+ def decode(self, text_ints):
+ token = self.ids2tokens(text_ints)
+ text = self.tokens2text(token)
+ return text
+
+ def get_num_vocabulary_size(self) -> int:
+ return len(self.token_list)
+
+ def ids2tokens(self, integers: Union[np.ndarray, Iterable[int]]) -> List[str]:
+ if isinstance(integers, np.ndarray) and integers.ndim != 1:
+ raise ValueError(f"Must be 1 dim ndarray, but got {integers.ndim}")
+ return [self.token_list[i] for i in integers]
+
+ def tokens2ids(self, tokens: Iterable[str]) -> List[int]:
+ return [self.token2id.get(i, self.unk_id) for i in tokens]
+
+ @abstractmethod
+ def text2tokens(self, line: str) -> List[str]:
+ raise NotImplementedError
+
+ @abstractmethod
+ def tokens2text(self, tokens: Iterable[str]) -> str:
+ raise NotImplementedError
\ No newline at end of file
diff --git a/funasr/tokenizer/build_tokenizer.py b/funasr/tokenizer/build_tokenizer.py
index 05db6a6..9d1cdc3 100644
--- a/funasr/tokenizer/build_tokenizer.py
+++ b/funasr/tokenizer/build_tokenizer.py
@@ -1,17 +1,7 @@
from pathlib import Path
from typing import Iterable
from typing import Union
-from abc import ABC
-from abc import abstractmethod
-from typing import Iterable
-from typing import List
-from pathlib import Path
-from typing import Dict
-from typing import Iterable
-from typing import List
-from typing import Union
-import numpy as np
from funasr.tokenizer.abs_tokenizer import AbsTokenizer
from funasr.tokenizer.char_tokenizer import CharTokenizer
@@ -28,8 +18,7 @@
space_symbol: str = "<space>",
delimiter: str = None,
g2p_type: str = None,
- **kwargs,
-):
+) -> AbsTokenizer:
"""A helper function to instantiate Tokenizer"""
if token_type == "bpe":
if bpemodel is None:
@@ -39,7 +28,7 @@
raise RuntimeError(
"remove_non_linguistic_symbols is not implemented for token_type=bpe"
)
- return SentencepiecesTokenizer(bpemodel, **kwargs)
+ return SentencepiecesTokenizer(bpemodel)
elif token_type == "word":
if remove_non_linguistic_symbols and non_linguistic_symbols is not None:
@@ -49,14 +38,13 @@
remove_non_linguistic_symbols=True,
)
else:
- return WordTokenizer(delimiter=delimiter, **kwargs)
+ return WordTokenizer(delimiter=delimiter)
elif token_type == "char":
return CharTokenizer(
non_linguistic_symbols=non_linguistic_symbols,
space_symbol=space_symbol,
remove_non_linguistic_symbols=remove_non_linguistic_symbols,
- **kwargs
)
elif token_type == "phn":
@@ -65,7 +53,6 @@
non_linguistic_symbols=non_linguistic_symbols,
space_symbol=space_symbol,
remove_non_linguistic_symbols=remove_non_linguistic_symbols,
- **kwargs
)
else:
diff --git a/funasr/tokenizer/char_tokenizer.py b/funasr/tokenizer/char_tokenizer.py
index 8c6c214..0635fd7 100644
--- a/funasr/tokenizer/char_tokenizer.py
+++ b/funasr/tokenizer/char_tokenizer.py
@@ -59,4 +59,4 @@
def tokens2text(self, tokens: Iterable[str]) -> str:
tokens = [t if t != self.space_symbol else " " for t in tokens]
- return "".join(tokens)
+ return "".join(tokens)
\ No newline at end of file
diff --git a/funasr/tokenizer/funtoken.py b/funasr/tokenizer/funtoken.py
deleted file mode 100644
index 7187d85..0000000
--- a/funasr/tokenizer/funtoken.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from pathlib import Path
-from typing import Iterable
-from typing import Union
-from abc import ABC
-from abc import abstractmethod
-from typing import Iterable
-from typing import List
-from pathlib import Path
-from typing import Dict
-from typing import Iterable
-from typing import List
-from typing import Union
-
-import numpy as np
-
-from funasr.tokenizer.abs_tokenizer import AbsTokenizer
-from funasr.tokenizer.char_tokenizer import CharTokenizer
-from funasr.tokenizer.phoneme_tokenizer import PhonemeTokenizer
-from funasr.tokenizer.sentencepiece_tokenizer import SentencepiecesTokenizer
-from funasr.tokenizer.word_tokenizer import WordTokenizer
-
-def build_tokenizer(
- token_type: str,
- bpemodel: Union[Path, str, Iterable[str]] = None,
- non_linguistic_symbols: Union[Path, str, Iterable[str]] = None,
- remove_non_linguistic_symbols: bool = False,
- space_symbol: str = "<space>",
- delimiter: str = None,
- g2p_type: str = None,
- **kwargs,
-):
- """A helper function to instantiate Tokenizer"""
- # import pdb;
- # pdb.set_trace()
- if token_type == "bpe":
- if bpemodel is None:
- raise ValueError('bpemodel is required if token_type = "bpe"')
-
- if remove_non_linguistic_symbols:
- raise RuntimeError(
- "remove_non_linguistic_symbols is not implemented for token_type=bpe"
- )
- return SentencepiecesTokenizer(bpemodel, **kwargs)
-
- elif token_type == "word":
- if remove_non_linguistic_symbols and non_linguistic_symbols is not None:
- return WordTokenizer(
- delimiter=delimiter,
- non_linguistic_symbols=non_linguistic_symbols,
- remove_non_linguistic_symbols=True,
- )
- else:
- return WordTokenizer(delimiter=delimiter, **kwargs)
-
- elif token_type == "char":
- return CharTokenizer(
- non_linguistic_symbols=non_linguistic_symbols,
- space_symbol=space_symbol,
- remove_non_linguistic_symbols=remove_non_linguistic_symbols,
- **kwargs
- )
-
- elif token_type == "phn":
- return PhonemeTokenizer(
- g2p_type=g2p_type,
- non_linguistic_symbols=non_linguistic_symbols,
- space_symbol=space_symbol,
- remove_non_linguistic_symbols=remove_non_linguistic_symbols,
- **kwargs
- )
-
- else:
- raise ValueError(
- f"token_mode must be one of bpe, word, char or phn: " f"{token_type}"
- )
diff --git a/funasr/tokenizer/phoneme_tokenizer.py b/funasr/tokenizer/phoneme_tokenizer.py
index f1f7168..2820f21 100644
--- a/funasr/tokenizer/phoneme_tokenizer.py
+++ b/funasr/tokenizer/phoneme_tokenizer.py
@@ -363,7 +363,6 @@
non_linguistic_symbols: Union[Path, str, Iterable[str]] = None,
space_symbol: str = "<space>",
remove_non_linguistic_symbols: bool = False,
- **kwargs,
):
if g2p_type is None:
self.g2p = split_by_space
diff --git a/funasr/tokenizer/sentencepiece_tokenizer.py b/funasr/tokenizer/sentencepiece_tokenizer.py
index df98c2c..9a65920 100644
--- a/funasr/tokenizer/sentencepiece_tokenizer.py
+++ b/funasr/tokenizer/sentencepiece_tokenizer.py
@@ -9,7 +9,7 @@
class SentencepiecesTokenizer(AbsTokenizer):
- def __init__(self, model: Union[Path, str], **kwargs):
+ def __init__(self, model: Union[Path, str]):
self.model = str(model)
# NOTE(kamo):
# Don't build SentencePieceProcessor in __init__()
diff --git a/funasr/tokenizer/word_tokenizer.py b/funasr/tokenizer/word_tokenizer.py
index d7bbaf9..cbd0673 100644
--- a/funasr/tokenizer/word_tokenizer.py
+++ b/funasr/tokenizer/word_tokenizer.py
@@ -14,7 +14,6 @@
delimiter: str = None,
non_linguistic_symbols: Union[Path, str, Iterable[str]] = None,
remove_non_linguistic_symbols: bool = False,
- **kwargs,
):
self.delimiter = delimiter
diff --git a/runtime/docs/SDK_advanced_guide_offline.md b/runtime/docs/SDK_advanced_guide_offline.md
index 370bd43..fbaae7a 100644
--- a/runtime/docs/SDK_advanced_guide_offline.md
+++ b/runtime/docs/SDK_advanced_guide_offline.md
@@ -12,6 +12,8 @@
| TIME | INFO | IMAGE VERSION | IMAGE ID |
|------------|----------------------------------------------------------------------------------------------------------------------------------|------------------------------|--------------|
+| 2024.01.08 | optimized the format of sentence-level timestamps | funasr-runtime-sdk-cpu-0.4.1 | 0250f8ef981b |
+| 2024.01.03 | Added support for 8k models, optimized timestamp mismatch issues and added sentence-level timestamps, improved the effectiveness of English word FST hotwords, supported automated configuration of thread parameters, and fixed known crash issues as well as memory leak problems. | funasr-runtime-sdk-cpu-0.4.0 | c4483ee08f04 |
| 2023.11.08 | supporting punc-large model, Ngram model, fst hotwords, server-side loading of hotwords, adaptation to runtime structure changes | funasr-runtime-sdk-cpu-0.3.0 | caa64bddbb43 |
| 2023.09.19 | supporting ITN model | funasr-runtime-sdk-cpu-0.2.2 | 2c5286be13e9 |
| 2023.08.22 | integrated ffmpeg to support various audio and video inputs, supporting nn-hotword model and timestamp model | funasr-runtime-sdk-cpu-0.2.0 | 1ad3d19e0707 |
@@ -30,9 +32,9 @@
### Pulling and launching images
Use the following command to pull and launch the Docker image for the FunASR runtime-SDK:
```shell
-sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0
+sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1
-sudo docker run -p 10095:10095 -it --privileged=true -v /root:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0
+sudo docker run -p 10095:10095 -it --privileged=true -v /root:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1
```
Introduction to command parameters:
@@ -84,11 +86,8 @@
```text
--download-model-dir: Model download address, download models from Modelscope by setting the model ID.
--model-dir: modelscope model ID or local model path.
---quantize: True for quantized ASR model, False for non-quantized ASR model. Default is True.
--vad-dir: modelscope model ID or local model path.
---vad-quant: True for quantized VAD model, False for non-quantized VAD model. Default is True.
--punc-dir: modelscope model ID or local model path.
---punc-quant: True for quantized PUNC model, False for non-quantized PUNC model. Default is True.
--itn-dir modelscope model ID or local model path.
--port: Port number that the server listens on. Default is 10095.
--decoder-thread-num: The number of thread pools on the server side that can handle concurrent requests.
diff --git a/runtime/docs/SDK_advanced_guide_offline_en.md b/runtime/docs/SDK_advanced_guide_offline_en.md
index d4b8a64..df23c1b 100644
--- a/runtime/docs/SDK_advanced_guide_offline_en.md
+++ b/runtime/docs/SDK_advanced_guide_offline_en.md
@@ -6,6 +6,7 @@
| TIME | INFO | IMAGE VERSION | IMAGE ID |
|------------|-----------------------------------------|---------------------------------|--------------|
+| 2024.01.03 | fixed known crash issues as well as memory leak problems | funasr-runtime-sdk-en-cpu-0.1.2 | 0cdd9f4a4bb5 |
| 2023.11.08 | Adaptation to runtime structure changes | funasr-runtime-sdk-en-cpu-0.1.1 | 27017f70f72a |
| 2023.10.16 | 1.0 released | funasr-runtime-sdk-en-cpu-0.1.0 | e0de03eb0163 |
@@ -21,9 +22,9 @@
### Pulling and launching images
Use the following command to pull and launch the Docker image for the FunASR runtime-SDK:
```shell
-sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-en-cpu-0.1.1
+sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-en-cpu-0.1.2
-sudo docker run -p 10097:10095 -it --privileged=true -v /root:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-en-cpu-0.1.1
+sudo docker run -p 10097:10095 -it --privileged=true -v /root:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-en-cpu-0.1.2
```
Introduction to command parameters:
```text
@@ -63,11 +64,8 @@
```text
--download-model-dir: Model download address, download models from Modelscope by setting the model ID.
--model-dir: modelscope model ID or local model path.
---quantize: True for quantized ASR model, False for non-quantized ASR model. Default is True.
--vad-dir: modelscope model ID or local model path.
---vad-quant: True for quantized VAD model, False for non-quantized VAD model. Default is True.
--punc-dir: modelscope model ID or local model path.
---punc-quant: True for quantized PUNC model, False for non-quantized PUNC model. Default is True.
--itn-dir modelscope model ID or local model path.
--port: Port number that the server listens on. Default is 10095.
--decoder-thread-num: The number of thread pools on the server side that can handle concurrent requests.
diff --git a/runtime/docs/SDK_advanced_guide_offline_en_zh.md b/runtime/docs/SDK_advanced_guide_offline_en_zh.md
index 88ed939..baf8402 100644
--- a/runtime/docs/SDK_advanced_guide_offline_en_zh.md
+++ b/runtime/docs/SDK_advanced_guide_offline_en_zh.md
@@ -6,6 +6,7 @@
| 鏃堕棿 | 璇︽儏 | 闀滃儚鐗堟湰 | 闀滃儚ID |
|------------|---------------|---------------------------------|--------------|
+| 2024.01.03 | 修复已知的crash问题及内存泄漏问题 | funasr-runtime-sdk-en-cpu-0.1.2 | 0cdd9f4a4bb5 |
| 2023.11.08 | runtime缁撴瀯鍙樺寲閫傞厤 | funasr-runtime-sdk-en-cpu-0.1.1 | 27017f70f72a |
| 2023.10.16 | 1.0 鍙戝竷 | funasr-runtime-sdk-en-cpu-0.1.0 | e0de03eb0163 |
@@ -36,11 +37,11 @@
閫氳繃涓嬭堪鍛戒护鎷夊彇骞跺惎鍔‵unASR runtime-SDK鐨刣ocker闀滃儚锛�
```shell
sudo docker pull \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-en-cpu-0.1.1
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-en-cpu-0.1.2
mkdir -p ./funasr-runtime-resources/models
sudo docker run -p 10097:10095 -it --privileged=true \
-v $PWD/funasr-runtime-resources/models:/workspace/models \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-en-cpu-0.1.1
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-en-cpu-0.1.2
```
### 鏈嶅姟绔惎鍔�
@@ -148,11 +149,8 @@
```text
--download-model-dir 妯″瀷涓嬭浇鍦板潃锛岄�氳繃璁剧疆model ID浠嶮odelscope涓嬭浇妯″瀷
--model-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---quantize True涓洪噺鍖朅SR妯″瀷锛孎alse涓洪潪閲忓寲ASR妯″瀷锛岄粯璁ゆ槸True
--vad-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---vad-quant True涓洪噺鍖朧AD妯″瀷锛孎alse涓洪潪閲忓寲VAD妯″瀷锛岄粯璁ゆ槸True
--punc-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---punc-quant True涓洪噺鍖朠UNC妯″瀷锛孎alse涓洪潪閲忓寲PUNC妯″瀷锛岄粯璁ゆ槸True
--itn-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
--port 鏈嶅姟绔洃鍚殑绔彛鍙凤紝榛樿涓� 10095
--decoder-thread-num 鏈嶅姟绔嚎绋嬫睜涓暟(鏀寔鐨勬渶澶у苟鍙戣矾鏁�)锛�
diff --git a/runtime/docs/SDK_advanced_guide_offline_zh.md b/runtime/docs/SDK_advanced_guide_offline_zh.md
index 13055dd..6d53609 100644
--- a/runtime/docs/SDK_advanced_guide_offline_zh.md
+++ b/runtime/docs/SDK_advanced_guide_offline_zh.md
@@ -10,6 +10,8 @@
| 鏃堕棿 | 璇︽儏 | 闀滃儚鐗堟湰 | 闀滃儚ID |
|------------|---------------------------------------------------|------------------------------|--------------|
+| 2024.01.08 | 优化句子级时间戳json格式 | funasr-runtime-sdk-cpu-0.4.1 | 0250f8ef981b |
+| 2024.01.03 | 新增支持8k模型、优化时间戳不匹配问题及增加句子级别时间戳、优化英文单词fst热词效果、支持自动化配置线程参数，同时修复已知的crash问题及内存泄漏问题 | funasr-runtime-sdk-cpu-0.4.0 | c4483ee08f04 |
| 2023.11.08 | 鏀寔鏍囩偣澶фā鍨嬨�佹敮鎸丯gram妯″瀷銆佹敮鎸乫st鐑瘝銆佹敮鎸佹湇鍔$鍔犺浇鐑瘝銆乺untime缁撴瀯鍙樺寲閫傞厤 | funasr-runtime-sdk-cpu-0.3.0 | caa64bddbb43 |
| 2023.09.19 | 鏀寔ITN妯″瀷 | funasr-runtime-sdk-cpu-0.2.2 | 2c5286be13e9 |
| 2023.08.22 | 闆嗘垚ffmpeg鏀寔澶氱闊宠棰戣緭鍏ャ�佹敮鎸佺儹璇嶆ā鍨嬨�佹敮鎸佹椂闂存埑妯″瀷 | funasr-runtime-sdk-cpu-0.2.0 | 1ad3d19e0707 |
@@ -44,11 +46,11 @@
```shell
sudo docker pull \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1
mkdir -p ./funasr-runtime-resources/models
sudo docker run -p 10095:10095 -it --privileged=true \
-v $PWD/funasr-runtime-resources/models:/workspace/models \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.3.0
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-cpu-0.4.1
```
### 鏈嶅姟绔惎鍔�
@@ -70,9 +72,22 @@
# damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx锛堟椂闂存埑锛�
# damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404-onnx锛坣n鐑瘝锛�
# 濡傛灉鎮ㄦ兂鍦ㄦ湇鍔$鍔犺浇鐑瘝锛岃鍦ㄥ涓绘満鏂囦欢./funasr-runtime-resources/models/hotwords.txt閰嶇疆鐑瘝锛坉ocker鏄犲皠鍦板潃涓�/workspace/models/hotwords.txt锛�:
-# 姣忚涓�涓儹璇嶏紝鏍煎紡(鐑瘝 鏉冮噸)锛氶樋閲屽反宸� 20
+# 每行一个热词，格式(热词 权重)：阿里巴巴 20（注：热词理论上无限制，但为了兼顾性能和效果，建议热词长度不超过10，个数不超过1k，权重1~100）
```
濡傛灉鎮ㄦ兂瀹氬埗ngram锛屽弬鑰冩枃妗�([濡備綍璁粌LM](./lm_train_tutorial.md))
+
+如果您想部署8k的模型，请使用如下命令启动服务：
+```shell
+cd FunASR/runtime
+nohup bash run_server.sh \
+ --download-model-dir /workspace/models \
+ --vad-dir damo/speech_fsmn_vad_zh-cn-8k-common \
+ --model-dir damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1 \
+ --punc-dir damo/punc_ct-transformer_cn-en-common-vocab471067-large-onnx \
+ --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst-token8358 \
+ --itn-dir thuduj12/fst_itn_zh \
+ --hotword /workspace/models/hotwords.txt > log.out 2>&1 &
+```
鏈嶅姟绔缁嗗弬鏁颁粙缁嶅彲鍙傝�僛鏈嶅姟绔敤娉曡瑙(#鏈嶅姟绔敤娉曡瑙�)
@@ -165,11 +180,8 @@
```text
--download-model-dir 妯″瀷涓嬭浇鍦板潃锛岄�氳繃璁剧疆model ID浠嶮odelscope涓嬭浇妯″瀷
--model-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---quantize True涓洪噺鍖朅SR妯″瀷锛孎alse涓洪潪閲忓寲ASR妯″瀷锛岄粯璁ゆ槸True
--vad-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---vad-quant True涓洪噺鍖朧AD妯″瀷锛孎alse涓洪潪閲忓寲VAD妯″瀷锛岄粯璁ゆ槸True
--punc-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---punc-quant True涓洪噺鍖朠UNC妯″瀷锛孎alse涓洪潪閲忓寲PUNC妯″瀷锛岄粯璁ゆ槸True
--lm-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
--itn-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
--port 鏈嶅姟绔洃鍚殑绔彛鍙凤紝榛樿涓� 10095
diff --git a/runtime/docs/SDK_advanced_guide_online.md b/runtime/docs/SDK_advanced_guide_online.md
index abcdb0d..2c72fdd 100644
--- a/runtime/docs/SDK_advanced_guide_online.md
+++ b/runtime/docs/SDK_advanced_guide_online.md
@@ -8,6 +8,7 @@
| TIME | INFO | IMAGE VERSION | IMAGE ID |
|------------|-------------------------------------------------------------------------------------|-------------------------------------|--------------|
+| 2024.01.03 | The 2pass-offline mode supports Ngram language model decoding and WFST hotwords, while also addressing known crash issues and memory leak problems | funasr-runtime-sdk-online-cpu-0.1.6 | f99925110d27 |
| 2023.11.09 | fix bug: without online results | funasr-runtime-sdk-online-cpu-0.1.5 | b16584b6d38b |
| 2023.11.08 | supporting server-side loading of hotwords, adaptation to runtime structure changes | funasr-runtime-sdk-online-cpu-0.1.4 | 691974017c38 |
| 2023.09.19 | supporting hotwords, timestamps, and ITN model in 2pass mode | funasr-runtime-sdk-online-cpu-0.1.2 | 7222c5319bcf |
@@ -26,9 +27,9 @@
### Pull Docker Image
Use the following command to pull and start the FunASR software package docker image:
```shell
-sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+sudo docker pull registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.6
mkdir -p ./funasr-runtime-resources/models
-sudo docker run -p 10096:10095 -it --privileged=true -v $PWD/funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+sudo docker run -p 10096:10095 -it --privileged=true -v $PWD/funasr-runtime-resources/models:/workspace/models registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.6
```
### Launching the Server
@@ -42,6 +43,7 @@
--model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \
--online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx \
--punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx \
+ --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
--itn-dir thuduj12/fst_itn_zh > log.out 2>&1 &
# If you want to close ssl锛宲lease add锛�--certfile 0
@@ -84,6 +86,7 @@
--online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx \
--vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
--punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx \
+ --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
--itn-dir thuduj12/fst_itn_zh \
--certfile ../../../ssl_key/server.crt \
--keyfile ../../../ssl_key/server.key \
@@ -101,11 +104,9 @@
--download-model-dir: Model download address, download models from Modelscope by setting the model ID.
--model-dir: modelscope model ID or local model path.
--online-model-dir modelscope model ID
---quantize: True for quantized ASR model, False for non-quantized ASR model. Default is True.
--vad-dir: modelscope model ID or local model path.
---vad-quant: True for quantized VAD model, False for non-quantized VAD model. Default is True.
--punc-dir: modelscope model ID or local model path.
---punc-quant: True for quantized PUNC model, False for non-quantized PUNC model. Default is True.
+--lm-dir modelscope model ID or local model path.
--itn-dir modelscope model ID or local model path.
--port: Port number that the server listens on. Default is 10095.
--decoder-thread-num: The number of thread pools on the server side that can handle concurrent requests.
diff --git a/runtime/docs/SDK_advanced_guide_online_zh.md b/runtime/docs/SDK_advanced_guide_online_zh.md
index fa0382a..b0e8ad5 100644
--- a/runtime/docs/SDK_advanced_guide_online_zh.md
+++ b/runtime/docs/SDK_advanced_guide_online_zh.md
@@ -12,6 +12,7 @@
| 鏃堕棿 | 璇︽儏 | 闀滃儚鐗堟湰 | 闀滃儚ID |
|:-----------|:----------------------------------|--------------------------------------|--------------|
+| 2024.01.03 | 2pass-offline模式支持Ngram语言模型解码、wfst热词，同时修复已知的crash问题及内存泄漏问题 | funasr-runtime-sdk-online-cpu-0.1.6 | f99925110d27 |
| 2023.11.09 | 淇鏃犲疄鏃剁粨鏋滈棶棰� | funasr-runtime-sdk-online-cpu-0.1.5 | b16584b6d38b |
| 2023.11.08 | 鏀寔鏈嶅姟绔姞杞界儹璇�(鏇存柊鐑瘝閫氫俊鍗忚)銆乺untime缁撴瀯鍙樺寲閫傞厤 | funasr-runtime-sdk-online-cpu-0.1.4 | 691974017c38 |
| 2023.09.19 | 2pass妯″紡鏀寔鐑瘝銆佹椂闂存埑銆両TN妯″瀷 | funasr-runtime-sdk-online-cpu-0.1.2 | 7222c5319bcf |
@@ -35,11 +36,11 @@
```shell
sudo docker pull \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.6
mkdir -p ./funasr-runtime-resources/models
sudo docker run -p 10096:10095 -it --privileged=true \
-v $PWD/funasr-runtime-resources/models:/workspace/models \
- registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.5
+ registry.cn-hangzhou.aliyuncs.com/funasr_repo/funasr:funasr-runtime-sdk-online-cpu-0.1.6
```
### 鏈嶅姟绔惎鍔�
@@ -53,6 +54,7 @@
--model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx \
--online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx \
--punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx \
+ --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
--itn-dir thuduj12/fst_itn_zh \
--hotword /workspace/models/hotwords.txt > log.out 2>&1 &
@@ -61,7 +63,7 @@
# damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx锛堟椂闂存埑锛�
# damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404-onnx锛坣n鐑瘝锛�
# 濡傛灉鎮ㄦ兂鍦ㄦ湇鍔$鍔犺浇鐑瘝锛岃鍦ㄥ涓绘満鏂囦欢./funasr-runtime-resources/models/hotwords.txt閰嶇疆鐑瘝锛坉ocker鏄犲皠鍦板潃涓�/workspace/models/hotwords.txt锛�:
-# 姣忚涓�涓儹璇嶏紝鏍煎紡(鐑瘝 鏉冮噸)锛氶樋閲屽反宸� 20
+# 每行一个热词，格式(热词 权重)：阿里巴巴 20（注：热词理论上无限制，但为了兼顾性能和效果，建议热词长度不超过10，个数不超过1k，权重1~100）
```
鏈嶅姟绔缁嗗弬鏁颁粙缁嶅彲鍙傝�僛鏈嶅姟绔敤娉曡瑙(#鏈嶅姟绔敤娉曡瑙�)
### 瀹㈡埛绔祴璇曚笌浣跨敤
@@ -100,6 +102,7 @@
--online-model-dir damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx \
--vad-dir damo/speech_fsmn_vad_zh-cn-16k-common-onnx \
--punc-dir damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx \
+ --lm-dir damo/speech_ngram_lm_zh-cn-ai-wesp-fst \
--itn-dir thuduj12/fst_itn_zh \
--certfile ../../../ssl_key/server.crt \
--keyfile ../../../ssl_key/server.key \
@@ -110,11 +113,9 @@
--download-model-dir 妯″瀷涓嬭浇鍦板潃锛岄�氳繃璁剧疆model ID浠嶮odelscope涓嬭浇妯″瀷
--model-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
--online-model-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---quantize True涓洪噺鍖朅SR妯″瀷锛孎alse涓洪潪閲忓寲ASR妯″瀷锛岄粯璁ゆ槸True
--vad-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---vad-quant True涓洪噺鍖朧AD妯″瀷锛孎alse涓洪潪閲忓寲VAD妯″瀷锛岄粯璁ゆ槸True
--punc-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
---punc-quant True涓洪噺鍖朠UNC妯″瀷锛孎alse涓洪潪閲忓寲PUNC妯″瀷锛岄粯璁ゆ槸True
+--lm-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
--itn-dir modelscope model ID 鎴栬�� 鏈湴妯″瀷璺緞
--port 鏈嶅姟绔洃鍚殑绔彛鍙凤紝榛樿涓� 10095
--decoder-thread-num 鏈嶅姟绔嚎绋嬫睜涓暟(鏀寔鐨勬渶澶у苟鍙戣矾鏁�)锛�
diff --git a/runtime/docs/SDK_tutorial_online.md b/runtime/docs/SDK_tutorial_online.md
index a38502d..4683761 100644
--- a/runtime/docs/SDK_tutorial_online.md
+++ b/runtime/docs/SDK_tutorial_online.md
@@ -88,7 +88,9 @@
--port specifies the deployment port number as 10095.
--mode: `offline` indicates that the inference mode is one-sentence recognition; `online` indicates that the inference mode is real-time speech recognition; `2pass` indicates real-time speech recognition, and offline models are used for error correction at the end of each sentence.
--chunk-size: indicates the latency configuration of the streaming model. [5,10,5] indicates that the current audio is 600ms, with a lookback of 300ms and a lookahead of 300ms.
+--record 1 means the microphone is used as input; the default is 0.
--wav-path specifies the audio file to be transcribed, and supports file paths.
+--audio-fs specifies the sample rate of the PCM audio.
--threa-num sets the number of concurrent send threads, with a default value of 1.
--is-ssl sets whether to enable SSL certificate verification, with a default value of 1 for enabling and 0 for disabling.
--hotword: Hotword file path, one line for each hotword(e.g.:闃块噷宸村反 20)
diff --git a/runtime/docs/SDK_tutorial_online_zh.md b/runtime/docs/SDK_tutorial_online_zh.md
index 21a5aa6..e6705de 100644
--- a/runtime/docs/SDK_tutorial_online_zh.md
+++ b/runtime/docs/SDK_tutorial_online_zh.md
@@ -96,7 +96,9 @@
--mode锛歚offline`琛ㄧず鎺ㄧ悊妯″紡涓轰竴鍙ヨ瘽璇嗗埆锛沗online`琛ㄧず鎺ㄧ悊妯″紡涓哄疄鏃惰闊宠瘑鍒紱`2pass`琛ㄧず涓哄疄鏃惰闊宠瘑鍒紝
骞朵笖璇磋瘽鍙ュ熬閲囩敤绂荤嚎妯″瀷杩涜绾犻敊銆�
--chunk-size锛氳〃绀烘祦寮忔ā鍨媗atency閰嶇疆`[5,10,5]`锛岃〃绀哄綋鍓嶉煶棰戣В鐮佺墖娈典负600ms锛屽苟涓斿洖鐪�300ms锛屽彸鐪�300ms銆�
+--record 1表示使用麦克风作为输入，默认为0
--wav-path 闇�瑕佽繘琛岃浆鍐欑殑闊抽鏂囦欢锛屾敮鎸佹枃浠惰矾寰�
+--audio-fs pcm音频采样率
--thread-num 璁剧疆骞跺彂鍙戦�佺嚎绋嬫暟锛岄粯璁や负1
--is-ssl 璁剧疆鏄惁寮�鍚痵sl璇佷功鏍¢獙锛岄粯璁�1寮�鍚紝璁剧疆涓�0鍏抽棴
--hotword 鐑瘝鏂囦欢锛屾瘡琛屼竴涓儹璇嶏紝鏍煎紡(鐑瘝 鏉冮噸)锛氶樋閲屽反宸� 20
diff --git a/runtime/docs/docker_offline_cpu_en_lists b/runtime/docs/docker_offline_cpu_en_lists
index 3a20a34..7dab175 100644
--- a/runtime/docs/docker_offline_cpu_en_lists
+++ b/runtime/docs/docker_offline_cpu_en_lists
@@ -1,5 +1,5 @@
DOCKER:
- funasr-runtime-sdk-en-cpu-0.1.1
+ funasr-runtime-sdk-en-cpu-0.1.2
DEFAULT_ASR_MODEL:
damo/speech_paraformer-large_asr_nat-en-16k-common-vocab10020-onnx
DEFAULT_VAD_MODEL:
diff --git a/runtime/docs/docker_offline_cpu_zh_lists b/runtime/docs/docker_offline_cpu_zh_lists
index 5d1cd20..20763c0 100644
--- a/runtime/docs/docker_offline_cpu_zh_lists
+++ b/runtime/docs/docker_offline_cpu_zh_lists
@@ -1,7 +1,7 @@
DOCKER:
+ funasr-runtime-sdk-cpu-0.4.0
funasr-runtime-sdk-cpu-0.3.0
funasr-runtime-sdk-cpu-0.2.2
- funasr-runtime-sdk-cpu-0.2.1
DEFAULT_ASR_MODEL:
damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx
diff --git a/runtime/docs/docker_online_cpu_zh_lists b/runtime/docs/docker_online_cpu_zh_lists
index 43a493c..c05b681 100644
--- a/runtime/docs/docker_online_cpu_zh_lists
+++ b/runtime/docs/docker_online_cpu_zh_lists
@@ -1,7 +1,7 @@
DOCKER:
+ funasr-runtime-sdk-online-cpu-0.1.6
funasr-runtime-sdk-online-cpu-0.1.5
funasr-runtime-sdk-online-cpu-0.1.3
- funasr-runtime-sdk-online-cpu-0.1.2
DEFAULT_ASR_MODEL:
damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx
damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-onnx
diff --git a/runtime/docs/lm_train_tutorial.md b/runtime/docs/lm_train_tutorial.md
index 70db089..65cecc2 100644
--- a/runtime/docs/lm_train_tutorial.md
+++ b/runtime/docs/lm_train_tutorial.md
@@ -6,6 +6,7 @@
```shell
# 涓嬭浇: 绀轰緥璁粌璇枡text銆乴exicon 鍜� am寤烘ā鍗曞厓units.txt
wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/requirements/lm.tar.gz
+# 如果是匹配8k的am模型，使用 https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/requirements/lm_8358.tar.gz
tar -zxvf lm.tar.gz
```
diff --git a/runtime/docs/websocket_protocol.md b/runtime/docs/websocket_protocol.md
index f03b688..8a38490 100644
--- a/runtime/docs/websocket_protocol.md
+++ b/runtime/docs/websocket_protocol.md
@@ -36,7 +36,7 @@
#### Sending Recognition Results
The message (serialized in JSON) is:
```text
-{"mode": "offline", "wav_name": "wav_name", "text": "asr ouputs", "is_final": True, "timestamp":"[[100,200], [200,500]]"}
+{"mode": "offline", "wav_name": "wav_name", "text": "asr outputs", "is_final": True, "timestamp":"[[100,200], [200,500]]", "stamp_sents":[]}
```
Parameter explanation:
```text
@@ -45,6 +45,7 @@
`text`: the text output of speech recognition
`is_final`: indicating the end of recognition
`timestamp`锛欼f AM is a timestamp model, it will return this field, indicating the timestamp, in the format of "[[100,200], [200,500]]"
+`stamp_sents`: If AM is a timestamp model, this field is returned; it contains the sentence-level timestamps, in the format of [{"text_seg":"正 是 因 为","punc":",","start":"430","end":"1130","ts_list":[[430,670],[670,810],[810,1030],[1030,1130]]}]
```
## Real-time Speech Recognition
@@ -84,7 +85,7 @@
The message (serialized in JSON) is:
```text
-{"mode": "2pass-online", "wav_name": "wav_name", "text": "asr ouputs", "is_final": True, "timestamp":"[[100,200], [200,500]]"}
+{"mode": "2pass-online", "wav_name": "wav_name", "text": "asr outputs", "is_final": True, "timestamp":"[[100,200], [200,500]]", "stamp_sents":[]}
```
Parameter explanation:
```text
@@ -93,4 +94,5 @@
`text`: the text output of speech recognition
`is_final`: indicating the end of recognition
`timestamp`锛欼f AM is a timestamp model, it will return this field, indicating the timestamp, in the format of "[[100,200], [200,500]]"
+`stamp_sents`: If AM is a timestamp model, this field is returned; it contains the sentence-level timestamps, in the format of [{"text_seg":"正 是 因 为","punc":",","start":"430","end":"1130","ts_list":[[430,670],[670,810],[810,1030],[1030,1130]]}]
```
diff --git a/runtime/docs/websocket_protocol_zh.md b/runtime/docs/websocket_protocol_zh.md
index 60ff100..7385837 100644
--- a/runtime/docs/websocket_protocol_zh.md
+++ b/runtime/docs/websocket_protocol_zh.md
@@ -37,7 +37,7 @@
#### 鍙戦�佽瘑鍒粨鏋�
message涓猴紙閲囩敤json搴忓垪鍖栵級
```text
-{"mode": "offline", "wav_name": "wav_name", "text": "asr ouputs", "is_final": True,"timestamp":"[[100,200], [200,500]]"}
+{"mode": "offline", "wav_name": "wav_name", "text": "asr outputs", "is_final": True,"timestamp":"[[100,200], [200,500]]","stamp_sents":[]}
```
鍙傛暟浠嬬粛锛�
```text
@@ -46,6 +46,7 @@
`text`锛氳〃绀鸿闊宠瘑鍒緭鍑烘枃鏈�
`is_final`锛氳〃绀鸿瘑鍒粨鏉�
`timestamp`锛氬鏋淎M涓烘椂闂存埑妯″瀷锛屼細杩斿洖姝ゅ瓧娈碉紝琛ㄧず鏃堕棿鎴筹紝鏍煎紡涓� "[[100,200], [200,500]]"(ms)
+`stamp_sents`：如果AM为时间戳模型，会返回此字段，表示句子级别时间戳，格式为 [{"text_seg":"正 是 因 为","punc":",","start":"430","end":"1130","ts_list":[[430,670],[670,810],[810,1030],[1030,1130]]}]
```
## 瀹炴椂璇煶璇嗗埆
@@ -86,7 +87,7 @@
#### 鍙戦�佽瘑鍒粨鏋�
message涓猴紙閲囩敤json搴忓垪鍖栵級
```text
-{"mode": "2pass-online", "wav_name": "wav_name", "text": "asr ouputs", "is_final": True, "timestamp":"[[100,200], [200,500]]"}
+{"mode": "2pass-online", "wav_name": "wav_name", "text": "asr outputs", "is_final": True, "timestamp":"[[100,200], [200,500]]","stamp_sents":[]}
```
鍙傛暟浠嬬粛锛�
```text
@@ -95,4 +96,5 @@
`text`锛氳〃绀鸿闊宠瘑鍒緭鍑烘枃鏈�
`is_final`锛氳〃绀鸿瘑鍒粨鏉�
`timestamp`锛氬鏋淎M涓烘椂闂存埑妯″瀷锛屼細杩斿洖姝ゅ瓧娈碉紝琛ㄧず鏃堕棿鎴筹紝鏍煎紡涓� "[[100,200], [200,500]]"(ms)
+`stamp_sents`：如果AM为时间戳模型，会返回此字段，表示句子级别时间戳，格式为 [{"text_seg":"正 是 因 为","punc":",","start":"430","end":"1130","ts_list":[[430,670],[670,810],[810,1030],[1030,1130]]}]
```
diff --git a/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp b/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp
index c55c888..d49ba72 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp
@@ -43,13 +43,18 @@
LOG(INFO)<< key << " : " << value_arg.getValue();
}
-void runReg(FUNASR_HANDLE tpass_handle, std::vector<int> chunk_size, vector<string> wav_list, vector<string> wav_ids,
- float* total_length, long* total_time, int core_id, ASR_TYPE asr_mode_, string nn_hotwords_) {
+void runReg(FUNASR_HANDLE tpass_handle, std::vector<int> chunk_size, vector<string> wav_list, vector<string> wav_ids, int audio_fs,
+ float* total_length, long* total_time, int core_id, ASR_TYPE asr_mode_, string nn_hotwords_,
+ float glob_beam, float lat_beam, float am_scale, int inc_bias, unordered_map<string, int> hws_map) {
struct timeval start, end;
long seconds = 0;
float n_total_length = 0.0f;
long n_total_time = 0;
+
+ FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, glob_beam, lat_beam, am_scale);
+ // load hotwords list and build graph
+ FunWfstDecoderLoadHwsRes(decoder_handle, inc_bias, hws_map);
std::vector<std::vector<float>> hotwords_embedding = CompileHotwordEmbedding(tpass_handle, nn_hotwords_, ASR_TWO_PASS);
@@ -59,7 +64,7 @@
// warm up
for (size_t i = 0; i < 2; i++)
{
- int32_t sampling_rate_ = 16000;
+ int32_t sampling_rate_ = audio_fs;
funasr::Audio audio(1);
if(is_target_file(wav_list[0].c_str(), "wav")){
if(!audio.LoadWav2Char(wav_list[0].c_str(), &sampling_rate_)){
@@ -90,7 +95,8 @@
} else {
is_final = false;
}
- FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle, speech_buff+sample_offset, step, punc_cache, is_final, sampling_rate_, "pcm", (ASR_TYPE)asr_mode_, hotwords_embedding);
+ FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle, speech_buff+sample_offset, step, punc_cache, is_final,
+ sampling_rate_, "pcm", (ASR_TYPE)asr_mode_, hotwords_embedding, true, decoder_handle);
if (result)
{
FunASRFreeResult(result);
@@ -104,7 +110,7 @@
if (i >= wav_list.size()) {
break;
}
- int32_t sampling_rate_ = 16000;
+ int32_t sampling_rate_ = audio_fs;
funasr::Audio audio(1);
if(is_target_file(wav_list[i].c_str(), "wav")){
if(!audio.LoadWav2Char(wav_list[i].c_str(), &sampling_rate_)){
@@ -139,7 +145,8 @@
is_final = false;
}
gettimeofday(&start, NULL);
- FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle, speech_buff+sample_offset, step, punc_cache, is_final, sampling_rate_, "pcm", (ASR_TYPE)asr_mode_, hotwords_embedding);
+ FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle, speech_buff+sample_offset, step, punc_cache, is_final,
+ sampling_rate_, "pcm", (ASR_TYPE)asr_mode_, hotwords_embedding, true, decoder_handle);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
@@ -197,6 +204,8 @@
*total_time = n_total_time;
}
}
+ FunWfstDecoderUnloadHwsRes(decoder_handle);
+ FunASRWfstDecoderUninit(decoder_handle);
FunTpassOnlineUninit(tpass_online_handle);
}
@@ -215,11 +224,17 @@
TCLAP::ValueArg<std::string> punc_dir("", PUNC_DIR, "the punc online model path, which contains model.onnx, punc.yaml", false, "", "string");
TCLAP::ValueArg<std::string> punc_quant("", PUNC_QUANT, "true (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "true", "string");
TCLAP::ValueArg<std::string> itn_dir("", ITN_DIR, "the itn model(fst) path, which contains zh_itn_tagger.fst and zh_itn_verbalizer.fst", false, "", "string");
+ TCLAP::ValueArg<std::string> lm_dir("", LM_DIR, "the lm model path, which contains compiled models: TLG.fst, config.yaml, lexicon.txt ", false, "", "string");
+ TCLAP::ValueArg<float> global_beam("", GLOB_BEAM, "the decoding beam for beam searching ", false, 3.0, "float");
+ TCLAP::ValueArg<float> lattice_beam("", LAT_BEAM, "the lattice generation beam for beam searching ", false, 3.0, "float");
+ TCLAP::ValueArg<float> am_scale("", AM_SCALE, "the acoustic scale for beam searching ", false, 10.0, "float");
+ TCLAP::ValueArg<std::int32_t> fst_inc_wts("", FST_INC_WTS, "the fst hotwords incremental bias", false, 20, "int32_t");
TCLAP::ValueArg<std::string> asr_mode("", ASR_MODE, "offline, online, 2pass", false, "2pass", "string");
TCLAP::ValueArg<std::int32_t> onnx_thread("", "model-thread-num", "onnxruntime SetIntraOpNumThreads", false, 1, "int32_t");
TCLAP::ValueArg<std::int32_t> thread_num_("", THREAD_NUM, "multi-thread num for rtf", false, 1, "int32_t");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs("", AUDIO_FS, "the sample rate of audio", false, 16000, "int32_t");
TCLAP::ValueArg<std::string> hotword("", HOTWORD, "the hotword file, one hotword perline, Format: Hotword Weight (could be: 闃块噷宸村反 20)", false, "", "string");
cmd.add(offline_model_dir);
@@ -230,7 +245,13 @@
cmd.add(punc_dir);
cmd.add(punc_quant);
cmd.add(itn_dir);
+ cmd.add(lm_dir);
+ cmd.add(global_beam);
+ cmd.add(lattice_beam);
+ cmd.add(am_scale);
+ cmd.add(fst_inc_wts);
cmd.add(wav_path);
+ cmd.add(audio_fs);
cmd.add(asr_mode);
cmd.add(onnx_thread);
cmd.add(thread_num_);
@@ -246,6 +267,7 @@
GetValue(punc_dir, PUNC_DIR, model_path);
GetValue(punc_quant, PUNC_QUANT, model_path);
GetValue(itn_dir, ITN_DIR, model_path);
+ GetValue(lm_dir, LM_DIR, model_path);
GetValue(wav_path, WAV_PATH, model_path);
GetValue(asr_mode, ASR_MODE, model_path);
@@ -269,6 +291,14 @@
{
LOG(ERROR) << "FunTpassInit init failed";
exit(-1);
+ }
+ float glob_beam = 3.0f;
+ float lat_beam = 3.0f;
+ float am_sc = 10.0f;
+ if (lm_dir.isSet()) {
+ glob_beam = global_beam.getValue();
+ lat_beam = lattice_beam.getValue();
+ am_sc = am_scale.getValue();
}
gettimeofday(&end, NULL);
@@ -319,7 +349,8 @@
int rtf_threds = thread_num_.getValue();
for (int i = 0; i < rtf_threds; i++)
{
- threads.emplace_back(thread(runReg, tpass_hanlde, chunk_size, wav_list, wav_ids, &total_length, &total_time, i, (ASR_TYPE)asr_mode_, nn_hotwords_));
+ threads.emplace_back(thread(runReg, tpass_hanlde, chunk_size, wav_list, wav_ids, audio_fs.getValue(), &total_length, &total_time, i, (ASR_TYPE)asr_mode_, nn_hotwords_,
+ glob_beam, lat_beam, am_sc, fst_inc_wts.getValue(), hws_map));
}
for (auto& thread : threads)
diff --git a/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp b/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp
index 5af0b41..abcc4b2 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp
@@ -51,10 +51,16 @@
TCLAP::ValueArg<std::string> punc_dir("", PUNC_DIR, "the punc online model path, which contains model.onnx, punc.yaml", false, "", "string");
TCLAP::ValueArg<std::string> punc_quant("", PUNC_QUANT, "true (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "true", "string");
TCLAP::ValueArg<std::string> itn_dir("", ITN_DIR, "the itn model(fst) path, which contains zh_itn_tagger.fst and zh_itn_verbalizer.fst", false, "", "string");
+ TCLAP::ValueArg<std::string> lm_dir("", LM_DIR, "the lm model path, which contains compiled models: TLG.fst, config.yaml, lexicon.txt ", false, "", "string");
+ TCLAP::ValueArg<float> global_beam("", GLOB_BEAM, "the decoding beam for beam searching ", false, 3.0, "float");
+ TCLAP::ValueArg<float> lattice_beam("", LAT_BEAM, "the lattice generation beam for beam searching ", false, 3.0, "float");
+ TCLAP::ValueArg<float> am_scale("", AM_SCALE, "the acoustic scale for beam searching ", false, 10.0, "float");
+ TCLAP::ValueArg<std::int32_t> fst_inc_wts("", FST_INC_WTS, "the fst hotwords incremental bias", false, 20, "int32_t");
TCLAP::ValueArg<std::string> asr_mode("", ASR_MODE, "offline, online, 2pass", false, "2pass", "string");
TCLAP::ValueArg<std::int32_t> onnx_thread("", "model-thread-num", "onnxruntime SetIntraOpNumThreads", false, 1, "int32_t");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs("", AUDIO_FS, "the sample rate of audio", false, 16000, "int32_t");
TCLAP::ValueArg<std::string> hotword("", HOTWORD, "the hotword file, one hotword perline, Format: Hotword Weight (could be: 闃块噷宸村反 20)", false, "", "string");
cmd.add(offline_model_dir);
@@ -64,8 +70,14 @@
cmd.add(vad_quant);
cmd.add(punc_dir);
cmd.add(punc_quant);
+ cmd.add(lm_dir);
+ cmd.add(global_beam);
+ cmd.add(lattice_beam);
+ cmd.add(am_scale);
+ cmd.add(fst_inc_wts);
cmd.add(itn_dir);
cmd.add(wav_path);
+ cmd.add(audio_fs);
cmd.add(asr_mode);
cmd.add(onnx_thread);
cmd.add(hotword);
@@ -79,6 +91,7 @@
GetValue(vad_quant, VAD_QUANT, model_path);
GetValue(punc_dir, PUNC_DIR, model_path);
GetValue(punc_quant, PUNC_QUANT, model_path);
+ GetValue(lm_dir, LM_DIR, model_path);
GetValue(itn_dir, ITN_DIR, model_path);
GetValue(wav_path, WAV_PATH, model_path);
GetValue(asr_mode, ASR_MODE, model_path);
@@ -104,6 +117,16 @@
LOG(ERROR) << "FunTpassInit init failed";
exit(-1);
}
+ float glob_beam = 3.0f;
+ float lat_beam = 3.0f;
+ float am_sc = 10.0f;
+ if (lm_dir.isSet()) {
+ glob_beam = global_beam.getValue();
+ lat_beam = lattice_beam.getValue();
+ am_sc = am_scale.getValue();
+ }
+ // init wfst decoder
+ FUNASR_DEC_HANDLE decoder_handle = FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, glob_beam, lat_beam, am_sc);
gettimeofday(&end, NULL);
long seconds = (end.tv_sec - start.tv_sec);
@@ -144,6 +167,9 @@
wav_ids.emplace_back(default_id);
}
+ // load hotwords list and build graph
+ FunWfstDecoderLoadHwsRes(decoder_handle, fst_inc_wts.getValue(), hws_map);
+
std::vector<std::vector<float>> hotwords_embedding = CompileHotwordEmbedding(tpass_handle, nn_hotwords_, ASR_TWO_PASS);
// init online features
std::vector<int> chunk_size = {5,10,5};
@@ -154,7 +180,7 @@
auto& wav_file = wav_list[i];
auto& wav_id = wav_ids[i];
- int32_t sampling_rate_ = 16000;
+ int32_t sampling_rate_ = audio_fs.getValue();
funasr::Audio audio(1);
if(is_target_file(wav_file.c_str(), "wav")){
if(!audio.LoadWav2Char(wav_file.c_str(), &sampling_rate_)){
@@ -189,7 +215,9 @@
is_final = false;
}
gettimeofday(&start, NULL);
- FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle, speech_buff+sample_offset, step, punc_cache, is_final, sampling_rate_, "pcm", (ASR_TYPE)asr_mode_, hotwords_embedding);
+ FUNASR_RESULT result = FunTpassInferBuffer(tpass_handle, tpass_online_handle,
+ speech_buff+sample_offset, step, punc_cache, is_final, sampling_rate_, "pcm",
+ (ASR_TYPE)asr_mode_, hotwords_embedding, true, decoder_handle);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
@@ -233,10 +261,12 @@
}
}
}
-
+
+ FunWfstDecoderUnloadHwsRes(decoder_handle);
LOG(INFO) << "Audio length: " << (double)snippet_time << " s";
LOG(INFO) << "Model inference takes: " << (double)taking_micros / 1000000 <<" s";
LOG(INFO) << "Model inference RTF: " << (double)taking_micros/ (snippet_time*1000000);
+ FunASRWfstDecoderUninit(decoder_handle);
FunTpassOnlineUninit(tpass_online_handle);
FunTpassUninit(tpass_handle);
return 0;
diff --git a/runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp b/runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp
index 41cd038..83d7e79 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-offline-rtf.cpp
@@ -29,7 +29,7 @@
std::atomic<int> wav_index(0);
std::mutex mtx;
-void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, vector<string> wav_ids,
+void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, vector<string> wav_ids, int audio_fs,
float* total_length, long* total_time, int core_id, float glob_beam = 3.0f, float lat_beam = 3.0f, float am_sc = 10.0f,
int fst_inc_wts = 20, string hotword_path = "") {
@@ -54,8 +54,7 @@
// warm up
for (size_t i = 0; i < 1; i++)
{
- FunOfflineReset(asr_handle, decoder_handle);
- FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, hotwords_embedding, 16000, false, decoder_handle);
+ FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[0].c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs, true, decoder_handle);
if(result){
FunASRFreeResult(result);
}
@@ -69,7 +68,7 @@
}
gettimeofday(&start, NULL);
- FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, hotwords_embedding, 16000, false, decoder_handle);
+ FUNASR_RESULT result=FunOfflineInfer(asr_handle, wav_list[i].c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs, true, decoder_handle);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
@@ -82,6 +81,10 @@
string stamp = FunASRGetStamp(result);
if(stamp !=""){
LOG(INFO) << "Thread: " << this_thread::get_id() << "," << wav_ids[i] << " : " << stamp;
+ }
+ string stamp_sents = FunASRGetStampSents(result);
+ if(stamp_sents !=""){
+ LOG(INFO)<< wav_ids[i] <<" : "<<stamp_sents;
}
float snippet_time = FunASRGetRetSnippetTime(result);
n_total_length += snippet_time;
@@ -138,6 +141,7 @@
TCLAP::ValueArg<std::string> itn_dir("", ITN_DIR, "the itn model(fst) path, which contains zh_itn_tagger.fst and zh_itn_verbalizer.fst", false, "", "string");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs("", AUDIO_FS, "the sample rate of audio", false, 16000, "int32_t");
TCLAP::ValueArg<std::int32_t> thread_num("", THREAD_NUM, "multi-thread num for rtf", true, 0, "int32_t");
TCLAP::ValueArg<std::string> hotword("", HOTWORD, "the hotword file, one hotword perline, Format: Hotword Weight (could be: 阿里巴巴 20)", false, "", "string");
@@ -155,6 +159,7 @@
cmd.add(hotword);
cmd.add(fst_inc_wts);
cmd.add(wav_path);
+ cmd.add(audio_fs);
cmd.add(thread_num);
cmd.parse(argc, argv);
@@ -234,7 +239,7 @@
}
for (int i = 0; i < rtf_threds; i++)
{
- threads.emplace_back(thread(runReg, asr_handle, wav_list, wav_ids, &total_length, &total_time, i, glob_beam, lat_beam, am_sc, value_bias, hotword_path));
+ threads.emplace_back(thread(runReg, asr_handle, wav_list, wav_ids, audio_fs.getValue(), &total_length, &total_time, i, glob_beam, lat_beam, am_sc, value_bias, hotword_path));
}
for (auto& thread : threads)
diff --git a/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp b/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp
index 88c10da..950530a 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-offline-vad.cpp
@@ -68,10 +68,12 @@
TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "true (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "true", "string");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs("", AUDIO_FS, "the sample rate of audio", false, 16000, "int32_t");
cmd.add(model_dir);
cmd.add(quantize);
cmd.add(wav_path);
+ cmd.add(audio_fs);
cmd.parse(argc, argv);
std::map<std::string, std::string> model_path;
@@ -131,7 +133,7 @@
auto& wav_file = wav_list[i];
auto& wav_id = wav_ids[i];
gettimeofday(&start, NULL);
- FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), NULL, 16000);
+ FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), NULL, audio_fs.getValue());
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
diff --git a/runtime/onnxruntime/bin/funasr-onnx-offline.cpp b/runtime/onnxruntime/bin/funasr-onnx-offline.cpp
index 67a267d..4aaa002 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-offline.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-offline.cpp
@@ -50,13 +50,14 @@
TCLAP::ValueArg<std::string> vad_quant("", VAD_QUANT, "true (Default), load the model of model.onnx in vad_dir. If set true, load the model of model_quant.onnx in vad_dir", false, "true", "string");
TCLAP::ValueArg<std::string> punc_dir("", PUNC_DIR, "the punc model path, which contains model.onnx, punc.yaml", false, "", "string");
TCLAP::ValueArg<std::string> punc_quant("", PUNC_QUANT, "true (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "true", "string");
- TCLAP::ValueArg<std::string> lm_dir("", LM_DIR, "the lm model path, which contains compiled models: TLG.fst, config.yaml ", false, "", "string");
+ TCLAP::ValueArg<std::string> lm_dir("", LM_DIR, "the lm model path, which contains compiled models: TLG.fst, config.yaml, lexicon.txt ", false, "", "string");
TCLAP::ValueArg<float> global_beam("", GLOB_BEAM, "the decoding beam for beam searching ", false, 3.0, "float");
TCLAP::ValueArg<float> lattice_beam("", LAT_BEAM, "the lattice generation beam for beam searching ", false, 3.0, "float");
TCLAP::ValueArg<float> am_scale("", AM_SCALE, "the acoustic scale for beam searching ", false, 10.0, "float");
TCLAP::ValueArg<std::int32_t> fst_inc_wts("", FST_INC_WTS, "the fst hotwords incremental bias", false, 20, "int32_t");
TCLAP::ValueArg<std::string> itn_dir("", ITN_DIR, "the itn model(fst) path, which contains zh_itn_tagger.fst and zh_itn_verbalizer.fst", false, "", "string");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs("", AUDIO_FS, "the sample rate of audio", false, 16000, "int32_t");
TCLAP::ValueArg<std::string> hotword("", HOTWORD, "the hotword file, one hotword perline, Format: Hotword Weight (could be: 阿里巴巴 20)", false, "", "string");
cmd.add(model_dir);
@@ -72,6 +73,7 @@
cmd.add(am_scale);
cmd.add(fst_inc_wts);
cmd.add(wav_path);
+ cmd.add(audio_fs);
cmd.add(hotword);
cmd.parse(argc, argv);
@@ -157,7 +159,7 @@
auto& wav_file = wav_list[i];
auto& wav_id = wav_ids[i];
gettimeofday(&start, NULL);
- FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, hotwords_embedding, 16000, false, decoder_handle);
+ FUNASR_RESULT result=FunOfflineInfer(asr_hanlde, wav_file.c_str(), RASR_NONE, NULL, hotwords_embedding, audio_fs.getValue(), true, decoder_handle);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
@@ -170,6 +172,10 @@
if(stamp !=""){
LOG(INFO)<< wav_id <<" : "<<stamp;
}
+ string stamp_sents = FunASRGetStampSents(result);
+ if(stamp_sents !=""){
+ LOG(INFO)<< wav_id <<" : "<<stamp_sents;
+ }
snippet_time += FunASRGetRetSnippetTime(result);
FunASRFreeResult(result);
}
diff --git a/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp b/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp
index 2d1e3c1..72be6a1 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp
@@ -49,10 +49,12 @@
TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "true (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "true", "string");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs("", AUDIO_FS, "the sample rate of audio", false, 16000, "int32_t");
cmd.add(model_dir);
cmd.add(quantize);
cmd.add(wav_path);
+ cmd.add(audio_fs);
cmd.parse(argc, argv);
std::map<std::string, std::string> model_path;
@@ -110,7 +112,7 @@
auto& wav_file = wav_list[i];
auto& wav_id = wav_ids[i];
- int32_t sampling_rate_ = -1;
+ int32_t sampling_rate_ = audio_fs.getValue();
funasr::Audio audio(1);
if(is_target_file(wav_file.c_str(), "wav")){
if(!audio.LoadWav2Char(wav_file.c_str(), &sampling_rate_)){
@@ -143,7 +145,7 @@
is_final = false;
}
gettimeofday(&start, NULL);
- FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, 16000);
+ FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
diff --git a/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp b/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp
index bc3673a..57a4cce 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp
@@ -38,7 +38,7 @@
return (extension == target);
}
-void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, vector<string> wav_ids,
+void runReg(FUNASR_HANDLE asr_handle, vector<string> wav_list, vector<string> wav_ids, int audio_fs,
float* total_length, long* total_time, int core_id) {
struct timeval start, end;
@@ -52,7 +52,7 @@
// warm up
for (size_t i = 0; i < 10; i++)
{
- int32_t sampling_rate_ = -1;
+ int32_t sampling_rate_ = audio_fs;
funasr::Audio audio(1);
if(is_target_file(wav_list[0].c_str(), "wav")){
if(!audio.LoadWav2Char(wav_list[0].c_str(), &sampling_rate_)){
@@ -84,7 +84,7 @@
} else {
is_final = false;
}
- FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, 16000);
+ FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
if (result)
{
FunASRFreeResult(result);
@@ -98,7 +98,7 @@
if (i >= wav_list.size()) {
break;
}
- int32_t sampling_rate_ = -1;
+ int32_t sampling_rate_ = audio_fs;
funasr::Audio audio(1);
if(is_target_file(wav_list[i].c_str(), "wav")){
if(!audio.LoadWav2Char(wav_list[i].c_str(), &sampling_rate_)){
@@ -131,7 +131,7 @@
is_final = false;
}
gettimeofday(&start, NULL);
- FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, 16000);
+ FUNASR_RESULT result = FunASRInferBuffer(online_handle, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, sampling_rate_);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
@@ -186,6 +186,7 @@
TCLAP::ValueArg<std::string> punc_quant("", PUNC_QUANT, "true (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "true", "string");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs("", AUDIO_FS, "the sample rate of audio", false, 16000, "int32_t");
TCLAP::ValueArg<std::int32_t> thread_num("", THREAD_NUM, "multi-thread num for rtf", true, 0, "int32_t");
cmd.add(model_dir);
@@ -195,6 +196,7 @@
cmd.add(punc_dir);
cmd.add(punc_quant);
cmd.add(wav_path);
+ cmd.add(audio_fs);
cmd.add(thread_num);
cmd.parse(argc, argv);
@@ -260,7 +262,7 @@
int rtf_threds = thread_num.getValue();
for (int i = 0; i < rtf_threds; i++)
{
- threads.emplace_back(thread(runReg, asr_handle, wav_list, wav_ids, &total_length, &total_time, i));
+ threads.emplace_back(thread(runReg, asr_handle, wav_list, wav_ids, audio_fs.getValue(), &total_length, &total_time, i));
}
for (auto& thread : threads)
diff --git a/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp b/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
index cd79726..3f62672 100644
--- a/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
+++ b/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
@@ -75,10 +75,12 @@
TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "true", "string");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs("", AUDIO_FS, "the sample rate of audio", false, 16000, "int32_t");
cmd.add(model_dir);
cmd.add(quantize);
cmd.add(wav_path);
+ cmd.add(audio_fs);
cmd.parse(argc, argv);
std::map<std::string, std::string> model_path;
@@ -139,10 +141,9 @@
auto& wav_file = wav_list[i];
auto& wav_id = wav_ids[i];
- int32_t sampling_rate_ = -1;
+ int32_t sampling_rate_ = audio_fs.getValue();
funasr::Audio audio(1);
if(is_target_file(wav_file.c_str(), "wav")){
- int32_t sampling_rate_ = -1;
if(!audio.LoadWav2Char(wav_file.c_str(), &sampling_rate_)){
LOG(ERROR)<<"Failed to load "<< wav_file;
exit(-1);
@@ -170,7 +171,7 @@
is_final = false;
}
gettimeofday(&start, NULL);
- FUNASR_RESULT result = FsmnVadInferBuffer(online_hanlde, speech_buff+sample_offset, step, NULL, is_final, 16000);
+ FUNASR_RESULT result = FsmnVadInferBuffer(online_hanlde, speech_buff+sample_offset, step, NULL, is_final, sampling_rate_);
gettimeofday(&end, NULL);
seconds = (end.tv_sec - start.tv_sec);
taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
diff --git a/runtime/onnxruntime/include/audio.h b/runtime/onnxruntime/include/audio.h
index 34fcbaf..5194aa2 100644
--- a/runtime/onnxruntime/include/audio.h
+++ b/runtime/onnxruntime/include/audio.h
@@ -52,18 +52,20 @@
queue<AudioFrame *> frame_queue;
queue<AudioFrame *> asr_online_queue;
queue<AudioFrame *> asr_offline_queue;
-
+ int dest_sample_rate;
public:
Audio(int data_type);
- Audio(int data_type, int size);
+ Audio(int model_sample_rate,int data_type);
+ Audio(int model_sample_rate,int data_type, int size);
~Audio();
+ void ClearQueue(std::queue<AudioFrame*>& q);
void Disp();
void WavResample(int32_t sampling_rate, const float *waveform, int32_t n);
bool LoadWav(const char* buf, int n_len, int32_t* sampling_rate);
- bool LoadWav(const char* filename, int32_t* sampling_rate);
+ bool LoadWav(const char* filename, int32_t* sampling_rate, bool resample=true);
bool LoadWav2Char(const char* filename, int32_t* sampling_rate);
bool LoadPcmwav(const char* buf, int n_file_len, int32_t* sampling_rate);
- bool LoadPcmwav(const char* filename, int32_t* sampling_rate);
+ bool LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample=true);
bool LoadPcmwav2Char(const char* filename, int32_t* sampling_rate);
bool LoadOthers2Char(const char* filename);
bool FfmpegLoad(const char *filename, bool copy2char=false);
diff --git a/runtime/onnxruntime/include/com-define.h b/runtime/onnxruntime/include/com-define.h
index 57908e6..9cb1f2c 100644
--- a/runtime/onnxruntime/include/com-define.h
+++ b/runtime/onnxruntime/include/com-define.h
@@ -34,6 +34,7 @@
#define THREAD_NUM "thread-num"
#define PORT_ID "port-id"
#define HOTWORD_SEP " "
+#define AUDIO_FS "audio-fs"
// #define VAD_MODEL_PATH "vad-model"
// #define VAD_CMVN_PATH "vad-cmvn"
@@ -68,6 +69,7 @@
#define QUANT_DECODER_NAME "decoder_quant.onnx"
#define LM_FST_RES "TLG.fst"
+#define LEX_PATH "lexicon.txt"
// vad
#ifndef VAD_SILENCE_DURATION
diff --git a/runtime/onnxruntime/include/funasrruntime.h b/runtime/onnxruntime/include/funasrruntime.h
index 3b52f38..cff617f 100644
--- a/runtime/onnxruntime/include/funasrruntime.h
+++ b/runtime/onnxruntime/include/funasrruntime.h
@@ -68,6 +68,7 @@
_FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index);
_FUNASRAPI const char* FunASRGetStamp(FUNASR_RESULT result);
+_FUNASRAPI const char* FunASRGetStampSents(FUNASR_RESULT result);
_FUNASRAPI const char* FunASRGetTpassResult(FUNASR_RESULT result,int n_index);
_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result);
_FUNASRAPI void FunASRFreeResult(FUNASR_RESULT result);
@@ -118,7 +119,7 @@
_FUNASRAPI FUNASR_RESULT FunTpassInferBuffer(FUNASR_HANDLE handle, FUNASR_HANDLE online_handle, const char* sz_buf,
int n_len, std::vector<std::vector<std::string>> &punc_cache, bool input_finished=true,
int sampling_rate=16000, std::string wav_format="pcm", ASR_TYPE mode=ASR_TWO_PASS,
- const std::vector<std::vector<float>> &hw_emb={{0.0}}, bool itn=true);
+ const std::vector<std::vector<float>> &hw_emb={{0.0}}, bool itn=true, FUNASR_DEC_HANDLE dec_handle=nullptr);
_FUNASRAPI void FunTpassUninit(FUNASR_HANDLE handle);
_FUNASRAPI void FunTpassOnlineUninit(FUNASR_HANDLE handle);
diff --git a/runtime/onnxruntime/include/model.h b/runtime/onnxruntime/include/model.h
index 356fca3..33caec8 100644
--- a/runtime/onnxruntime/include/model.h
+++ b/runtime/onnxruntime/include/model.h
@@ -15,7 +15,7 @@
virtual void InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){};
virtual void InitAsr(const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){};
virtual void InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){};
- virtual void InitLm(const std::string &lm_file, const std::string &lm_config){};
+ virtual void InitLm(const std::string &lm_file, const std::string &lm_config, const std::string &lex_file){};
virtual void InitFstDecoder(){};
virtual std::string Forward(float *din, int len, bool input_finished, const std::vector<std::vector<float>> &hw_emb={{0.0}}, void* wfst_decoder=nullptr){return "";};
virtual std::string Rescoring() = 0;
@@ -23,6 +23,8 @@
virtual void InitSegDict(const std::string &seg_dict_model){};
virtual std::vector<std::vector<float>> CompileHotwordEmbedding(std::string &hotwords){return std::vector<std::vector<float>>();};
virtual std::string GetLang(){return "";};
+ virtual int GetAsrSampleRate() = 0;
+
};
Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_num=1, ASR_TYPE type=ASR_OFFLINE);
diff --git a/runtime/onnxruntime/include/vad-model.h b/runtime/onnxruntime/include/vad-model.h
index 07f1833..adb1e20 100644
--- a/runtime/onnxruntime/include/vad-model.h
+++ b/runtime/onnxruntime/include/vad-model.h
@@ -12,6 +12,7 @@
virtual ~VadModel(){};
virtual void InitVad(const std::string &vad_model, const std::string &vad_cmvn, const std::string &vad_config, int thread_num)=0;
virtual std::vector<std::vector<int>> Infer(std::vector<float> &waves, bool input_finished=true)=0;
+ virtual int GetVadSampleRate() = 0;
};
VadModel *CreateVadModel(std::map<std::string, std::string>& model_path, int thread_num);
diff --git a/runtime/onnxruntime/readme.md b/runtime/onnxruntime/readme.md
index f8f2631..33762e8 100644
--- a/runtime/onnxruntime/readme.md
+++ b/runtime/onnxruntime/readme.md
@@ -38,12 +38,12 @@
### Download onnxruntime
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/dep_libs/onnxruntime-win-x64-1.16.1.zip
-Download and unzip to d:/ffmpeg-master-latest-win64-gpl-shared
+Download and unzip to d:/onnxruntime-win-x64-1.16.1
### Download ffmpeg
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/dep_libs/ffmpeg-master-latest-win64-gpl-shared.zip
-Download and unzip to d:/onnxruntime-win-x64-1.16.1
+Download and unzip to d:/ffmpeg-master-latest-win64-gpl-shared
### Build runtime
```
diff --git a/runtime/onnxruntime/src/audio.cpp b/runtime/onnxruntime/src/audio.cpp
index b543797..c471329 100644
--- a/runtime/onnxruntime/src/audio.cpp
+++ b/runtime/onnxruntime/src/audio.cpp
@@ -193,18 +193,28 @@
return 0;
}
-Audio::Audio(int data_type) : data_type(data_type)
+Audio::Audio(int data_type) : dest_sample_rate(MODEL_SAMPLE_RATE), data_type(data_type)
{
speech_buff = NULL;
speech_data = NULL;
align_size = 1360;
+ seg_sample = dest_sample_rate / 1000;
}
-Audio::Audio(int data_type, int size) : data_type(data_type)
+Audio::Audio(int model_sample_rate, int data_type) : dest_sample_rate(model_sample_rate), data_type(data_type)
+{
+ speech_buff = NULL;
+ speech_data = NULL;
+ align_size = 1360;
+ seg_sample = dest_sample_rate / 1000;
+}
+
+Audio::Audio(int model_sample_rate, int data_type, int size) : dest_sample_rate(model_sample_rate), data_type(data_type)
{
speech_buff = NULL;
speech_data = NULL;
align_size = (float)size;
+ seg_sample = dest_sample_rate / 1000;
}
Audio::~Audio()
@@ -218,32 +228,43 @@
if (speech_char != NULL) {
free(speech_char);
}
+ ClearQueue(frame_queue);
+ ClearQueue(asr_online_queue);
+ ClearQueue(asr_offline_queue);
+}
+
+void Audio::ClearQueue(std::queue<AudioFrame*>& q) {
+ while (!q.empty()) {
+ AudioFrame* frame = q.front();
+ delete frame;
+ q.pop();
+ }
}
void Audio::Disp()
{
- LOG(INFO) << "Audio time is " << (float)speech_len / MODEL_SAMPLE_RATE << " s. len is " << speech_len;
+ LOG(INFO) << "Audio time is " << (float)speech_len / dest_sample_rate << " s. len is " << speech_len;
}
float Audio::GetTimeLen()
{
- return (float)speech_len / MODEL_SAMPLE_RATE;
+ return (float)speech_len / dest_sample_rate;
}
void Audio::WavResample(int32_t sampling_rate, const float *waveform,
int32_t n)
{
- LOG(INFO) << "Creating a resampler:\n"
- << " in_sample_rate: "<< sampling_rate << "\n"
- << " output_sample_rate: " << static_cast<int32_t>(MODEL_SAMPLE_RATE);
+ LOG(INFO) << "Creating a resampler: "
+ << " in_sample_rate: "<< sampling_rate
+ << " output_sample_rate: " << static_cast<int32_t>(dest_sample_rate);
float min_freq =
- std::min<int32_t>(sampling_rate, MODEL_SAMPLE_RATE);
+ std::min<int32_t>(sampling_rate, dest_sample_rate);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32_t lowpass_filter_width = 6;
auto resampler = std::make_unique<LinearResample>(
- sampling_rate, MODEL_SAMPLE_RATE, lowpass_cutoff, lowpass_filter_width);
+ sampling_rate, dest_sample_rate, lowpass_cutoff, lowpass_filter_width);
std::vector<float> samples;
resampler->Resample(waveform, n, true, &samples);
//reset speech_data
@@ -311,7 +332,7 @@
nullptr, // allocate a new context
AV_CH_LAYOUT_MONO, // output channel layout (stereo)
AV_SAMPLE_FMT_S16, // output sample format (signed 16-bit)
- 16000, // output sample rate (same as input)
+ dest_sample_rate, // output sample rate (same as input)
av_get_default_channel_layout(codecContext->channels), // input channel layout
codecContext->sample_fmt, // input sample format
codecContext->sample_rate, // input sample rate
@@ -344,30 +365,28 @@
while (avcodec_receive_frame(codecContext, frame) >= 0) {
// Resample audio if necessary
std::vector<uint8_t> resampled_buffer;
- int in_samples = frame->nb_samples;
- uint8_t **in_data = frame->extended_data;
- int out_samples = av_rescale_rnd(in_samples,
- 16000,
+ int out_samples = av_rescale_rnd(swr_get_delay(swr_ctx, codecContext->sample_rate) + frame->nb_samples,
+ dest_sample_rate,
codecContext->sample_rate,
AV_ROUND_DOWN);
int resampled_size = out_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
if (resampled_buffer.size() < resampled_size) {
resampled_buffer.resize(resampled_size);
- }
+ }
uint8_t *resampled_data = resampled_buffer.data();
int ret = swr_convert(
swr_ctx,
&resampled_data, // output buffer
- resampled_size, // output buffer size
- (const uint8_t **)(frame->data), //(const uint8_t **)(frame->extended_data)
- in_samples // input buffer size
+ out_samples, // output buffer size
+ (const uint8_t **)(frame->data), // choose channel
+ frame->nb_samples // input buffer size
);
if (ret < 0) {
LOG(ERROR) << "Error resampling audio";
break;
}
- std::copy(resampled_buffer.begin(), resampled_buffer.end(), std::back_inserter(resampled_buffers));
+ resampled_buffers.insert(resampled_buffers.end(), resampled_buffer.begin(), resampled_buffer.begin() + resampled_size);
}
}
}
@@ -443,6 +462,10 @@
nullptr, // write callback (not used here)
nullptr // seek callback (not used here)
);
+ if (!avio_ctx) {
+ av_free(buf_copy);
+ return false;
+ }
AVFormatContext* formatContext = avformat_alloc_context();
formatContext->pb = avio_ctx;
if (avformat_open_input(&formatContext, "", NULL, NULL) != 0) {
@@ -494,7 +517,7 @@
nullptr, // allocate a new context
AV_CH_LAYOUT_MONO, // output channel layout (stereo)
AV_SAMPLE_FMT_S16, // output sample format (signed 16-bit)
- 16000, // output sample rate (same as input)
+ dest_sample_rate, // output sample rate (same as input)
av_get_default_channel_layout(codecContext->channels), // input channel layout
codecContext->sample_fmt, // input sample format
codecContext->sample_rate, // input sample rate
@@ -529,37 +552,37 @@
while (avcodec_receive_frame(codecContext, frame) >= 0) {
// Resample audio if necessary
std::vector<uint8_t> resampled_buffer;
- int in_samples = frame->nb_samples;
- uint8_t **in_data = frame->extended_data;
- int out_samples = av_rescale_rnd(in_samples,
- 16000,
+ int out_samples = av_rescale_rnd(swr_get_delay(swr_ctx, codecContext->sample_rate) + frame->nb_samples,
+ dest_sample_rate,
codecContext->sample_rate,
AV_ROUND_DOWN);
int resampled_size = out_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
if (resampled_buffer.size() < resampled_size) {
resampled_buffer.resize(resampled_size);
- }
+ }
uint8_t *resampled_data = resampled_buffer.data();
int ret = swr_convert(
swr_ctx,
&resampled_data, // output buffer
- resampled_size, // output buffer size
- (const uint8_t **)(frame->data), //(const uint8_t **)(frame->extended_data)
- in_samples // input buffer size
+ out_samples, // output buffer size
+ (const uint8_t **)(frame->data), // choose channel: channel_data
+ frame->nb_samples // input buffer size
);
if (ret < 0) {
LOG(ERROR) << "Error resampling audio";
break;
}
- std::copy(resampled_buffer.begin(), resampled_buffer.end(), std::back_inserter(resampled_buffers));
+ resampled_buffers.insert(resampled_buffers.end(), resampled_buffer.begin(), resampled_buffer.begin() + resampled_size);
}
}
}
av_packet_unref(packet);
}
- avio_context_free(&avio_ctx);
+ //avio_context_free(&avio_ctx);
+ av_freep(&avio_ctx ->buffer);
+ av_freep(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
@@ -604,7 +627,7 @@
}
-bool Audio::LoadWav(const char *filename, int32_t* sampling_rate)
+bool Audio::LoadWav(const char *filename, int32_t* sampling_rate, bool resample)
{
WaveHeader header;
if (speech_data != NULL) {
@@ -666,7 +689,7 @@
}
//resample
- if(*sampling_rate != MODEL_SAMPLE_RATE){
+ if(resample && *sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
}
@@ -752,7 +775,7 @@
}
//resample
- if(*sampling_rate != MODEL_SAMPLE_RATE){
+ if(*sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
}
@@ -795,7 +818,7 @@
}
//resample
- if(*sampling_rate != MODEL_SAMPLE_RATE){
+ if(*sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
}
@@ -840,7 +863,7 @@
}
//resample
- if(*sampling_rate != MODEL_SAMPLE_RATE){
+ if(*sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
}
@@ -857,7 +880,7 @@
return false;
}
-bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate)
+bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample)
{
if (speech_data != NULL) {
free(speech_data);
@@ -898,7 +921,7 @@
}
//resample
- if(*sampling_rate != MODEL_SAMPLE_RATE){
+ if(resample && *sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
}
@@ -1009,7 +1032,7 @@
AudioFrame *frame = frame_queue.front();
frame_queue.pop();
- start_time = (float)(frame->GetStart())/MODEL_SAMPLE_RATE;
+ start_time = (float)(frame->GetStart())/ dest_sample_rate;
dout = speech_data + frame->GetStart();
len = frame->GetLen();
delete frame;
@@ -1248,7 +1271,7 @@
}
// erase all_samples
- int vector_cache = MODEL_SAMPLE_RATE*2;
+ int vector_cache = dest_sample_rate*2;
if(speech_offline_start == -1){
if(all_samples.size() > vector_cache){
int erase_num = all_samples.size() - vector_cache;
diff --git a/runtime/onnxruntime/src/bias-lm.h b/runtime/onnxruntime/src/bias-lm.h
index e2d28a2..957197a 100644
--- a/runtime/onnxruntime/src/bias-lm.h
+++ b/runtime/onnxruntime/src/bias-lm.h
@@ -65,12 +65,17 @@
if (text.size() > 1) {
score = std::stof(text[1]);
}
- Utf8ToCharset(text[0], split_str);
+ SplitChiEngCharacters(text[0], split_str);
for (auto &str : split_str) {
- split_id.push_back(phn_set_.String2Id(str));
- if (!phn_set_.Find(str)) {
- is_oov = true;
- break;
+ std::vector<string> lex_vec;
+ std::string lex_str = vocab_.Word2Lex(str);
+ SplitStringToVector(lex_str, " ", true, &lex_vec);
+ for (auto &token : lex_vec) {
+ split_id.push_back(phn_set_.String2Id(token));
+ if (!phn_set_.Find(token)) {
+ is_oov = true;
+ break;
+ }
}
}
if (!is_oov) {
@@ -103,12 +108,17 @@
std::vector<std::string> split_str;
std::vector<int> split_id;
score = kv.second;
- Utf8ToCharset(kv.first, split_str);
+ SplitChiEngCharacters(kv.first, split_str);
for (auto &str : split_str) {
- split_id.push_back(phn_set_.String2Id(str));
- if (!phn_set_.Find(str)) {
- is_oov = true;
- break;
+ std::vector<string> lex_vec;
+ std::string lex_str = vocab_.Word2Lex(str);
+ SplitStringToVector(lex_str, " ", true, &lex_vec);
+ for (auto &token : lex_vec) {
+ split_id.push_back(phn_set_.String2Id(token));
+ if (!phn_set_.Find(token)) {
+ is_oov = true;
+ break;
+ }
}
}
if (!is_oov) {
diff --git a/runtime/onnxruntime/src/commonfunc.h b/runtime/onnxruntime/src/commonfunc.h
index 9bd2a00..3449ebc 100644
--- a/runtime/onnxruntime/src/commonfunc.h
+++ b/runtime/onnxruntime/src/commonfunc.h
@@ -9,6 +9,7 @@
{
std::string msg;
std::string stamp;
+ std::string stamp_sents;
std::string tpass_msg;
float snippet_time;
}FUNASR_RECOG_RESULT;
diff --git a/runtime/onnxruntime/src/fsmn-vad-online.cpp b/runtime/onnxruntime/src/fsmn-vad-online.cpp
index a8cc5d8..30627fc 100644
--- a/runtime/onnxruntime/src/fsmn-vad-online.cpp
+++ b/runtime/onnxruntime/src/fsmn-vad-online.cpp
@@ -187,8 +187,11 @@
vad_max_len_ = vad_max_len;
vad_speech_noise_thres_ = vad_speech_noise_thres;
+ frame_sample_length_ = vad_sample_rate_ / 1000 * 25;;
+ frame_shift_sample_length_ = vad_sample_rate_ / 1000 * 10;
+
// 2pass
- audio_handle = make_unique<Audio>(1);
+ audio_handle = make_unique<Audio>(vad_sample_rate,1);
}
FsmnVadOnline::~FsmnVadOnline() {
diff --git a/runtime/onnxruntime/src/fsmn-vad-online.h b/runtime/onnxruntime/src/fsmn-vad-online.h
index 9191304..4c82d11 100644
--- a/runtime/onnxruntime/src/fsmn-vad-online.h
+++ b/runtime/onnxruntime/src/fsmn-vad-online.h
@@ -21,6 +21,8 @@
std::vector<std::vector<int>> Infer(std::vector<float> &waves, bool input_finished);
void ExtractFeats(float sample_rate, vector<vector<float>> &vad_feats, vector<float> &waves, bool input_finished);
void Reset();
+ int GetVadSampleRate() { return vad_sample_rate_; };
+
// 2pass
std::unique_ptr<Audio> audio_handle = nullptr;
diff --git a/runtime/onnxruntime/src/fsmn-vad.h b/runtime/onnxruntime/src/fsmn-vad.h
index adceb1f..f06a965 100644
--- a/runtime/onnxruntime/src/fsmn-vad.h
+++ b/runtime/onnxruntime/src/fsmn-vad.h
@@ -28,6 +28,8 @@
std::vector<std::vector<float>> *in_cache,
bool is_final);
void Reset();
+
+ int GetVadSampleRate() { return vad_sample_rate_; };
std::shared_ptr<Ort::Session> vad_session_ = nullptr;
Ort::Env env_;
diff --git a/runtime/onnxruntime/src/funasrruntime.cpp b/runtime/onnxruntime/src/funasrruntime.cpp
index 3523bba..fdaf69d 100644
--- a/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/runtime/onnxruntime/src/funasrruntime.cpp
@@ -57,7 +57,7 @@
if (!recog_obj)
return nullptr;
- funasr::Audio audio(1);
+ funasr::Audio audio(recog_obj->GetAsrSampleRate(),1);
if(wav_format == "pcm" || wav_format == "PCM"){
if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
return nullptr;
@@ -93,7 +93,7 @@
if (!recog_obj)
return nullptr;
- funasr::Audio audio(1);
+ funasr::Audio audio(recog_obj->GetAsrSampleRate(),1);
if(funasr::is_target_file(sz_filename, "wav")){
int32_t sampling_rate_ = -1;
if(!audio.LoadWav(sz_filename, &sampling_rate_))
@@ -134,7 +134,7 @@
if (!vad_obj)
return nullptr;
- funasr::Audio audio(1);
+ funasr::Audio audio(vad_obj->GetVadSampleRate(),1);
if(wav_format == "pcm" || wav_format == "PCM"){
if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
return nullptr;
@@ -146,6 +146,7 @@
funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
p_result->snippet_time = audio.GetTimeLen();
if(p_result->snippet_time == 0){
+ p_result->segments = new vector<std::vector<int>>();
return p_result;
}
@@ -162,7 +163,7 @@
if (!vad_obj)
return nullptr;
- funasr::Audio audio(1);
+ funasr::Audio audio(vad_obj->GetVadSampleRate(),1);
if(funasr::is_target_file(sz_filename, "wav")){
int32_t sampling_rate_ = -1;
if(!audio.LoadWav(sz_filename, &sampling_rate_))
@@ -178,6 +179,7 @@
funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
p_result->snippet_time = audio.GetTimeLen();
if(p_result->snippet_time == 0){
+ p_result->segments = new vector<std::vector<int>>();
return p_result;
}
@@ -222,7 +224,7 @@
if (!offline_stream)
return nullptr;
- funasr::Audio audio(1);
+ funasr::Audio audio(offline_stream->asr_handle->GetAsrSampleRate(),1);
try{
if(wav_format == "pcm" || wav_format == "PCM"){
if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
@@ -294,10 +296,18 @@
#if !defined(__APPLE__)
if(offline_stream->UseITN() && itn){
string msg_itn = offline_stream->itn_handle->Normalize(p_result->msg);
+ if(!(p_result->stamp).empty()){
+ std::string new_stamp = funasr::TimestampSmooth(p_result->msg, msg_itn, p_result->stamp);
+ if(!new_stamp.empty()){
+ p_result->stamp = new_stamp;
+ }
+ }
p_result->msg = msg_itn;
}
#endif
-
+ if (!(p_result->stamp).empty()){
+ p_result->stamp_sents = funasr::TimestampSentence(p_result->msg, p_result->stamp);
+ }
return p_result;
}
@@ -308,7 +318,7 @@
if (!offline_stream)
return nullptr;
- funasr::Audio audio(1);
+ funasr::Audio audio((offline_stream->asr_handle)->GetAsrSampleRate(),1);
try{
if(funasr::is_target_file(sz_filename, "wav")){
int32_t sampling_rate_ = -1;
@@ -384,9 +394,18 @@
#if !defined(__APPLE__)
if(offline_stream->UseITN() && itn){
string msg_itn = offline_stream->itn_handle->Normalize(p_result->msg);
+ if(!(p_result->stamp).empty()){
+ std::string new_stamp = funasr::TimestampSmooth(p_result->msg, msg_itn, p_result->stamp);
+ if(!new_stamp.empty()){
+ p_result->stamp = new_stamp;
+ }
+ }
p_result->msg = msg_itn;
}
#endif
+ if (!(p_result->stamp).empty()){
+ p_result->stamp_sents = funasr::TimestampSentence(p_result->msg, p_result->stamp);
+ }
return p_result;
}
@@ -420,7 +439,7 @@
_FUNASRAPI FUNASR_RESULT FunTpassInferBuffer(FUNASR_HANDLE handle, FUNASR_HANDLE online_handle, const char* sz_buf,
int n_len, std::vector<std::vector<std::string>> &punc_cache, bool input_finished,
int sampling_rate, std::string wav_format, ASR_TYPE mode,
- const std::vector<std::vector<float>> &hw_emb, bool itn)
+ const std::vector<std::vector<float>> &hw_emb, bool itn, FUNASR_DEC_HANDLE dec_handle)
{
funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
funasr::TpassOnlineStream* tpass_online_stream = (funasr::TpassOnlineStream*)online_handle;
@@ -494,7 +513,12 @@
// timestamp
std::string cur_stamp = "[";
while(audio->FetchTpass(frame) > 0){
- string msg = ((funasr::Paraformer*)asr_handle)->Forward(frame->data, frame->len, frame->is_final, hw_emb);
+ // dec reset
+ funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
+ if (wfst_decoder){
+ wfst_decoder->StartUtterance();
+ }
+ string msg = ((funasr::Paraformer*)asr_handle)->Forward(frame->data, frame->len, frame->is_final, hw_emb, dec_handle);
std::vector<std::string> msg_vec = funasr::split(msg, '|'); // split with timestamp
if(msg_vec.size()==0){
@@ -524,10 +548,19 @@
#if !defined(__APPLE__)
if(tpass_stream->UseITN() && itn){
string msg_itn = tpass_stream->itn_handle->Normalize(msg_punc);
+ // TimestampSmooth
+ if(!(p_result->stamp).empty()){
+ std::string new_stamp = funasr::TimestampSmooth(p_result->tpass_msg, msg_itn, p_result->stamp);
+ if(!new_stamp.empty()){
+ p_result->stamp = new_stamp;
+ }
+ }
p_result->tpass_msg = msg_itn;
}
#endif
-
+ if (!(p_result->stamp).empty()){
+ p_result->stamp_sents = funasr::TimestampSentence(p_result->tpass_msg, p_result->stamp);
+ }
if(frame != NULL){
delete frame;
frame = NULL;
@@ -582,6 +615,15 @@
return nullptr;
return p_result->stamp.c_str();
+ }
+
+ _FUNASRAPI const char* FunASRGetStampSents(FUNASR_RESULT result)
+ {
+ funasr::FUNASR_RECOG_RESULT * p_result = (funasr::FUNASR_RECOG_RESULT*)result;
+ if(!p_result)
+ return nullptr;
+
+ return p_result->stamp_sents.c_str();
}
_FUNASRAPI const char* FunASRGetTpassResult(FUNASR_RESULT result,int n_index)
@@ -727,8 +769,14 @@
funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
funasr::Paraformer* paraformer = (funasr::Paraformer*)offline_stream->asr_handle.get();
if (paraformer->lm_)
+ mm = new funasr::WfstDecoder(paraformer->lm_.get(),
+ paraformer->GetPhoneSet(), paraformer->GetLmVocab(), glob_beam, lat_beam, am_scale);
+ } else if (asr_type == ASR_TWO_PASS){
+ funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
+ funasr::Paraformer* paraformer = (funasr::Paraformer*)tpass_stream->asr_handle.get();
+ if (paraformer->lm_)
mm = new funasr::WfstDecoder(paraformer->lm_.get(),
- paraformer->GetPhoneSet(), paraformer->GetVocab(), glob_beam, lat_beam, am_scale);
+ paraformer->GetPhoneSet(), paraformer->GetLmVocab(), glob_beam, lat_beam, am_scale);
}
return mm;
}
diff --git a/runtime/onnxruntime/src/offline-stream.cpp b/runtime/onnxruntime/src/offline-stream.cpp
index 2709ca6..ae8cf18 100644
--- a/runtime/onnxruntime/src/offline-stream.cpp
+++ b/runtime/onnxruntime/src/offline-stream.cpp
@@ -63,10 +63,16 @@
// Lm resource
if (model_path.find(LM_DIR) != model_path.end() && model_path.at(LM_DIR) != "") {
- string fst_path, lm_config_path, hws_path;
+ string fst_path, lm_config_path, lex_path;
fst_path = PathAppend(model_path.at(LM_DIR), LM_FST_RES);
lm_config_path = PathAppend(model_path.at(LM_DIR), LM_CONFIG_NAME);
- asr_handle->InitLm(fst_path, lm_config_path);
+ lex_path = PathAppend(model_path.at(LM_DIR), LEX_PATH);
+ if (access(lex_path.c_str(), F_OK) != 0 )
+ {
+ LOG(ERROR) << "Lexicon.txt file is not exist, please use the latest version. Skip load LM model.";
+ }else{
+ asr_handle->InitLm(fst_path, lm_config_path, lex_path);
+ }
}
// PUNC model
diff --git a/runtime/onnxruntime/src/paraformer-online.cpp b/runtime/onnxruntime/src/paraformer-online.cpp
index d08b57e..55a4fd1 100644
--- a/runtime/onnxruntime/src/paraformer-online.cpp
+++ b/runtime/onnxruntime/src/paraformer-online.cpp
@@ -61,7 +61,11 @@
for(int i=0; i<fsmn_lorder*fsmn_dims; i++){
fsmn_init_cache_.emplace_back(0);
}
- chunk_len = chunk_size[1]*frame_shift*lfr_n*MODEL_SAMPLE_RATE/1000;
+ chunk_len = chunk_size[1]*frame_shift*lfr_n*para_handle_->asr_sample_rate/1000;
+
+ frame_sample_length_ = para_handle_->asr_sample_rate / 1000 * frame_length;
+ frame_shift_sample_length_ = para_handle_->asr_sample_rate / 1000 * frame_shift;
+
}
void ParaformerOnline::FbankKaldi(float sample_rate, std::vector<std::vector<float>> &wav_feats,
@@ -489,7 +493,7 @@
if(is_first_chunk){
is_first_chunk = false;
}
- ExtractFeats(MODEL_SAMPLE_RATE, wav_feats, waves, input_finished);
+ ExtractFeats(para_handle_->asr_sample_rate, wav_feats, waves, input_finished);
if(wav_feats.size() == 0){
return result;
}
diff --git a/runtime/onnxruntime/src/paraformer-online.h b/runtime/onnxruntime/src/paraformer-online.h
index 138c77c..8c9bb88 100644
--- a/runtime/onnxruntime/src/paraformer-online.h
+++ b/runtime/onnxruntime/src/paraformer-online.h
@@ -111,6 +111,9 @@
string ForwardChunk(std::vector<std::vector<float>> &wav_feats, bool input_finished);
string Forward(float* din, int len, bool input_finished, const std::vector<std::vector<float>> &hw_emb={{0.0}}, void* wfst_decoder=nullptr);
string Rescoring();
+
+ int GetAsrSampleRate() { return para_handle_->asr_sample_rate; };
+
// 2pass
std::string online_res;
int chunk_len;
diff --git a/runtime/onnxruntime/src/paraformer.cpp b/runtime/onnxruntime/src/paraformer.cpp
index 4e89ea2..c56421c 100644
--- a/runtime/onnxruntime/src/paraformer.cpp
+++ b/runtime/onnxruntime/src/paraformer.cpp
@@ -19,10 +19,11 @@
// offline
void Paraformer::InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){
+ LoadConfigFromYaml(am_config.c_str());
// knf options
fbank_opts_.frame_opts.dither = 0;
fbank_opts_.mel_opts.num_bins = n_mels;
- fbank_opts_.frame_opts.samp_freq = MODEL_SAMPLE_RATE;
+ fbank_opts_.frame_opts.samp_freq = asr_sample_rate;
fbank_opts_.frame_opts.window_type = window_type;
fbank_opts_.frame_opts.frame_shift_ms = frame_shift;
fbank_opts_.frame_opts.frame_length_ms = frame_length;
@@ -65,7 +66,6 @@
for (auto& item : m_strOutputNames)
m_szOutputNames.push_back(item.c_str());
vocab = new Vocab(am_config.c_str());
- LoadConfigFromYaml(am_config.c_str());
phone_set_ = new PhoneSet(am_config.c_str());
LoadCmvn(am_cmvn.c_str());
}
@@ -77,7 +77,7 @@
// knf options
fbank_opts_.frame_opts.dither = 0;
fbank_opts_.mel_opts.num_bins = n_mels;
- fbank_opts_.frame_opts.samp_freq = MODEL_SAMPLE_RATE;
+ fbank_opts_.frame_opts.samp_freq = asr_sample_rate;
fbank_opts_.frame_opts.window_type = window_type;
fbank_opts_.frame_opts.frame_shift_ms = frame_shift;
fbank_opts_.frame_opts.frame_length_ms = frame_length;
@@ -187,13 +187,13 @@
}
void Paraformer::InitLm(const std::string &lm_file,
- const std::string &lm_cfg_file) {
+ const std::string &lm_cfg_file,
+ const std::string &lex_file) {
try {
lm_ = std::shared_ptr<fst::Fst<fst::StdArc>>(
fst::Fst<fst::StdArc>::Read(lm_file));
if (lm_){
- if (vocab) { delete vocab; }
- vocab = new Vocab(lm_cfg_file.c_str());
+ lm_vocab = new Vocab(lm_cfg_file.c_str(), lex_file.c_str());
LOG(INFO) << "Successfully load lm file " << lm_file;
}else{
LOG(ERROR) << "Failed to load lm file " << lm_file;
@@ -215,6 +215,9 @@
}
try{
+ YAML::Node frontend_conf = config["frontend_conf"];
+ this->asr_sample_rate = frontend_conf["fs"].as<int>();
+
YAML::Node lang_conf = config["lang"];
if (lang_conf.IsDefined()){
language = lang_conf.as<string>();
@@ -256,6 +259,9 @@
this->cif_threshold = predictor_conf["threshold"].as<double>();
this->tail_alphas = predictor_conf["tail_threshold"].as<double>();
+
+ this->asr_sample_rate = frontend_conf["fs"].as<int>();
+
}catch(exception const &e){
LOG(ERROR) << "Error when load argument from vad config YAML.";
@@ -300,10 +306,18 @@
Paraformer::~Paraformer()
{
- if(vocab)
+ if(vocab){
delete vocab;
- if(seg_dict)
+ }
+ if(lm_vocab){
+ delete lm_vocab;
+ }
+ if(seg_dict){
delete seg_dict;
+ }
+ if(phone_set_){
+ delete phone_set_;
+ }
}
void Paraformer::StartUtterance()
@@ -454,7 +468,7 @@
int32_t in_feat_dim = fbank_opts_.mel_opts.num_bins;
std::vector<std::vector<float>> asr_feats;
- FbankKaldi(MODEL_SAMPLE_RATE, din, len, asr_feats);
+ FbankKaldi(asr_sample_rate, din, len, asr_feats);
if(asr_feats.size() == 0){
return "";
}
@@ -675,6 +689,11 @@
return vocab;
}
+// Accessor for the vocabulary created by InitLm (nullptr until one is loaded).
+Vocab* Paraformer::GetLmVocab()
+{
+    Vocab* current_lm_vocab = this->lm_vocab;
+    return current_lm_vocab;
+}
+
PhoneSet* Paraformer::GetPhoneSet()
{
return phone_set_;
diff --git a/runtime/onnxruntime/src/paraformer.h b/runtime/onnxruntime/src/paraformer.h
index b5bc46d..5bb9477 100644
--- a/runtime/onnxruntime/src/paraformer.h
+++ b/runtime/onnxruntime/src/paraformer.h
@@ -20,6 +20,7 @@
*/
private:
Vocab* vocab = nullptr;
+ Vocab* lm_vocab = nullptr;
SegDict* seg_dict = nullptr;
PhoneSet* phone_set_ = nullptr;
//const float scale = 22.6274169979695;
@@ -57,14 +58,15 @@
string Rescoring();
string GetLang(){return language;};
-
+ int GetAsrSampleRate() { return asr_sample_rate; };
void StartUtterance();
void EndUtterance();
- void InitLm(const std::string &lm_file, const std::string &lm_cfg_file);
+ void InitLm(const std::string &lm_file, const std::string &lm_cfg_file, const std::string &lex_file);
string BeamSearch(WfstDecoder* &wfst_decoder, float* in, int n_len, int64_t token_nums);
string FinalizeDecode(WfstDecoder* &wfst_decoder,
bool is_stamp=false, std::vector<float> us_alphas={0}, std::vector<float> us_cif_peak={0});
Vocab* GetVocab();
+ Vocab* GetLmVocab();
PhoneSet* GetPhoneSet();
knf::FbankOptions fbank_opts_;
@@ -107,8 +109,7 @@
int fsmn_dims = 512;
float cif_threshold = 1.0;
float tail_alphas = 0.45;
-
-
+ int asr_sample_rate = MODEL_SAMPLE_RATE;
};
} // namespace funasr
diff --git a/runtime/onnxruntime/src/tpass-stream.cpp b/runtime/onnxruntime/src/tpass-stream.cpp
index a3e1b0e..b723e0f 100644
--- a/runtime/onnxruntime/src/tpass-stream.cpp
+++ b/runtime/onnxruntime/src/tpass-stream.cpp
@@ -66,6 +66,20 @@
LOG(ERROR) <<"Can not find offline-model-dir or online-model-dir";
exit(-1);
}
+
+ // Lm resource
+ if (model_path.find(LM_DIR) != model_path.end() && model_path.at(LM_DIR) != "") {
+ string fst_path, lm_config_path, lex_path;
+ fst_path = PathAppend(model_path.at(LM_DIR), LM_FST_RES);
+ lm_config_path = PathAppend(model_path.at(LM_DIR), LM_CONFIG_NAME);
+ lex_path = PathAppend(model_path.at(LM_DIR), LEX_PATH);
+ if (access(lex_path.c_str(), F_OK) != 0 )
+ {
+ LOG(ERROR) << "Lexicon.txt file is not exist, please use the latest version. Skip load LM model.";
+ }else{
+ asr_handle->InitLm(fst_path, lm_config_path, lex_path);
+ }
+ }
// PUNC model
if(model_path.find(PUNC_DIR) != model_path.end()){
diff --git a/runtime/onnxruntime/src/util.cpp b/runtime/onnxruntime/src/util.cpp
index 005de57..039fa90 100644
--- a/runtime/onnxruntime/src/util.cpp
+++ b/runtime/onnxruntime/src/util.cpp
@@ -247,6 +247,395 @@
}
}
+// Timestamp Smooth
+// Prepends str_word to the alignment deque. Words that the std::string
+// overload of TimestampIsPunctuation reports as punctuation are dropped.
+void TimestampAdd(std::deque<string> &alignment_str1, std::string str_word){
+    if (TimestampIsPunctuation(str_word)) {
+        return;
+    }
+    alignment_str1.push_front(str_word);
+}
+
+// Returns true when str consists solely of sentence punctuation marks:
+// full-width comma, ideographic full stop, full-width question mark,
+// ideographic comma, ASCII ',' and ASCII '?'. An empty string yields true.
+//
+// Matching is done per UTF-8 encoded mark rather than per byte, so a
+// character that merely shares bytes with these marks (for example U+3042,
+// encoded as E3 81 82) is not misclassified as punctuation.
+bool TimestampIsPunctuation(const std::string& str) {
+    // Byte escapes keep the table independent of the source file encoding.
+    // '.' is not part of the set.
+    static const std::string kMarks[] = {
+        "\xEF\xBC\x8C",  // U+FF0C full-width comma
+        "\xE3\x80\x82",  // U+3002 ideographic full stop
+        "\xEF\xBC\x9F",  // U+FF1F full-width question mark
+        "\xE3\x80\x81",  // U+3001 ideographic comma
+        ",",
+        "?",
+    };
+
+    size_t pos = 0;
+    while (pos < str.size()) {
+        size_t matched_len = 0;
+        for (const std::string &mark : kMarks) {
+            // No mark is a prefix of another, so the first match is the only one.
+            if (str.compare(pos, mark.size(), mark) == 0) {
+                matched_len = mark.size();
+                break;
+            }
+        }
+        if (matched_len == 0) {
+            return false;
+        }
+        pos += matched_len;
+    }
+    return true;
+}
+
+// Parses a timestamp list of the form "[[b0,e0],[b1,e1],...]" into
+// {begin, end} pairs.
+//
+// Returns an empty vector, after logging an error, when an entry does not
+// consist of exactly two integers. Malformed numbers are reported through
+// this same path instead of raising an exception.
+vector<vector<int>> ParseTimestamps(const std::string& str) {
+    vector<vector<int>> timestamps;
+    std::istringstream ss(str);
+    std::string segment;
+
+    // skip the outer '['
+    ss.ignore(1);
+
+    while (std::getline(ss, segment, ']')) {
+        std::istringstream segmentStream(segment);
+        std::string number;
+        vector<int> ts;
+        bool parsed = true;
+
+        // skip the '[' that opens this pair
+        segmentStream.ignore(1);
+
+        while (std::getline(segmentStream, number, ',')) {
+            // Stream extraction signals bad input through the stream state,
+            // whereas std::stoi throws on non-numeric or out-of-range text.
+            std::istringstream numberStream(number);
+            int value = 0;
+            if (!(numberStream >> value)) {
+                parsed = false;
+                break;
+            }
+            ts.push_back(value);
+        }
+        if(!parsed || ts.size() != 2){
+            LOG(ERROR) << "ParseTimestamps Failed";
+            timestamps.clear();
+            return timestamps;
+        }
+        timestamps.push_back(ts);
+        // skip the ',' between pairs (or the closing ']' after the last pair)
+        ss.ignore(1);
+    }
+
+    return timestamps;
+}
+
+// True when the UTF-16 code unit is an ASCII decimal digit ('0'..'9').
+bool TimestampIsDigit(U16CHAR_T &u16) {
+    if (u16 < L'0') {
+        return false;
+    }
+    return u16 <= L'9';
+}
+
+// True when the UTF-16 code unit is an ASCII letter (A-Z or a-z).
+bool TimestampIsAlpha(U16CHAR_T &u16) {
+    const bool is_upper = u16 >= L'A' && u16 <= L'Z';
+    const bool is_lower = u16 >= L'a' && u16 <= L'z';
+    return is_upper || is_lower;
+}
+
+// Classifies a UTF-16 code unit as punctuation by code-point range.
+bool TimestampIsPunctuation(U16CHAR_T &u16) {
+    // '&' (0x26), '\'' (0x27) and '-' (0x2D) are in the dict, so they are
+    // never reported as punctuation.
+    switch (u16) {
+        case 0x26:
+        case 0x27:
+        case 0x2D:
+            return false;
+        default:
+            break;
+    }
+    if (u16 >= 0x21 && u16 <= 0x2F) {
+        return true;   // standard ASCII punctuation
+    }
+    if (u16 >= 0x3A && u16 <= 0x40) {
+        return true;   // standard ASCII punctuation
+    }
+    if (u16 >= 0x5B && u16 <= 0x60) {
+        return true;   // standard ASCII punctuation
+    }
+    if (u16 >= 0x7B && u16 <= 0x7E) {
+        return true;   // standard ASCII punctuation
+    }
+    if (u16 >= 0x2000 && u16 <= 0x206F) {
+        return true;   // common Unicode punctuation
+    }
+    return u16 >= 0x3000 && u16 <= 0x303F;   // CJK symbols and punctuation
+}
+
+// Splits UTF-8 text into tokens and stores them in `characters` (previous
+// contents are discarded).
+//
+// The text is converted to UTF-16 and scanned one code unit at a time:
+//   - a unit accepted by EncodeConverter::IsChineseCharacter, TimestampIsDigit
+//     or the U16CHAR_T overload of TimestampIsPunctuation becomes a token of
+//     its own;
+//   - a space ends the word being built and is itself dropped;
+//   - any other unit is appended to the word being built.
+// A word in progress is emitted before every single-unit token and at the end.
+void TimestampSplitChiEngCharacters(const std::string &input_str,
+                                    std::vector<std::string> &characters) {
+    characters.clear();
+
+    const U16CHAR_T kSpace = 0x0020;
+    const size_t byte_len = input_str.size();
+    std::vector<U16CHAR_T> u16_buf(byte_len + 1);
+    U16CHAR_T* units = u16_buf.data();
+    const size_t unit_count = EncodeConverter::Utf8ToUtf16(
+        (U8CHAR_T*)input_str.data(), byte_len, units, byte_len + 1);
+
+    std::string pending_word = "";
+
+    // Emits the word accumulated so far, if there is one.
+    auto flush_word = [&]() {
+        if (!pending_word.empty()) {
+            characters.push_back(pending_word);
+            pending_word = "";
+        }
+    };
+
+    // Re-encodes the code unit at position idx as a UTF-8 string.
+    auto unit_to_utf8 = [&](size_t idx) -> std::string {
+        U8CHAR_T u8buf[4];
+        size_t n = EncodeConverter::Utf16ToUtf8(units + idx, u8buf);
+        u8buf[n] = '\0';
+        return std::string((const char*)u8buf);
+    };
+
+    for (size_t idx = 0; idx < unit_count; idx++) {
+        U16CHAR_T &unit = units[idx];
+        if (EncodeConverter::IsChineseCharacter(unit)
+            || TimestampIsDigit(unit) || TimestampIsPunctuation(unit)) {
+            flush_word();
+            characters.push_back(unit_to_utf8(idx));
+        } else if (unit == kSpace) {
+            flush_word();
+        } else {
+            pending_word += unit_to_utf8(idx);
+        }
+    }
+    flush_word();
+}
+
+// Serializes a list of integer rows as "[[a,b],[c,d]]" with no whitespace.
+// For an empty list the result is "" when out_empty is true and "[]"
+// otherwise.
+std::string VectorToString(const std::vector<std::vector<int>>& vec, bool out_empty) {
+    if (vec.empty()) {
+        return out_empty ? "" : "[]";
+    }
+
+    std::ostringstream out;
+    out << "[";
+    const char *row_sep = "";
+    for (const auto &row : vec) {
+        out << row_sep << "[";
+        const char *col_sep = "";
+        for (int value : row) {
+            out << col_sep << value;
+            col_sep = ",";
+        }
+        out << "]";
+        row_sep = ",";
+    }
+    out << "]";
+    return out.str();
+}
+
+// Re-aligns per-token timestamps after inverse text normalization (ITN).
+//
+//   text      recognized text before ITN
+//   text_itn  the same text after ITN
+//   str_time  timestamps belonging to `text`, formatted "[[begin,end],...]"
+//
+// Both texts are tokenized with TimestampSplitChiEngCharacters and aligned by
+// edit distance. A token that is identical on both sides keeps its timestamp.
+// Where the texts differ, the span covered by the replaced source tokens is
+// divided evenly among the ITN tokens that replace them. Punctuation tokens
+// consume no timestamp and produce none.
+//
+// Returns the new timestamp string, or "" when smoothing fails (the reason is
+// logged).
+std::string TimestampSmooth(std::string &text, std::string &text_itn, std::string &str_time){
+    vector<vector<int>> timestamps_out;
+    std::string timestamps_str = "";
+
+    // process string to vector<string>
+    std::vector<std::string> characters;
+    funasr::TimestampSplitChiEngCharacters(text, characters);
+
+    std::vector<std::string> characters_itn;
+    funasr::TimestampSplitChiEngCharacters(text_itn, characters_itn);
+
+    // convert string to vector<vector<int>>
+    vector<vector<int>> timestamps = funasr::ParseTimestamps(str_time);
+
+    if (timestamps.size() == 0){
+        LOG(ERROR) << "Timestamp Smooth Failed: Length of timestamp is zero";
+        return timestamps_str;
+    }
+
+    // edit distance
+    int m = characters.size();
+    int n = characters_itn.size();
+    std::vector<std::vector<int>> dp(m + 1, std::vector<int>(n + 1, 0));
+
+    // init
+    for (int i = 0; i <= m; ++i) {
+        dp[i][0] = i;
+    }
+    for (int j = 0; j <= n; ++j) {
+        dp[0][j] = j;
+    }
+
+    // dp
+    for (int i = 1; i <= m; ++i) {
+        for (int j = 1; j <= n; ++j) {
+            if (characters[i - 1] == characters_itn[j - 1]) {
+                dp[i][j] = dp[i - 1][j - 1];
+            } else {
+                dp[i][j] = std::min({dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]}) + 1;
+            }
+        }
+    }
+
+    // backtrack
+    std::deque<string> alignment_str1, alignment_str2;
+
+    // Records one aligned column on BOTH deques so they always stay the same
+    // length. Punctuation is recorded as a gap (""), and a column that would
+    // be a gap on both sides is dropped. Skipping punctuation on one side
+    // only would shift every later column and let the smoothing loop index
+    // past the end of the shorter deque.
+    auto push_column = [&](const std::string &src_tok, const std::string &itn_tok) {
+        const std::string lhs = funasr::TimestampIsPunctuation(src_tok) ? std::string() : src_tok;
+        const std::string rhs = funasr::TimestampIsPunctuation(itn_tok) ? std::string() : itn_tok;
+        if (lhs.empty() && rhs.empty()) {
+            return;
+        }
+        alignment_str1.push_front(lhs);
+        alignment_str2.push_front(rhs);
+    };
+
+    int i = m, j = n;
+    while (i > 0 || j > 0) {
+        if (i > 0 && j > 0 && dp[i][j] == dp[i - 1][j - 1]) {
+            push_column(characters[i - 1], characters_itn[j - 1]);
+            i -= 1;
+            j -= 1;
+        } else if (i > 0 && dp[i][j] == dp[i - 1][j] + 1) {
+            // source token without an ITN counterpart
+            push_column(characters[i - 1], "");
+            i -= 1;
+        } else if (j > 0 && dp[i][j] == dp[i][j - 1] + 1) {
+            // ITN token without a source counterpart
+            push_column("", characters_itn[j - 1]);
+            j -= 1;
+        } else{
+            // substitution; only reachable with i > 0 and j > 0 because the
+            // first row and column of dp always satisfy one of the branches above
+            push_column(characters[i - 1], characters_itn[j - 1]);
+            i -= 1;
+            j -= 1;
+        }
+    }
+
+    // smooth
+    int itn_count = 0;                    // ITN tokens still waiting for a timestamp
+    size_t idx_tp = 0;                    // next unused entry of `timestamps`
+    size_t idx_itn = 0;                   // number of ITN tokens seen so far
+    vector<vector<int>> timestamps_tmp;   // source timestamps waiting to be redistributed
+
+    // Divides the span covered by timestamps_tmp evenly among itn_count
+    // tokens, appends the result to timestamps_out and clears timestamps_tmp.
+    // With itn_count == 0 the pending timestamps are simply discarded.
+    auto flush_pending = [&]() {
+        if (timestamps_tmp.empty()) {
+            return;
+        }
+        if (itn_count > 0) {
+            int begin = timestamps_tmp[0][0];
+            int end = timestamps_tmp.back()[1];
+            int total_time = end - begin;
+            int interval = total_time / itn_count;
+            for (int idx_cnt = 0; idx_cnt < itn_count; idx_cnt++) {
+                vector<int> ts;
+                ts.push_back(begin + interval * idx_cnt);
+                if (idx_cnt == itn_count - 1) {
+                    // the last slot absorbs the integer-division remainder
+                    ts.push_back(end);
+                } else {
+                    ts.push_back(begin + interval * (idx_cnt + 1));
+                }
+                timestamps_out.push_back(ts);
+            }
+        }
+        timestamps_tmp.clear();
+    };
+
+    for (size_t index = 0; index < alignment_str1.size(); index++) {
+        if (alignment_str1[index] == alignment_str2[index]) {
+            bool subsidy = false;
+            if (itn_count > 0 && timestamps_tmp.size() == 0) {
+                // ITN tokens were inserted with no source span of their own:
+                // they share the span of the matching token that follows.
+                if (idx_tp >= timestamps.size()) {
+                    LOG(ERROR) << "Timestamp Smooth Failed: Index of tp is out of range. ";
+                    return timestamps_str;
+                }
+                timestamps_tmp.push_back(timestamps[idx_tp]);
+                subsidy = true;
+                itn_count++;
+            }
+
+            flush_pending();
+
+            if (!subsidy) {
+                if (idx_tp >= timestamps.size()) {
+                    LOG(ERROR) << "Timestamp Smooth Failed: Index of tp is out of range. ";
+                    return timestamps_str;
+                }
+                timestamps_out.push_back(timestamps[idx_tp]);
+            }
+            idx_tp++;
+            itn_count = 0;
+        } else {
+            if (!alignment_str1[index].empty()) {
+                if (idx_tp >= timestamps.size()) {
+                    LOG(ERROR) << "Timestamp Smooth Failed: Index of tp is out of range. ";
+                    return timestamps_str;
+                }
+                timestamps_tmp.push_back(timestamps[idx_tp]);
+                idx_tp++;
+            }
+            if (!alignment_str2[index].empty()) {
+                itn_count++;
+            }
+        }
+        // count length of itn
+        if (!alignment_str2[index].empty()) {
+            idx_itn++;
+        }
+    }
+
+    // Trailing ITN tokens with no source span share the last emitted timestamp.
+    if (itn_count > 0 && timestamps_tmp.size() == 0) {
+        if (timestamps_out.size() > 0) {
+            timestamps_tmp.push_back(timestamps_out.back());
+            itn_count++;
+            timestamps_out.pop_back();
+        } else {
+            LOG(ERROR) << "Timestamp Smooth Failed: Last itn has no timestamp.";
+            return timestamps_str;
+        }
+    }
+    flush_pending();
+
+    if (timestamps_out.size() != idx_itn) {
+        LOG(ERROR) << "Timestamp Smooth Failed: Timestamp length does not matched.";
+        return timestamps_str;
+    }
+
+    timestamps_str = VectorToString(timestamps_out);
+    return timestamps_str;
+}
+
+// Groups token timestamps into sentences and returns them as a JSON array.
+//
+//   text      text to segment; tokenized with TimestampSplitChiEngCharacters
+//   str_time  timestamps formatted "[[begin,end],...]"; one entry is consumed
+//             for every token that is not punctuation
+//
+// A sentence is closed at every token accepted by the std::string overload of
+// TimestampIsPunctuation, and once more at the end if timestamped tokens
+// remain. Each element has the form
+//   {"text_seg":"w1 w2","punc":"<mark>","start":"<n>","end":"<n>","ts_list":[[b,e],...]}
+// where start/end are emitted as quoted numbers. They are -1 until the first
+// sentence has been closed and 0 afterwards whenever a sentence has no
+// timestamps. Tokens that arrive after the timestamps are exhausted are
+// ignored.
+//
+// Elements are joined with "," only between emitted sentences, so the result
+// never ends in a dangling comma.
+// NOTE(review): text_seg and punc are inserted without JSON escaping.
+std::string TimestampSentence(std::string &text, std::string &str_time){
+    std::vector<std::string> characters;
+    funasr::TimestampSplitChiEngCharacters(text, characters);
+    vector<vector<int>> timestamps = funasr::ParseTimestamps(str_time);
+
+    size_t idx_ts = 0;
+    int start = -1, end = -1;
+    std::string text_seg = "";
+    vector<vector<int>> ts_seg;
+    std::vector<std::string> sentences;
+
+    // Formats the current segment, closed by `punc`, and stores it.
+    auto emit_sentence = [&](const std::string &punc) {
+        if (ts_seg.size() > 0) {
+            if (ts_seg.front().size() == 2) {
+                start = ts_seg.front()[0];
+            }
+            if (ts_seg.back().size() == 2) {
+                end = ts_seg.back()[1];
+            }
+        }
+        std::string ts_sent = "{\"text_seg\":\"" + text_seg + "\",";
+        ts_sent += "\"punc\":\"" + punc + "\",";
+        ts_sent += "\"start\":\"" + to_string(start) + "\",";
+        ts_sent += "\"end\":\"" + to_string(end) + "\",";
+        ts_sent += "\"ts_list\":" + VectorToString(ts_seg, false) + "}";
+        sentences.push_back(ts_sent);
+    };
+
+    for (const std::string &token : characters) {
+        if (TimestampIsPunctuation(token)) {
+            emit_sentence(token);
+            // clear
+            text_seg = "";
+            start = 0;
+            end = 0;
+            ts_seg.clear();
+        } else if (idx_ts < timestamps.size()) {
+            if (text_seg.empty()) {
+                text_seg = token;
+            } else {
+                text_seg += " " + token;
+            }
+            ts_seg.push_back(timestamps[idx_ts]);
+            idx_ts++;
+        }
+    }
+    // for none punc results
+    if (ts_seg.size() > 0) {
+        emit_sentence("");
+    }
+
+    std::string ts_sentences = "";
+    for (size_t i = 0; i < sentences.size(); i++) {
+        if (i > 0) {
+            ts_sentences += ",";
+        }
+        ts_sentences += sentences[i];
+    }
+    return "[" + ts_sentences + "]";
+}
+
std::vector<std::string> split(const std::string &s, char delim) {
std::vector<std::string> elems;
std::stringstream ss(s);
@@ -333,12 +722,23 @@
int sub_word = !(word.find("@@") == string::npos);
// process word start and middle part
if (sub_word) {
- combine += word.erase(word.length() - 2);
- if(!is_combining){
- begin = timestamp_list[i][0];
+ // if badcase: lo@@ chinese
+ if (i == raw_char.size()-1 || i<raw_char.size()-1 && IsChinese(raw_char[i+1])){
+ word = word.erase(word.length() - 2) + " ";
+ if (is_combining) {
+ combine += word;
+ is_combining = false;
+ word = combine;
+ combine = "";
+ }
+ }else{
+ combine += word.erase(word.length() - 2);
+ if(!is_combining){
+ begin = timestamp_list[i][0];
+ }
+ is_combining = true;
+ continue;
}
- is_combining = true;
- continue;
}
// process word end part
else if (is_combining) {
@@ -669,4 +1069,9 @@
ifs_hws.close();
}
+// Placeholder with an empty body: none of the arguments is read or modified.
+void SmoothTimestamps(std::string &str_punc, std::string &str_itn, std::string &str_timetamp){
+}
+
} // namespace funasr
diff --git a/runtime/onnxruntime/src/util.h b/runtime/onnxruntime/src/util.h
index 3ccfa6b..bb4e21a 100644
--- a/runtime/onnxruntime/src/util.h
+++ b/runtime/onnxruntime/src/util.h
@@ -3,11 +3,13 @@
#include <vector>
#include <memory>
#include <unordered_map>
+#include <deque>
#include "tensor.h"
using namespace std;
namespace funasr {
+typedef unsigned short U16CHAR_T;
extern float *LoadParams(const char *filename);
extern void SaveDataFile(const char *filename, void *data, uint32_t len);
@@ -35,7 +37,17 @@
std::vector<std::string> &chinese_characters);
void SplitChiEngCharacters(const std::string &input_str,
std::vector<std::string> &characters);
-
+void TimestampAdd(std::deque<string> &alignment_str1, std::string str_word);
+vector<vector<int>> ParseTimestamps(const std::string& str);
+bool TimestampIsDigit(U16CHAR_T &u16);
+bool TimestampIsAlpha(U16CHAR_T &u16);
+bool TimestampIsPunctuation(U16CHAR_T &u16);
+bool TimestampIsPunctuation(const std::string& str);
+void TimestampSplitChiEngCharacters(const std::string &input_str,
+ std::vector<std::string> &characters);
+std::string VectorToString(const std::vector<std::vector<int>>& vec, bool out_empty=true);
+std::string TimestampSmooth(std::string &text, std::string &text_itn, std::string &str_time);
+std::string TimestampSentence(std::string &text, std::string &str_time);
std::vector<std::string> split(const std::string &s, char delim);
template<typename T>
diff --git a/runtime/onnxruntime/src/vocab.cpp b/runtime/onnxruntime/src/vocab.cpp
index d29281c..6991376 100644
--- a/runtime/onnxruntime/src/vocab.cpp
+++ b/runtime/onnxruntime/src/vocab.cpp
@@ -16,6 +16,12 @@
ifstream in(filename);
LoadVocabFromYaml(filename);
}
+// Builds a vocabulary from the YAML config `filename` and then loads the
+// lexicon entries from `lex_file` (see LoadLex).
+Vocab::Vocab(const char *filename, const char *lex_file)
+{
+    // Both loaders open their files themselves, so no stream is created here.
+    LoadVocabFromYaml(filename);
+    LoadLex(lex_file);
+}
Vocab::~Vocab()
{
}
@@ -37,11 +43,37 @@
}
}
-int Vocab::GetIdByToken(const std::string &token) {
- if (token_id.count(token)) {
- return token_id[token];
+// Loads the lexicon file into lex_map.
+//
+// Each line is split at the first tab: the text before it is the key and the
+// rest of the line is the value. Lines whose key or value is empty are
+// skipped, and a repeated key overwrites the earlier entry. If the file
+// cannot be opened, the read loop never runs and lex_map is left unchanged.
+void Vocab::LoadLex(const char* filename){
+ std::ifstream file(filename);
+ std::string line;
+ while (std::getline(file, line)) {
+ std::string key, value;
+ std::istringstream iss(line);
+ // key: everything up to the first tab
+ std::getline(iss, key, '\t');
+ // value: the remainder of the line
+ std::getline(iss, value);
+
+ if (!key.empty() && !value.empty()) {
+ lex_map[key] = value;
+ }
}
- return 0;
+
+ file.close();
+}
+
+// Returns the lexicon value stored for `word`, or an empty string when the
+// word has no entry in lex_map.
+string Vocab::Word2Lex(const std::string &word) const {
+    const auto entry = lex_map.find(word);
+    if (entry == lex_map.end()) {
+        return "";
+    }
+    return entry->second;
+}
+
+// Returns the id stored for `token`, or -1 when the token is not in token_id.
+int Vocab::GetIdByToken(const std::string &token) const {
+    const auto found = token_id.find(token);
+    if (found == token_id.end()) {
+        return -1;
+    }
+    return found->second;
+}
void Vocab::Vector2String(vector<int> in, std::vector<std::string> &preds)
@@ -120,8 +152,8 @@
std::string combine = "";
std::string unicodeChar = "鈻�";
- for (auto it = in.begin(); it != in.end(); it++) {
- string word = vocab[*it];
+ for (i=0; i<in.size(); i++){
+ string word = vocab[in[i]];
// step1 space character skips
if (word == "<s>" || word == "</s>" || word == "<unk>")
continue;
@@ -146,9 +178,20 @@
int sub_word = !(word.find("@@") == string::npos);
// process word start and middle part
if (sub_word) {
- combine += word.erase(word.length() - 2);
- is_combining = true;
- continue;
+ // if badcase: lo@@ chinese
+ if (i == in.size()-1 || i<in.size()-1 && IsChinese(vocab[in[i+1]])){
+ word = word.erase(word.length() - 2) + " ";
+ if (is_combining) {
+ combine += word;
+ is_combining = false;
+ word = combine;
+ combine = "";
+ }
+ }else{
+ combine += word.erase(word.length() - 2);
+ is_combining = true;
+ continue;
+ }
}
// process word end part
else if (is_combining) {
diff --git a/runtime/onnxruntime/src/vocab.h b/runtime/onnxruntime/src/vocab.h
index 8834b97..19e3648 100644
--- a/runtime/onnxruntime/src/vocab.h
+++ b/runtime/onnxruntime/src/vocab.h
@@ -13,11 +13,14 @@
private:
vector<string> vocab;
std::map<string, int> token_id;
+ std::map<string, string> lex_map;
bool IsEnglish(string ch);
void LoadVocabFromYaml(const char* filename);
+ void LoadLex(const char* filename);
public:
Vocab(const char *filename);
+ Vocab(const char *filename, const char *lex_file);
~Vocab();
int Size() const;
bool IsChinese(string ch);
@@ -26,7 +29,8 @@
string Vector2StringV2(vector<int> in, std::string language="");
string Id2String(int id) const;
string WordFormat(std::string word);
- int GetIdByToken(const std::string &token);
+ int GetIdByToken(const std::string &token) const;
+ string Word2Lex(const std::string &word) const;
};
} // namespace funasr
diff --git a/runtime/python/websocket/funasr_wss_client.py b/runtime/python/websocket/funasr_wss_client.py
index 66b3ce0..92dc548 100644
--- a/runtime/python/websocket/funasr_wss_client.py
+++ b/runtime/python/websocket/funasr_wss_client.py
@@ -41,6 +41,10 @@
type=str,
default=None,
help="audio_in")
+parser.add_argument("--audio_fs",
+ type=int,
+ default=16000,
+ help="audio_fs")
parser.add_argument("--send_without_sleep",
action="store_true",
default=True,
@@ -164,7 +168,7 @@
hotword_msg=json.dumps(fst_dict)
print (hotword_msg)
- sample_rate = 16000
+ sample_rate = args.audio_fs
wav_format = "pcm"
use_itn=True
if args.use_itn == 0:
@@ -182,20 +186,12 @@
if wav_path.endswith(".pcm"):
with open(wav_path, "rb") as f:
audio_bytes = f.read()
- elif wav_path.endswith(".wav"):
- import wave
- with wave.open(wav_path, "rb") as wav_file:
- params = wav_file.getparams()
- sample_rate = wav_file.getframerate()
- frames = wav_file.readframes(wav_file.getnframes())
- audio_bytes = bytes(frames)
else:
wav_format = "others"
with open(wav_path, "rb") as f:
audio_bytes = f.read()
- # stride = int(args.chunk_size/1000*16000*2)
- stride = int(60 * args.chunk_size[1] / args.chunk_interval / 1000 * 16000 * 2)
+ stride = int(60 * args.chunk_size[1] / args.chunk_interval / 1000 * sample_rate * 2)
chunk_num = (len(audio_bytes) - 1) // stride + 1
# print(stride)
@@ -253,6 +249,7 @@
wav_name = meg.get("wav_name", "demo")
text = meg["text"]
timestamp=""
+ offline_msg_done = meg.get("is_final", False)
if "timestamp" in meg:
timestamp = meg["timestamp"]
@@ -262,7 +259,9 @@
else:
text_write_line = "{}\t{}\n".format(wav_name, text)
ibest_writer.write(text_write_line)
-
+
+ if 'mode' not in meg:
+ continue
if meg["mode"] == "online":
text_print += "{}".format(text)
text_print = text_print[-args.words_max_print:]
@@ -289,7 +288,7 @@
text_print = text_print[-args.words_max_print:]
os.system('clear')
print("\rpid" + str(id) + ": " + text_print)
- offline_msg_done=True
+ # offline_msg_done=True
except Exception as e:
print("Exception:", e)
diff --git a/runtime/readme.md b/runtime/readme.md
index aa7c5e2..3f57818 100644
--- a/runtime/readme.md
+++ b/runtime/readme.md
@@ -17,6 +17,7 @@
To meet the needs of different users, we have prepared different tutorials with text and images for both novice and advanced developers.
### Whats-new
+- 2024/01/03: Fixed known crash issues as well as memory leak problems, docker image version funasr-runtime-sdk-en-cpu-0.1.2 (0cdd9f4a4bb5).
- 2023/11/08: Adaptation to runtime structure changes (FunASR/funasr/runtime -> FunASR/runtime), docker image version funasr-runtime-sdk-en-cpu-0.1.1 (27017f70f72a).
- 2023/10/16: English File Transcription Service 1.0 released, docker image version funasr-runtime-sdk-en-cpu-0.1.0 (e0de03eb0163), refer to the detailed documentation锛圼here](https://mp.weixin.qq.com/s/DZZUTj-6xwFfi-96ml--4A)锛�
@@ -39,6 +40,7 @@
In order to meet the needs of different users for different scenarios, different tutorials are prepared:
### Whats-new
+- 2024/01/03: Real-time Transcription Service 1.6 released: the 2pass-offline mode supports Ngram language model decoding and WFST hotwords, and known crash issues and memory leak problems are fixed; docker image version funasr-runtime-sdk-online-cpu-0.1.6 (f99925110d27)
- 2023/11/09: Real-time Transcription Service 1.5 released锛宖ix bug: without online results, docker image version funasr-runtime-sdk-online-cpu-0.1.5 (b16584b6d38b)
- 2023/11/08: Real-time Transcription Service 1.4 released, supporting server-side loading of hotwords (updated hotword communication protocol), adaptation to runtime structure changes (FunASR/funasr/runtime -> FunASR/runtime), docker image version funasr-runtime-sdk-online-cpu-0.1.4(691974017c38).
- 2023/09/19: Real-time Transcription Service 1.2 released, supporting hotwords, timestamps, and ITN model in 2pass mode, docker image version funasr-runtime-sdk-online-cpu-0.1.2 (7222c5319bcf).
@@ -66,10 +68,12 @@
To meet the needs of different users, we have prepared different tutorials with text and images for both novice and advanced developers.
### Whats-new
-2023/11/08: File Transcription Service 3.0 released, supporting punctuation large model, Ngram model, fst hotwords (updated hotword communication protocol), server-side loading of hotwords, adaptation to runtime structure changes (FunASR/funasr/runtime -> FunASR/runtime), docker image version funasr-runtime-sdk-cpu-0.3.0 (caa64bddbb43), refer to the detailed documentation 锛圼here]()锛�
-2023/09/19: File Transcription Service 2.2 released, supporting ITN model, docker image version funasr-runtime-sdk-cpu-0.2.2 (2c5286be13e9).
-2023/08/22: File Transcription Service 2.0 released, integrated ffmpeg to support various audio and video inputs, supporting hotword model and timestamp model, docker image version funasr-runtime-sdk-cpu-0.2.0 (1ad3d19e0707), refer to the detailed documentation 锛圼here](https://mp.weixin.qq.com/s/oJHe0MKDqTeuIFH-F7GHMg)锛�
-2023/07/03: File Transcription Service 1.0 released, docker image version funasr-runtime-sdk-cpu-0.1.0 (1ad3d19e0707), refer to the detailed documentation 锛圼here](https://mp.weixin.qq.com/s/DHQwbgdBWcda0w_L60iUww)锛�
+- 2024/01/08: File Transcription Service 4.1 released, optimized the JSON format of sentence-level timestamps, docker image version funasr-runtime-sdk-cpu-0.4.1 (0250f8ef981b)
+- 2024/01/03: File Transcription Service 4.0 released, added support for 8k models, optimized timestamp mismatch issues and added sentence-level timestamps, improved the effectiveness of English word FST hotwords, supported automated configuration of thread parameters, and fixed known crash issues as well as memory leak problems, docker image version funasr-runtime-sdk-cpu-0.4.0 (c4483ee08f04)
+- 2023/11/08: File Transcription Service 3.0 released, supporting punctuation large model, Ngram model, fst hotwords (updated hotword communication protocol), server-side loading of hotwords, adaptation to runtime structure changes (FunASR/funasr/runtime -> FunASR/runtime), docker image version funasr-runtime-sdk-cpu-0.3.0 (caa64bddbb43), refer to the detailed documentation ([here](https://mp.weixin.qq.com/s/jSbnKw_m31BUUbTukPSOIw))
+- 2023/09/19: File Transcription Service 2.2 released, supporting ITN model, docker image version funasr-runtime-sdk-cpu-0.2.2 (2c5286be13e9).
+- 2023/08/22: File Transcription Service 2.0 released, integrated ffmpeg to support various audio and video inputs, supporting hotword model and timestamp model, docker image version funasr-runtime-sdk-cpu-0.2.0 (1ad3d19e0707), refer to the detailed documentation ([here](https://mp.weixin.qq.com/s/oJHe0MKDqTeuIFH-F7GHMg))
+- 2023/07/03: File Transcription Service 1.0 released, docker image version funasr-runtime-sdk-cpu-0.1.0 (1ad3d19e0707), refer to the detailed documentation ([here](https://mp.weixin.qq.com/s/DHQwbgdBWcda0w_L60iUww))
### Technical Principles
diff --git a/runtime/readme_cn.md b/runtime/readme_cn.md
index cf00a0b..2c7bb2b 100644
--- a/runtime/readme_cn.md
+++ b/runtime/readme_cn.md
@@ -19,6 +19,7 @@
涓轰簡鏀寔涓嶅悓鐢ㄦ埛鐨勯渶姹傦紝閽堝涓嶅悓鍦烘櫙锛屽噯澶囦簡涓嶅悓鐨勫浘鏂囨暀绋嬶細
### 鏈�鏂板姩鎬�
+- 2024/01/03: 英文离线文件转写服务 1.2 发布，修复已知的crash问题及内存泄漏问题，docker镜像版本funasr-runtime-sdk-en-cpu-0.1.2 (0cdd9f4a4bb5)
- 2023/11/08: 鑻辨枃绂荤嚎鏂囦欢杞啓鏈嶅姟 1.1 鍙戝竷锛宺untime缁撴瀯鍙樺寲閫傞厤锛團unASR/funasr/runtime->FunASR/runtime锛夛紝dokcer闀滃儚鐗堟湰funasr-runtime-sdk-en-cpu-0.1.1 (27017f70f72a)
- 2023/10/16: 鑻辨枃绂荤嚎鏂囦欢杞啓鏈嶅姟 1.0 鍙戝竷锛宒okcer闀滃儚鐗堟湰funasr-runtime-sdk-en-cpu-0.1.0 (e0de03eb0163)锛屽師鐞嗕粙缁嶆枃妗o紙[鐐瑰嚮姝ゅ](https://mp.weixin.qq.com/s/DZZUTj-6xwFfi-96ml--4A)锛�
@@ -33,6 +34,7 @@
涓轰簡鏀寔涓嶅悓鐢ㄦ埛鐨勯渶姹傦紝閽堝涓嶅悓鍦烘櫙锛屽噯澶囦簡涓嶅悓鐨勫浘鏂囨暀绋嬶細
### 鏈�鏂板姩鎬�
+- 2024/01/03: 中文实时语音听写服务 1.6 发布，2pass-offline模式支持Ngram语言模型解码、wfst热词，同时修复已知的crash问题及内存泄漏问题，docker镜像版本funasr-runtime-sdk-online-cpu-0.1.6 (f99925110d27)
- 2023/11/09: 涓枃瀹炴椂璇煶鍚啓鏈嶅姟 1.5 鍙戝竷锛屼慨澶嶆棤瀹炴椂缁撴灉鐨勯棶棰橈紝dokcer闀滃儚鐗堟湰funasr-runtime-sdk-online-cpu-0.1.5 (b16584b6d38b)
- 2023/11/08: 涓枃瀹炴椂璇煶鍚啓鏈嶅姟 1.4 鍙戝竷锛屾敮鎸佹湇鍔$鍔犺浇鐑瘝(鏇存柊鐑瘝閫氫俊鍗忚)銆乺untime缁撴瀯鍙樺寲閫傞厤锛團unASR/funasr/runtime->FunASR/runtime锛夛紝dokcer闀滃儚鐗堟湰funasr-runtime-sdk-online-cpu-0.1.4 (691974017c38)
- 2023/09/19: 涓枃瀹炴椂璇煶鍚啓鏈嶅姟 1.2 鍙戝竷锛�2pass妯″紡鏀寔鐑瘝銆佹椂闂存埑銆両TN妯″瀷锛宒okcer闀滃儚鐗堟湰funasr-runtime-sdk-online-cpu-0.1.2 (7222c5319bcf)
@@ -52,7 +54,8 @@
涓轰簡鏀寔涓嶅悓鐢ㄦ埛鐨勯渶姹傦紝閽堝涓嶅悓鍦烘櫙锛屽噯澶囦簡涓嶅悓鐨勫浘鏂囨暀绋嬶細
### 鏈�鏂板姩鎬�
-
+- 2024/01/08: 中文离线文件转写服务 4.1 发布，优化句子级时间戳json格式，docker镜像版本funasr-runtime-sdk-cpu-0.4.1 (0250f8ef981b)
+- 2024/01/03: 中文离线文件转写服务 4.0 发布，新增支持8k模型、优化时间戳不匹配问题及增加句子级别时间戳、优化英文单词fst热词效果、支持自动化配置线程参数，同时修复已知的crash问题及内存泄漏问题，docker镜像版本funasr-runtime-sdk-cpu-0.4.0 (c4483ee08f04)
- 2023/11/08: 涓枃绂荤嚎鏂囦欢杞啓鏈嶅姟 3.0 鍙戝竷锛屾敮鎸佹爣鐐瑰ぇ妯″瀷銆佹敮鎸丯gram妯″瀷銆佹敮鎸乫st鐑瘝(鏇存柊鐑瘝閫氫俊鍗忚)銆佹敮鎸佹湇鍔$鍔犺浇鐑瘝銆乺untime缁撴瀯鍙樺寲閫傞厤锛團unASR/funasr/runtime->FunASR/runtime锛夛紝dokcer闀滃儚鐗堟湰funasr-runtime-sdk-cpu-0.3.0 (caa64bddbb43)锛屽師鐞嗕粙缁嶆枃妗o紙[鐐瑰嚮姝ゅ](https://mp.weixin.qq.com/s/jSbnKw_m31BUUbTukPSOIw)锛�
- 2023/09/19: 涓枃绂荤嚎鏂囦欢杞啓鏈嶅姟 2.2 鍙戝竷锛屾敮鎸両TN妯″瀷锛宒okcer闀滃儚鐗堟湰funasr-runtime-sdk-cpu-0.2.2 (2c5286be13e9)
- 2023/08/22: 涓枃绂荤嚎鏂囦欢杞啓鏈嶅姟 2.0 鍙戝竷锛岄泦鎴恌fmpeg鏀寔澶氱闊宠棰戣緭鍏ャ�佹敮鎸佺儹璇嶆ā鍨嬨�佹敮鎸佹椂闂存埑妯″瀷锛宒okcer闀滃儚鐗堟湰funasr-runtime-sdk-cpu-0.2.0 (1ad3d19e0707)锛屽師鐞嗕粙缁嶆枃妗o紙[鐐瑰嚮姝ゅ](https://mp.weixin.qq.com/s/oJHe0MKDqTeuIFH-F7GHMg)锛�
diff --git a/runtime/run_server_2pass.sh b/runtime/run_server_2pass.sh
index 95a3bf7..2fc5f11 100644
--- a/runtime/run_server_2pass.sh
+++ b/runtime/run_server_2pass.sh
@@ -5,6 +5,7 @@
vad_dir="damo/speech_fsmn_vad_zh-cn-16k-common-onnx"
punc_dir="damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727-onnx"
itn_dir="thuduj12/fst_itn_zh"
+lm_dir="damo/speech_ngram_lm_zh-cn-ai-wesp-fst"
port=10095
certfile="../../../ssl_key/server.crt"
keyfile="../../../ssl_key/server.key"
@@ -30,6 +31,7 @@
--vad-dir "${vad_dir}" \
--punc-dir "${punc_dir}" \
--itn-dir "${itn_dir}" \
+ --lm-dir "${lm_dir}" \
--decoder-thread-num ${decoder_thread_num} \
--model-thread-num ${model_thread_num} \
--io-thread-num ${io_thread_num} \
diff --git a/runtime/triton_gpu/README_ONLINE.md b/runtime/triton_gpu/README_ONLINE.md
new file mode 100755
index 0000000..c3b6e83
--- /dev/null
+++ b/runtime/triton_gpu/README_ONLINE.md
@@ -0,0 +1,64 @@
+### Steps:
+1. Prepare model repo files
+* git clone https://www.modelscope.cn/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx.git
+* Convert lfr_cmvn_pe.onnx model. For example: python export_lfr_cmvn_pe_onnx.py
+* If you export to onnx, you should have several model files in `${MODEL_DIR}`:
+```
+├── README.md
+└── model_repo_paraformer_large_online
+    ├── cif_search
+    │   ├── 1
+    │   │   └── model.py
+    │   └── config.pbtxt
+    ├── decoder
+    │   ├── 1
+    │   │   └── decoder.onnx
+    │   └── config.pbtxt
+    ├── encoder
+    │   ├── 1
+    │   │   └── model.onnx
+    │   └── config.pbtxt
+    ├── feature_extractor
+    │   ├── 1
+    │   │   └── model.py
+    │   ├── config.pbtxt
+    │   └── config.yaml
+    ├── lfr_cmvn_pe
+    │   ├── 1
+    │   │   └── lfr_cmvn_pe.onnx
+    │   ├── am.mvn
+    │   ├── config.pbtxt
+    │   └── export_lfr_cmvn_pe_onnx.py
+    └── streaming_paraformer
+        ├── 1
+        └── config.pbtxt
+```
+
+2. Follow the instructions below to launch the Triton server
+```sh
+# using docker image Dockerfile/Dockerfile.server
+docker build . -f Dockerfile/Dockerfile.server -t triton-paraformer:23.01
+docker run -it --rm --name "paraformer_triton_server" --gpus all -v <path_host/model_repo_paraformer_large_online>:/workspace/ --shm-size 1g --net host triton-paraformer:23.01
+
+# launch the service
+cd /workspace
+tritonserver --model-repository model_repo_paraformer_large_online \
+ --pinned-memory-pool-byte-size=512000000 \
+ --cuda-memory-pool-byte-size=0:1024000000
+
+```
+
+### Performance benchmark with a single A10
+
+* FP32, onnx, [paraformer large online](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online-onnx/summary).
+  Our chunk size is 10 * 960 / 16000 = 0.6 s, so latency must stay below 0.6 s for the service to run in real time.
+
+
+| Concurrency | Throughput | Latency_p50 (ms) | Latency_p90 (ms) | Latency_p95 (ms) | Latency_p99 (ms) |
+|-------------|------------|------------------|------------------|------------------|------------------|
+| 20 | 309.252 | 56.913 | 76.267 | 85.598 | 138.462 |
+| 40 | 391.058 | 97.911 | 145.509 | 150.545 | 185.399 |
+| 60 | 426.269 | 138.244 | 185.855 | 201.016 | 236.528 |
+| 80 | 431.781 | 170.991 | 227.983 | 252.453 | 412.273 |
+| 100 | 473.351 | 206.205 | 262.612 | 288.964 | 463.337 |
+
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/cif_search/1/model.py b/runtime/triton_gpu/model_repo_paraformer_large_online/cif_search/1/model.py
new file mode 100755
index 0000000..96ad821
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/cif_search/1/model.py
@@ -0,0 +1,268 @@
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+import triton_python_backend_utils as pb_utils
+import numpy as np
+from torch.utils.dlpack import from_dlpack
+import json
+import yaml
+import asyncio
+from collections import OrderedDict
+
+
+class LimitedDict(OrderedDict):  # OrderedDict capped at max_length entries; oldest entry is dropped first
+    def __init__(self, max_length):
+        super().__init__()
+        self.max_length = max_length  # maximum number of keys kept at any time
+
+    def __setitem__(self, key, value):
+        if key not in self and len(self) >= self.max_length:  # overwriting an existing key never evicts
+            self.popitem(last=False)  # last=False removes the oldest inserted key
+        super().__setitem__(key, value)
+
+
+class CIFSearch:
+    """Streaming CIF (continuous integrate-and-fire) search; the cache is shaped for batch size 1. Adapted from
+    https://github.com/alibaba-damo-academy/FunASR/blob/main/runtime/python/onnxruntime/funasr_onnx/paraformer_online_bin.py"""
+    def __init__(self):
+        self.cache = {"cif_hidden": np.zeros((1, 1, 512)).astype(np.float32),
+                      "cif_alphas": np.zeros((1, 1)).astype(np.float32), "last_chunk": False}
+        self.chunk_size = [5, 10, 5]  # infer() keeps only the alphas of the middle segment
+        self.tail_threshold = 0.45  # extra alpha appended once cache["last_chunk"] is set
+        self.cif_threshold = 1.0  # accumulated alpha at which a token fires
+
+    def infer(self, hidden, alphas):
+        """Integrate ``alphas`` over time and emit one weighted sum of ``hidden`` per fired token.
+
+        ``hidden`` is indexed as (batch, time, hidden_size) and ``alphas`` as (batch, time);
+        neither input is modified. Returns (float32 embeddings padded to the longest token
+        count, int32 token counts). The un-fired remainder is kept in ``self.cache``.
+        """
+        batch_size, _, hidden_size = hidden.shape
+        token_length = []
+        list_frames = []
+        cache_alphas = []
+        cache_hiddens = []
+        alphas = alphas.copy()  # the masking below must not write into the caller's array
+        alphas[:, :self.chunk_size[0]] = 0.0
+        alphas[:, sum(self.chunk_size[:2]):] = 0.0
+
+        if self.cache is not None and "cif_alphas" in self.cache and "cif_hidden" in self.cache:
+            hidden = np.concatenate((self.cache["cif_hidden"], hidden), axis=1)
+            alphas = np.concatenate((self.cache["cif_alphas"], alphas), axis=1)
+        if self.cache is not None and "last_chunk" in self.cache and self.cache["last_chunk"]:
+            tail_hidden = np.zeros((batch_size, 1, hidden_size)).astype(np.float32)
+            tail_alphas = np.array([[self.tail_threshold]]).astype(np.float32)
+            tail_alphas = np.tile(tail_alphas, (batch_size, 1))
+            hidden = np.concatenate((hidden, tail_hidden), axis=1)
+            alphas = np.concatenate((alphas, tail_alphas), axis=1)
+
+        len_time = alphas.shape[1]
+        for b in range(batch_size):
+            integrate = 0.0
+            frames = np.zeros(hidden_size).astype(np.float32)
+            list_frame = []
+            for t in range(len_time):
+                alpha = alphas[b][t]
+                if alpha + integrate < self.cif_threshold:
+                    integrate += alpha
+                    frames += alpha * hidden[b][t]
+                else:
+                    frames += (self.cif_threshold - integrate) * hidden[b][t]  # top up to the threshold
+                    list_frame.append(frames)
+                    integrate += alpha
+                    integrate -= self.cif_threshold
+                    frames = integrate * hidden[b][t]  # the overshoot starts the next token
+
+            cache_alphas.append(integrate)
+            if integrate > 0.0:
+                cache_hiddens.append(frames / integrate)
+            else:
+                cache_hiddens.append(frames)
+
+            token_length.append(len(list_frame))
+            list_frames.append(list_frame)
+
+        max_token_len = max(token_length)
+        list_ls = []
+        for b in range(batch_size):
+            pad_frames = np.zeros((max_token_len - token_length[b], hidden_size)).astype(np.float32)
+            if token_length[b] == 0:
+                list_ls.append(pad_frames)
+            else:
+                list_ls.append(np.concatenate((list_frames[b], pad_frames), axis=0))
+
+        self.cache["cif_alphas"] = np.expand_dims(np.stack(cache_alphas, axis=0), axis=0)
+        self.cache["cif_hidden"] = np.expand_dims(np.stack(cache_hiddens, axis=0), axis=0)
+
+        return np.stack(list_ls, axis=0).astype(np.float32), np.stack(token_length, axis=0).astype(np.int32)
+
+
+class TritonPythonModel:
+    """Triton Python-backend model that runs a per-sequence CIFSearch on the encoder output,
+    forwards the fired acoustic embeddings to the 'decoder' model and returns the transcript.
+    Triton requires every Python model class to be named "TritonPythonModel"."""
+
+    def initialize(self, args):
+        """Parse the model config, load the vocabulary and create the per-sequence caches.
+
+        Called only once, when the model is being loaded.
+
+        Parameters
+        ----------
+        args : dict
+          Both keys and values are strings. The dictionary keys and values are:
+          * model_config: A JSON string containing the model configuration
+          * model_instance_kind: A string containing model instance kind
+          * model_instance_device_id: A string containing model instance device ID
+          * model_repository: Model repository path
+          * model_version: Model version
+          * model_name: Model name
+        """
+        self.model_config = model_config = json.loads(args['model_config'])
+        self.max_batch_size = max(model_config["max_batch_size"], 1)
+
+        # Look up the "transcripts" output declared in the model configuration
+        output0_config = pb_utils.get_output_config_by_name(
+            model_config, "transcripts")
+        # Convert its Triton data type to the matching numpy dtype
+        self.out0_dtype = pb_utils.triton_string_to_numpy(
+            output0_config['data_type'])
+
+        self.init_vocab(self.model_config['parameters'])
+
+        self.cif_search_cache = LimitedDict(1024)  # corrid -> CIFSearch state of that sequence
+        self.start = LimitedDict(1024)  # corrid -> 1 until the first decoder request has been built
+
+    def init_vocab(self, parameters):
+        for li in parameters.items():  # every parameter value is expected to carry "string_value"
+            key, value = li
+            value = value["string_value"]
+            if key == "vocabulary":  # self.vocab_dict stays unset when this key is absent
+                self.vocab_dict = self.load_vocab(value)
+
+    def load_vocab(self, vocab_file):
+        with open(str(vocab_file), 'rb') as f:
+            config = yaml.load(f, Loader=yaml.Loader)  # NOTE(review): full loader; safe_load may suffice - confirm
+        return config['token_list']
+
+    async def execute(self, requests):
+        """Run CIF search for every request and decode the fired tokens with the 'decoder' model.
+
+        Requests that fire no token in this chunk skip the decoder and return an empty string.
+        Per-sequence state is created on START and deleted once the END request is answered.
+
+        Parameters
+        ----------
+        requests : list
+          A list of pb_utils.InferenceRequest
+
+        Returns
+        -------
+        list
+          A list of pb_utils.InferenceResponse. The length of this list must
+          be the same as `requests`
+        """
+        # Bookkeeping for this batch of requests:
+        #   batch_corrid / batch_end  - correlation ID and END flag of every request, in order
+        #   qualified_corrid          - IDs that fired at least one token (sent to the decoder)
+        #   batch_result              - corrid -> transcript ('' when nothing was decoded)
+        #   inference_response_awaits - pending decoder calls, aligned with qualified_corrid
+        # NOTE(review): a corrid that never sent START makes the cache lookups below raise KeyError.
+
+        batch_end = []
+        responses = []
+        batch_corrid = []
+        qualified_corrid = []
+        batch_result = {}
+        inference_response_awaits = []
+
+        for request in requests:
+            hidden = pb_utils.get_input_tensor_by_name(request, "enc")
+            hidden = from_dlpack(hidden.to_dlpack()).cpu().numpy()
+            alphas = pb_utils.get_input_tensor_by_name(request, "alphas")
+            alphas = from_dlpack(alphas.to_dlpack()).cpu().numpy()
+            hidden_len = pb_utils.get_input_tensor_by_name(request, "enc_len")
+            hidden_len = from_dlpack(hidden_len.to_dlpack()).cpu().numpy()
+
+            in_start = pb_utils.get_input_tensor_by_name(request, "START")
+            start = in_start.as_numpy()[0][0]
+
+            in_corrid = pb_utils.get_input_tensor_by_name(request, "CORRID")
+            corrid = in_corrid.as_numpy()[0][0]
+
+            in_end = pb_utils.get_input_tensor_by_name(request, "END")
+            end = in_end.as_numpy()[0][0]
+
+            batch_end.append(end)
+            batch_corrid.append(corrid)
+
+            if start:  # first chunk of a sequence: create fresh CIF state
+                self.cif_search_cache[corrid] = CIFSearch()
+                self.start[corrid] = 1
+            if end:  # last chunk: makes infer() append the tail alpha
+                self.cif_search_cache[corrid].cache["last_chunk"] = True
+
+            acoustic, acoustic_len = self.cif_search_cache[corrid].infer(hidden, alphas)
+            batch_result[corrid] = ''
+            if acoustic.shape[1] == 0:  # no token fired in this chunk: nothing to decode
+                continue
+            else:
+                qualified_corrid.append(corrid)
+                input_tensor0 = pb_utils.Tensor("enc", hidden)
+                input_tensor1 = pb_utils.Tensor("enc_len", np.array([hidden_len], dtype=np.int32))
+                input_tensor2 = pb_utils.Tensor("acoustic_embeds", acoustic)
+                input_tensor3 = pb_utils.Tensor("acoustic_embeds_len", np.array([acoustic_len], dtype=np.int32))
+                input_tensors = [input_tensor0, input_tensor1, input_tensor2, input_tensor3]
+
+                if self.start[corrid] and end:  # presumably Triton sequence flags: 1 = start, 2 = end - confirm
+                    flag = 3
+                elif end:
+                    flag = 2
+                elif self.start[corrid]:
+                    flag = 1
+                    self.start[corrid] = 0  # the start flag is only sent with the first decoder request
+                else:
+                    flag = 0
+                inference_request = pb_utils.InferenceRequest(
+                    model_name='decoder',
+                    requested_output_names=['sample_ids'],
+                    inputs=input_tensors,
+                    request_id='',
+                    correlation_id=corrid,
+                    flags=flag
+                )
+                inference_response_awaits.append(inference_request.async_exec())
+
+        inference_responses = await asyncio.gather(*inference_response_awaits)
+
+        for index_corrid, inference_response in zip(qualified_corrid, inference_responses):
+            if inference_response.has_error():
+                raise pb_utils.TritonModelException(inference_response.error().message())
+            else:
+                sample_ids = pb_utils.get_output_tensor_by_name(inference_response, 'sample_ids')
+                token_ids = from_dlpack(sample_ids.to_dlpack()).cpu().numpy()[0]
+
+                # Map the integer ids to vocabulary tokens and join them without a separator
+                tokens = [self.vocab_dict[token_id] for token_id in token_ids]
+                batch_result[index_corrid] = "".join(tokens)
+
+        for i, index_corrid in enumerate(batch_corrid):  # one response per request, in request order
+            sent = np.array([batch_result[index_corrid]])
+            out0 = pb_utils.Tensor("transcripts", sent.astype(self.out0_dtype))
+            inference_response = pb_utils.InferenceResponse(output_tensors=[out0])
+            responses.append(inference_response)
+
+            if batch_end[i]:  # sequence finished: drop its per-sequence state
+                del self.cif_search_cache[index_corrid]
+                del self.start[index_corrid]
+
+        return responses
+
+    def finalize(self):
+        """`finalize` is called only once when the model is being unloaded.
+        Implementing `finalize` function is optional. This function allows
+        the model to perform any necessary clean ups before exit.
+        """
+        print('Cleaning up...')
+
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/cif_search/config.pbtxt b/runtime/triton_gpu/model_repo_paraformer_large_online/cif_search/config.pbtxt
new file mode 100755
index 0000000..bf60588
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/cif_search/config.pbtxt
@@ -0,0 +1,111 @@
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+name: "cif_search"
+backend: "python"
+max_batch_size: 128
+
+sequence_batching{
+ max_sequence_idle_microseconds: 15000000
+ oldest {
+ max_candidate_sequences: 1024
+ preferred_batch_size: [32, 64, 128]
+ }
+ control_input [
+ {
+ name: "START",
+ control [
+ {
+ kind: CONTROL_SEQUENCE_START
+ fp32_false_true: [0, 1]
+ }
+ ]
+ },
+ {
+ name: "READY"
+ control [
+ {
+ kind: CONTROL_SEQUENCE_READY
+ fp32_false_true: [0, 1]
+ }
+ ]
+ },
+ {
+ name: "CORRID",
+ control [
+ {
+ kind: CONTROL_SEQUENCE_CORRID
+ data_type: TYPE_UINT64
+ }
+ ]
+ },
+ {
+ name: "END",
+ control [
+ {
+ kind: CONTROL_SEQUENCE_END
+ fp32_false_true: [0, 1]
+ }
+ ]
+ }
+ ]
+}
+
+
+parameters [
+ {
+ key: "vocabulary",
+ value: { string_value: "model_repo_paraformer_large_online/feature_extractor/config.yaml"}
+ },
+ { key: "FORCE_CPU_ONLY_INPUT_TENSORS"
+ value: {string_value:"no"}
+ }
+]
+
+input [
+ {
+ name: "enc"
+ data_type: TYPE_FP32
+ dims: [-1, 512]
+ },
+ {
+ name: "enc_len"
+ data_type: TYPE_INT32
+ dims: [1]
+ reshape: { shape: [ ] }
+ },
+ {
+ name: 'alphas'
+ data_type: TYPE_FP32
+ dims: [-1]
+ }
+]
+
+output [
+ {
+ name: "transcripts"
+ data_type: TYPE_STRING
+ dims: [1]
+ }
+]
+
+instance_group [
+ {
+ count: 6
+ kind: KIND_CPU
+ }
+ ]
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/decoder/config.pbtxt b/runtime/triton_gpu/model_repo_paraformer_large_online/decoder/config.pbtxt
new file mode 100755
index 0000000..9efc8bd
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/decoder/config.pbtxt
@@ -0,0 +1,274 @@
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+name: "decoder"
+backend: "onnxruntime"
+default_model_filename: "decoder.onnx"
+
+max_batch_size: 128
+
+sequence_batching{
+ max_sequence_idle_microseconds: 15000000
+ oldest {
+ max_candidate_sequences: 1024
+ preferred_batch_size: [16, 32, 64]
+ }
+ control_input [
+ ]
+ state [
+ {
+ input_name: "in_cache_0"
+ output_name: "out_cache_0"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_1"
+ output_name: "out_cache_1"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_2"
+ output_name: "out_cache_2"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_3"
+ output_name: "out_cache_3"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_4"
+ output_name: "out_cache_4"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_5"
+ output_name: "out_cache_5"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_6"
+ output_name: "out_cache_6"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_7"
+ output_name: "out_cache_7"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_8"
+ output_name: "out_cache_8"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_9"
+ output_name: "out_cache_9"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_10"
+ output_name: "out_cache_10"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_11"
+ output_name: "out_cache_11"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_12"
+ output_name: "out_cache_12"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_13"
+ output_name: "out_cache_13"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_14"
+ output_name: "out_cache_14"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "in_cache_15"
+ output_name: "out_cache_15"
+ data_type: TYPE_FP32
+ dims: [ 512, 10 ]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [ 512, 10]
+ zero_data: true
+ name: "initial state"
+ }
+ }
+ ]
+}
+
+
+input [
+ {
+ name: "enc"
+ data_type: TYPE_FP32
+ dims: [-1, 512]
+ },
+ {
+ name: "enc_len"
+ data_type: TYPE_INT32
+ dims: [1]
+ reshape: { shape: [ ] }
+ },
+ {
+ name: "acoustic_embeds"
+ data_type: TYPE_FP32
+ dims: [-1, 512]
+ },
+ {
+ name: "acoustic_embeds_len"
+ data_type: TYPE_INT32
+ dims: [1]
+ reshape: { shape: [ ] }
+ }
+]
+
+output [
+ {
+ name: "logits"
+ data_type: TYPE_FP32
+ dims: [-1, 8404]
+ },
+ {
+ name: "sample_ids"
+ data_type: TYPE_INT64
+ dims: [-1]
+ }
+]
+
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_GPU
+ }
+]
+
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/encoder/config.pbtxt b/runtime/triton_gpu/model_repo_paraformer_large_online/encoder/config.pbtxt
new file mode 100755
index 0000000..3e54df1
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/encoder/config.pbtxt
@@ -0,0 +1,77 @@
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+name: "encoder"
+backend: "onnxruntime"
+default_model_filename: "model.onnx"
+
+max_batch_size: 128
+
+
+sequence_batching{
+ max_sequence_idle_microseconds: 15000000
+ oldest {
+ max_candidate_sequences: 1024
+ preferred_batch_size: [32, 64, 128]
+ max_queue_delay_microseconds: 300
+ }
+ control_input [
+ ]
+ state [
+ ]
+}
+
+
+input [
+ {
+ name: "speech"
+ data_type: TYPE_FP32
+ dims: [-1, 560]
+ },
+ {
+ name: "speech_lengths"
+ data_type: TYPE_INT32
+ dims: [1]
+ reshape: { shape: [ ] }
+ }
+]
+
+output [
+ {
+ name: "enc"
+ data_type: TYPE_FP32
+ dims: [-1, 512]
+ },
+ {
+ name: "enc_len"
+ data_type: TYPE_INT32
+ dims: [1]
+ reshape: { shape: [ ] }
+ },
+ {
+ name: "alphas"
+ data_type: TYPE_FP32
+ dims: [-1]
+ }
+]
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_GPU
+ }
+]
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/1/model.py b/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/1/model.py
new file mode 100755
index 0000000..7d81a98
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/1/model.py
@@ -0,0 +1,221 @@
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+# Modified from NVIDIA(https://github.com/wenet-e2e/wenet/blob/main/runtime/gpu/
+# model_repo_stateful/feature_extractor/1/model.py)
+
+import triton_python_backend_utils as pb_utils
+from torch.utils.dlpack import from_dlpack
+import torch
+import kaldifeat
+from typing import List
+import json
+import numpy as np
+import yaml
+from collections import OrderedDict
+
+
+class LimitedDict(OrderedDict):  # insertion-ordered dict capped at max_length entries
+    def __init__(self, max_length):  # max_length: maximum number of keys kept at once
+        super().__init__()
+        self.max_length = max_length
+
+    def __setitem__(self, key, value):  # evicts the oldest entry only when a new key overflows
+        if key not in self and len(self) >= self.max_length:  # overwriting a key needs no room
+            self.popitem(last=False)  # last=False pops the oldest-inserted item
+        super().__setitem__(key, value)
+
+
+class Fbank(torch.nn.Module):  # nn.Module wrapper around a kaldifeat.Fbank extractor
+    def __init__(self, opts):  # opts is passed unchanged to kaldifeat.Fbank
+        super(Fbank, self).__init__()
+        self.fbank = kaldifeat.Fbank(opts)
+
+    def forward(self, waves: List[torch.Tensor]):  # returns whatever self.fbank yields for the list
+        return self.fbank(waves)
+
+
+class Feat(object):  # per-sequence buffer of pending waveform samples and fbank frames
+    def __init__(self, seqid, offset_ms, sample_rate, frame_stride, device='cpu'):
+        self.seqid = seqid
+        self.sample_rate = sample_rate
+        self.wav = torch.tensor([], device=device)  # samples not yet handed out
+        self.offset = int(offset_ms / 1000 * sample_rate)  # samples carried over between segments
+        self.frames = None  # None until the first add_frames() call
+        self.frame_stride = int(frame_stride)  # frames dropped by each get_frames() call
+        self.device = device
+        self.lfr_m = 7  # add_frames() repeats the first frame (lfr_m - 1) // 2 times
+
+    def add_wavs(self, wav: torch.Tensor):  # append samples to the buffer along axis 0
+        wav = wav.to(self.device)
+        self.wav = torch.cat((self.wav, wav), axis=0)
+
+    def get_seg_wav(self):  # return all buffered samples; keep only the last `offset` for next time
+        seg = self.wav[:]
+        self.wav = self.wav[max(self.wav.shape[0] - self.offset, 0):]  # `[-0:]` would keep everything
+        return seg
+
+    def add_frames(self, frames: torch.Tensor):
+        """
+        frames: seq_len x feat_sz
+        """
+        if self.frames is None:  # first call: left-pad by repeating frame 0
+            self.frames = torch.cat((frames[0, :].repeat((self.lfr_m - 1) // 2, 1),
+                                     frames), axis=0)
+        else:
+            self.frames = torch.cat([self.frames, frames], axis=0)
+
+    def get_frames(self, num_frames: int):  # first num_frames frames; buffer then advances by frame_stride
+        seg = self.frames[0: num_frames]
+        self.frames = self.frames[self.frame_stride:]
+        return seg
+
+
+class TritonPythonModel:
+    """Your Python model must use the same class name. Every Python model
+    that is created must have "TritonPythonModel" as the class name.
+    """
+
+    def initialize(self, args):
+        """`initialize` is called only once when the model is being loaded.
+        Implementing `initialize` function is optional. This function allows
+        the model to initialize any state associated with this model.
+        Parameters
+        ----------
+        args : dict
+          Both keys and values are strings. The dictionary keys and values are:
+          * model_config: A JSON string containing the model configuration
+          * model_instance_kind: A string containing model instance kind
+          * model_instance_device_id: A string containing model instance device ID
+          * model_repository: Model repository path
+          * model_version: Model version
+          * model_name: Model name
+        """
+        self.model_config = model_config = json.loads(args['model_config'])
+        self.max_batch_size = max(model_config["max_batch_size"], 1)  # never below 1
+
+        if "GPU" in model_config["instance_group"][0]["kind"]:  # only the first instance group is checked
+            self.device = "cuda"
+        else:
+            self.device = "cpu"
+
+        # Look up the configuration entry of the "speech" output
+        output0_config = pb_utils.get_output_config_by_name(
+            model_config, "speech")
+        # Convert Triton types to numpy types
+        self.output0_dtype = pb_utils.triton_string_to_numpy(
+            output0_config['data_type'])
+
+        if self.output0_dtype == np.float32:
+            self.dtype = torch.float32
+        else:
+            self.dtype = torch.float16  # every non-float32 output type maps to float16
+
+        self.feature_size = output0_config['dims'][-1]  # last dim of the "speech" output
+        self.decoding_window = output0_config['dims'][-2]  # frames returned per request (see execute)
+
+        params = self.model_config['parameters']
+        for li in params.items():
+            key, value = li
+            value = value["string_value"]
+            if key == "config_path":  # NOTE(review): without this key `config` is undefined below
+                with open(str(value), 'rb') as f:
+                    config = yaml.load(f, Loader=yaml.Loader)  # NOTE(review): unsafe loader; consider yaml.safe_load
+
+        opts = kaldifeat.FbankOptions()
+        opts.frame_opts.dither = 0.0
+        opts.frame_opts.window_type = config['frontend_conf']['window']
+        opts.mel_opts.num_bins = int(config['frontend_conf']['n_mels'])
+        opts.frame_opts.frame_shift_ms = float(config['frontend_conf']['frame_shift'])
+        opts.frame_opts.frame_length_ms = float(config['frontend_conf']['frame_length'])
+        opts.frame_opts.samp_freq = int(config['frontend_conf']['fs'])
+        opts.device = torch.device(self.device)
+        self.opts = opts
+        self.feature_extractor = Fbank(self.opts)
+
+        self.seq_feat = LimitedDict(1024)  # correlation ID -> Feat buffer, at most 1024 entries
+        chunk_size_s = float(params["chunk_size_s"]["string_value"])  # chunk length in seconds
+
+        sample_rate = opts.frame_opts.samp_freq
+        frame_shift_ms = opts.frame_opts.frame_shift_ms
+        frame_length_ms = opts.frame_opts.frame_length_ms
+
+        self.chunk_size = int(chunk_size_s * sample_rate)  # chunk length in samples
+        self.frame_stride = (chunk_size_s * 1000) // frame_shift_ms  # frame shifts per chunk; Feat casts it to int
+        self.offset_ms = self.get_offset(frame_length_ms, frame_shift_ms)
+        self.sample_rate = sample_rate
+
+    def get_offset(self, frame_length_ms, frame_shift_ms):  # largest multiple of the shift below the frame length
+        offset_ms = 0
+        while offset_ms + frame_shift_ms < frame_length_ms:
+            offset_ms += frame_shift_ms
+        return offset_ms
+
+    def execute(self, requests):
+        """`execute` must be implemented in every Python model. `execute`
+        function receives a list of pb_utils.InferenceRequest as the only
+        argument. This function is called when an inference is requested
+        for this model.
+        Parameters
+        ----------
+        requests : list
+          A list of pb_utils.InferenceRequest
+        Returns
+        -------
+        list
+          A list of pb_utils.InferenceResponse. The length of this list must
+          be the same as `requests`
+        """
+        total_waves = []  # one waveform segment per request, in request order
+        responses = []
+        batch_seqid = []  # correlation ID of each request, parallel to total_waves
+        end_seqid = {}  # correlation IDs whose END flag is set in this batch
+        for request in requests:
+            input0 = pb_utils.get_input_tensor_by_name(request, "wav")
+            wav = from_dlpack(input0.to_dlpack())[0]  # index 0 along the leading axis
+            # input1 = pb_utils.get_input_tensor_by_name(request, "wav_lens")
+            # wav_len = from_dlpack(input1.to_dlpack())[0]
+            wav_len = len(wav)  # the "wav_lens" input is not read; length comes from the tensor
+            if wav_len < self.chunk_size:  # zero-pad short chunks up to chunk_size samples
+                temp = torch.zeros(self.chunk_size, dtype=torch.float32,
+                                   device=self.device)
+                temp[0:wav_len] = wav[:]
+                wav = temp
+
+            in_start = pb_utils.get_input_tensor_by_name(request, "START")
+            start = in_start.as_numpy()[0][0]
+            in_ready = pb_utils.get_input_tensor_by_name(request, "READY")
+            ready = in_ready.as_numpy()[0][0]
+            in_corrid = pb_utils.get_input_tensor_by_name(request, "CORRID")
+            corrid = in_corrid.as_numpy()[0][0]
+            in_end = pb_utils.get_input_tensor_by_name(request, "END")
+            end = in_end.as_numpy()[0][0]
+
+            if start:  # START flag: (re)create the buffer for this correlation ID
+                self.seq_feat[corrid] = Feat(corrid, self.offset_ms,
+                                             self.sample_rate,
+                                             self.frame_stride,
+                                             self.device)
+            if ready:  # NOTE(review): raises KeyError if corrid has no buffer (not started or evicted)
+                self.seq_feat[corrid].add_wavs(wav)
+
+            batch_seqid.append(corrid)
+            if end:
+                end_seqid[corrid] = 1
+
+            wav = self.seq_feat[corrid].get_seg_wav() * 32768  # presumably rescales to 16-bit PCM range; TODO confirm
+            total_waves.append(wav)
+        features = self.feature_extractor(total_waves)  # zipped with batch_seqid below
+        for corrid, frames in zip(batch_seqid, features):
+            self.seq_feat[corrid].add_frames(frames)
+            speech = self.seq_feat[corrid].get_frames(self.decoding_window)
+            out_tensor0 = pb_utils.Tensor("speech", torch.unsqueeze(speech, 0).to("cpu").numpy())
+            output_tensors = [out_tensor0]
+            response = pb_utils.InferenceResponse(output_tensors=output_tensors)
+            responses.append(response)
+            if corrid in end_seqid:  # sequence ended in this batch: drop its buffer
+                del self.seq_feat[corrid]
+        return responses
+
+    def finalize(self):
+        print("Remove feature extractor!")
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.pbtxt b/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.pbtxt
new file mode 100755
index 0000000..ef322da
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.pbtxt
@@ -0,0 +1,109 @@
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+name: "feature_extractor"
+backend: "python"
+max_batch_size: 128
+
+parameters [
+ {
+ key: "chunk_size_s",
+ value: { string_value: "0.6"}
+ },
+ {
+ key: "config_path"
+ value: { string_value: "model_repo_paraformer_large_online/feature_extractor/config.yaml"}
+ }
+]
+
+sequence_batching{
+ max_sequence_idle_microseconds: 15000000
+ oldest {
+ max_candidate_sequences: 1024
+ preferred_batch_size: [32, 64, 128]
+ max_queue_delay_microseconds: 300
+ }
+ control_input [
+ {
+ name: "START",
+ control [
+ {
+ kind: CONTROL_SEQUENCE_START
+ fp32_false_true: [0, 1]
+ }
+ ]
+ },
+ {
+ name: "READY"
+ control [
+ {
+ kind: CONTROL_SEQUENCE_READY
+ fp32_false_true: [0, 1]
+ }
+ ]
+ },
+ {
+ name: "CORRID",
+ control [
+ {
+ kind: CONTROL_SEQUENCE_CORRID
+ data_type: TYPE_UINT64
+ }
+ ]
+ },
+ {
+ name: "END",
+ control [
+ {
+ kind: CONTROL_SEQUENCE_END
+ fp32_false_true: [0, 1]
+ }
+ ]
+ }
+ ]
+}
+
+
+input [
+ {
+ name: "wav"
+ data_type: TYPE_FP32
+ dims: [-1]
+ },
+ {
+ name: "wav_lens"
+ data_type: TYPE_INT32
+ dims: [1]
+ }
+]
+
+output [
+ {
+ name: "speech"
+ data_type: TYPE_FP32
+    dims: [61, 80]  # model.py reads dims[-2] as decoding_window (frames per response) and dims[-1] as feature_size
+ }
+]
+
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_GPU
+ }
+]
+
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.yaml b/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.yaml
new file mode 100755
index 0000000..9b2266f
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/feature_extractor/config.yaml
@@ -0,0 +1,8639 @@
+config: examples/aishell_demo/paraformer_s2/conf/train_asr_paraformer_sanm_50e_16d_2048_512_lfr6.yaml
+print_config: false
+log_level: INFO
+dry_run: false
+iterator_type: sequence
+output_dir: /nfs/FunASR_results/paraformer/1m-1gpu/baseline_train_asr_paraformer_sanm_50e_16d_2048_512_lfr6_fbank_zh_char_local
+ngpu: 1
+seed: 0
+num_workers: 16
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: file:///nfs/FunASR_results/paraformer/1m-1gpu/baseline_train_asr_paraformer_sanm_50e_16d_2048_512_lfr6_fbank_zh_char_local/ddp_init
+dist_world_size: 1
+dist_rank: 0
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: true
+unused_parameters: true
+sharded_ddp: false
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+collect_stats: false
+write_collected_feats: false
+max_epoch: 20
+patience: null
+val_scheduler_criterion:
+- valid
+- acc
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+ - acc
+ - max
+keep_nbest_models: 5
+nbest_averaging_interval: 0
+grad_clip: 5
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: false
+log_interval: 50
+use_matplotlib: true
+use_tensorboard: true
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+pretrain_path: null
+init_param:
+- /nfs/init_model/paraformer_9k_bigmodel.pth
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 1000
+valid_batch_bins: null
+train_shape_file:
+- /nfs/dataset/data/asr_stats_fbank_zh_char/train/speech_shape
+- /nfs/dataset/data/asr_stats_fbank_zh_char/train/text_shape.char
+valid_shape_file:
+- /nfs/dataset/data/asr_stats_fbank_zh_char/dev/speech_shape
+- /nfs/dataset/data/asr_stats_fbank_zh_char/dev/text_shape.char
+batch_type: length
+valid_batch_type: null
+fold_length:
+- 512
+- 150
+sort_in_batch: descending
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+train_data_file: /nfs/dataset/data/dump/fbank/train/ark_txt.scp
+valid_data_file: /nfs/dataset/data/dump/fbank/dev/ark_txt.scp
+train_data_path_and_name_and_type:
+- - /nfs/dataset/data/dump/fbank/train/feats.scp
+ - speech
+ - kaldi_ark
+- - /nfs/dataset/data/dump/fbank/train/text
+ - text
+ - text
+valid_data_path_and_name_and_type:
+- - /nfs/dataset/data/dump/fbank/dev/feats.scp
+ - speech
+ - kaldi_ark
+- - /nfs/dataset/data/dump/fbank/dev/text
+ - text
+ - text
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+valid_max_cache_size: null
+optim: adam
+optim_conf:
+ lr: 0.0005
+scheduler: warmuplr
+scheduler_conf:
+ warmup_steps: 30000
+use_pai: false
+num_worker_count: 1
+access_key_id: null
+access_key_secret: null
+endpoint: null
+bucket_name: null
+oss_bucket: null
+token_list:
+- <blank>
+- <s>
+- </s>
+- and
+- 绛�
+- 闄�
+- 鐪�
+- 濉�
+- 妾�
+- 琛�
+- 姘�
+- 瀛�
+- 闃�
+- 閭�
+- 鍧�
+- 鍠�
+- 鏇�
+- 榧�
+- 闅�
+- 鑵�
+- 鏃�
+- 鐭�
+- 鏁�
+- 淇�
+- 浼�
+- 鎬�
+- 濉�
+- price
+- 鏄�
+- 缃�
+- 濞�
+- 妫�
+- 寮�
+- 鑼�
+- 搴�
+- 姹�
+- 鎿�
+- 璐�
+- 楣�
+- these
+- 杩�
+- 璇�
+- do
+- 鐩�
+- 绉�
+- 鍟�
+- 棰�
+- 杈�
+- 褰�
+- ps
+- 鏂�
+- 鐬�
+- 閾�
+- 婕�
+- 韫�
+- 鏃�
+- 绐�
+- 鑷�
+- 瑙�
+- 鍢�
+- 娣�
+- 灏�
+- 琚�
+- 鏂�
+- 绛�
+- 濯�
+- 鎸�
+- 鑷�
+- 榻�
+- 鐠�
+- 绗�
+- 婊�
+- 鍗�
+- 鎰�
+- 鎬�
+- gr
+- 骞�
+- 绁�
+- 绠�
+- 绀�
+- 鑼�
+- 鍖�
+- 婢�
+- 淇�
+- 铇�
+- ing
+- 鑲�
+- 鑲�
+- 宸�
+- 涔�
+- 锠�
+- 璺�
+- 钘�
+- 娌�
+- ness
+- 鐠�
+- 绉�
+- 瀵�
+- 鏍�
+- 鑸�
+- 鍖�
+- 鐪�
+- 宀�
+- 鍕�
+- 鐠�
+- 榛�
+- 鐘�
+- 鍝�
+- 缃�
+- k
+- 涓�
+- de
+- 璺�
+- 姊�
+- 闇�
+- 姣�
+- 宄�
+- 绔�
+- 鐮�
+- 鐮�
+- 鐪�
+- 婊�
+- 楣�
+- 鑲�
+- 闃�
+- per
+- 蹇�
+- 涔�
+- 搴�
+- 閭�
+- 杓�
+- 椹�
+- 澶�
+- 瀵�
+- 蹇�
+- 宕�
+- 鐫�
+- 閫�
+- 宄�
+- 瓒�
+- 鐙�
+- 锜�
+- 绗�
+- 閫�
+- 娲�
+- 缂�
+- ore
+- 杈�
+- 绮�
+- 韫�
+- 榛�
+- 娴�
+- comp
+- 鐘�
+- 钘�
+- 鏈�
+- 鍡�
+- 榛�
+- 閫�
+- 缁�
+- 閴�
+- 楹�
+- 鍠�
+- 琚�
+- 鍒�
+- 渚�
+- ic
+- 楠�
+- 鐡�
+- 鏌�
+- 妗�
+- 椴�
+- 瑜�
+- 闊�
+- 濡�
+- 鐢�
+- to
+- 杞�
+- 濉�
+- 鍧�
+- 璨�
+- n
+- 钑�
+- 鐤�
+- 浼�
+- 閰�
+- 鏆�
+- 闇�
+- 浜�
+- 钀�
+- 鍢�
+- nu
+- 鎸�
+- by
+- 鑱�
+- 琚�
+- 宥�
+- 妗�
+- 鎶�
+- 鏀�
+- 闉�
+- 姣�
+- 鏃�
+- 搴�
+- 鍛�
+- 璇�
+- tting
+- 鐙�
+- 榄�
+- 浼�
+- 鍠�
+- 妯�
+- 缈�
+- 鎬�
+- 鍦�
+- 鐫�
+- 鐩�
+- times
+- 椴�
+- 鐖�
+- 缁�
+- 鐨�
+- 灏�
+- 鍢�
+- 娓�
+- 杩�
+- 鍢�
+- 琚�
+- 濮�
+- 濂�
+- 鍙�
+- 绂�
+- 鎸�
+- 搴�
+- 缁�
+- 婕�
+- 榫�
+- tu
+- 姒�
+- 璇�
+- minu
+- 钀�
+- 瑁�
+- 鐜�
+- 璋�
+- 浜�
+- 鍓�
+- any
+- 璇�
+- 鍞�
+- 闋�
+- 鏂�
+- 璧�
+- 楠�
+- 璁�
+- 姣�
+- 搴�
+- 寰�
+- 妞�
+- 杩�
+- 鍩�
+- 杈�
+- 姹�
+- 鍙�
+- 杈�
+- 闅�
+- 閬�
+- pp
+- 缈�
+- 浣�
+- 鏍�
+- 韪�
+- 鐨�
+- 鑻�
+- 鐥�
+- 濂�
+- 闃�
+- 鎮�
+- 鐐�
+- 纰�
+- 鑼�
+- 鐫�
+- 闂�
+- few
+- 绫�
+- 瀛�
+- 鎷�
+- for
+- 鏇�
+- 鐤�
+- 杈�
+- 璧�
+- 濮�
+- 涓�
+- 璇�
+- 鎬�
+- 娌�
+- with
+- 鐫�
+- 璋�
+- 鏅�
+- 鍡�
+- id
+- aga
+- 瀹�
+- 榄�
+- 椴�
+- 瀵�
+- 婊�
+- 鐝�
+- 鑵�
+- 鍐�
+- 澶�
+- 濞�
+- 瀹�
+- 渚�
+- 绛�
+- an
+- 纾�
+- 閭�
+- 鐫�
+- 韪�
+- ite
+- 鐙�
+- ele
+- 钃�
+- 鐚�
+- 璞�
+- 钄�
+- 娌�
+- 鍘�
+- 閾�
+- 鐧�
+- 绔�
+- 鐢�
+- 鐠�
+- 鑼�
+- 鍝�
+- 鑿�
+- 榫�
+- 宀�
+- 瀚�
+- 鎷�
+- 鏈�
+- 鎴�
+- 鐞�
+- 缁�
+- 婢�
+- 妤�
+- 鑾�
+- cer
+- here
+- 鍒�
+- 甯�
+- 鍡�
+- 鍋�
+- 鎷�
+- 楂�
+- 绌�
+- 鍕�
+- 鏍�
+- 濉�
+- ou
+- 姗�
+- 鍒�
+- 渚�
+- 閹�
+- bre
+- 瓒�
+- 绋�
+- 宀�
+- 鎷�
+- 钀�
+- 宀�
+- 姘�
+- 妗�
+- 楝�
+- clu
+- 铓�
+- 鑲�
+- 璁�
+- 楠�
+- 蹇�
+- 闆�
+- 0
+- 绠�
+- 鑵�
+- 鐠�
+- 閰�
+- 閿�
+- 锜�
+- 閫�
+- 妞�
+- 棰�
+- ts
+- 鐭�
+- 鎷�
+- 鐝�
+- 渚�
+- 铓�
+- 鐨�
+- 缈�
+- 鍎�
+- 鎭�
+- 鐎�
+- 鏁�
+- 鐮�
+- 濂�
+- 鑰�
+- 鐑�
+- 缁�
+- 缂�
+- 杈�
+- time
+- 钄�
+- too
+- 鍦�
+- 楠�
+- 鎱�
+- 鍟�
+- 甯�
+- 妤�
+- bi
+- 铓�
+- 娴�
+- ine
+- 缍�
+- old
+- 鑲�
+- 鎿�
+- ling
+- 鐦�
+- 濞�
+- prob
+- 鏆�
+- 椴�
+- 鐒�
+- 鍓�
+- 鐜�
+- 涔�
+- 绾�
+- bo
+- 鐔�
+- 姣�
+- 槌�
+- 闉�
+- 绲�
+- 绯�
+- 鑿�
+- 寤�
+- 璋�
+- 鍐�
+- 閬�
+- 琛�
+- ich
+- 鏌�
+- 宄�
+- 娓�
+- 浜�
+- 鑽�
+- 铦�
+- 鎵�
+- 閯�
+- 璇�
+- 灏�
+- 鎽�
+- 鐗�
+- 钖�
+- da
+- w
+- 鍛�
+- 闄�
+- 纾�
+- 鍖�
+- while
+- 杩�
+- 鑹�
+- 閮�
+- ct
+- 铓�
+- 娴�
+- 椴�
+- 鑵�
+- pres
+- 姘�
+- 棰�
+- 澶�
+- ter
+- 宸�
+- 渚�
+- 濡�
+- 鍤�
+- 娈�
+- 绉�
+- bas
+- 閿�
+- 绡�
+- 鍚�
+- 閯�
+- 槌�
+- 鐤�
+- cor
+- 姣�
+- 姝�
+- 閭�
+- 鍦�
+- inn
+- 鑸�
+- 鍩�
+- 璨�
+- 甯�
+- 濡�
+- ged
+- 绐�
+- put
+- 璇�
+- 鍫�
+- 姘�
+- 鍦�
+- 鎽�
+- 娌�
+- 璋�
+- 杞�
+- 璇�
+- 鐞�
+- 缇�
+- 妾�
+- 鎱�
+- 韪�
+- 鍟�
+- 鐬�
+- 灞�
+- 鎾�
+- 绛�
+- 璐�
+- 椋�
+- 鑻�
+- 鎵�
+- mis
+- 妗�
+- 渚�
+- jo
+- ke
+- 鍐�
+- 婊�
+- 锠�
+- 鍛�
+- 鎹�
+- 璇�
+- 宕�
+- 闃�
+- 鎺�
+- 鍔�
+- 鐨�
+- 宸�
+- 鑲�
+- 鑲�
+- 钁�
+- 妾�
+- 鐢�
+- 鐙�
+- 璋�
+- 鍌�
+- 璇�
+- 绾�
+- 鑽�
+- 鍘�
+- bri
+- 缁�
+- 鐫�
+- 甯�
+- 姹�
+- 鍣�
+- 鐏�
+- 閯�
+- 鐓�
+- 钂�
+- 鍘�
+- 棣�
+- 鐙�
+- 纰�
+- 绌�
+- lar
+- 椋�
+- 杩�
+- meeting
+- tter
+- 瓒�
+- 杈�
+- 妞�
+- 姹�
+- 鐕�
+- ate
+- 绀�
+- 楠�
+- will
+- not
+- 鐓�
+- 鍡�
+- 绉�
+- 鍕�
+- 闄�
+- 閿�
+- 澧�
+- 瀹�
+- 铇�
+- 閰�
+- 鍞�
+- 棰�
+- 浠�
+- iting
+- 鐢�
+- 妤�
+- 鐟�
+- 閯�
+- our
+- 璇�
+- venue
+- 闇�
+- 闀�
+- 鐥�
+- 濞�
+- 濠�
+- 鍩�
+- 姹�
+- 閾�
+- 寰�
+- 闅�
+- 鐚�
+- 鍗�
+- 鎱�
+- said
+- 瑁�
+- bus
+- 鍙�
+- 绮�
+- 杩�
+- 缂�
+- 绾�
+- 纾�
+- 鐐�
+- 鍑�
+- 鏇�
+- 鍏�
+- 娲�
+- 鏉�
+- 姒�
+- hotels
+- 鐫�
+- 绯�
+- 绐�
+- 钁�
+- 甯�
+- 鑽�
+- 濉�
+- 鐭�
+- 鍦�
+- er
+- 铓�
+- 绡�
+- 鍜�
+- 鍚�
+- 鍠�
+- 宀�
+- 鍤�
+- 璋�
+- 宕�
+- 锜�
+- wal
+- 濮�
+- 璋�
+- 涓�
+- 鑿�
+- ction
+- 鑲�
+- 绁�
+- 杩�
+- 鐠�
+- 浠�
+- 鍞�
+- 纭�
+- 鍢�
+- 閱�
+- 鍏�
+- 鎭�
+- 閾�
+- 鍙�
+- cre
+- 寮�
+- 鏇�
+- 铻�
+- 鑻�
+- 鐤�
+- 楦�
+- 鏉�
+- 鎼�
+- 閫�
+- 鑼�
+- 鐨�
+- 琛�
+- 鑲�
+- ns
+- market
+- 璁�
+- 闃�
+- 鏈�
+- 鑻�
+- 楂�
+- is
+- 骞�
+- 閮�
+- 钀�
+- 甯�
+- 鐑�
+- 瑗�
+- 宕�
+- 婧�
+- 铻�
+- 閾�
+- 浠�
+- 鑸�
+- 鏁�
+- 鍊�
+- 閿�
+- 楦�
+- 澶�
+- 鍩�
+- 鑵�
+- 闆�
+- ol
+- 娑�
+- 缈�
+- 澶�
+- 鐤�
+- el
+- 椹�
+- 楣�
+- 鐟�
+- 钖�
+- 濂�
+- 濞�
+- 闈�
+- 闀�
+- 浼�
+- 鏆�
+- what
+- 钀�
+- 鐨�
+- 鐑�
+- 鏁�
+- 鏍�
+- ri
+- 鐫�
+- 浼�
+- 寰�
+- 绁�
+- 璐�
+- 姹�
+- 绠�
+- 鎺�
+- 姊�
+- ob
+- 璧�
+- 榛�
+- 娼�
+- 椴�
+- 鐗�
+- 缃�
+- inter
+- 鍐�
+- 鎴�
+- 钖�
+- if
+- 鍫�
+- 鎼�
+- 婕�
+- 璧�
+- 鐥�
+- 鍙�
+- 椴�
+- 鍏�
+- 娴�
+- 钖�
+- 姊�
+- 璋�
+- 鍔�
+- 鑼�
+- 椋�
+- r
+- her
+- 椴�
+- 濡�
+- 鎯�
+- 姒�
+- 璋�
+- 姊�
+- twenty
+- ni
+- 鐚�
+- 鍏�
+- 姘�
+- 鑲�
+- 楗�
+- 闄�
+- within
+- 杞�
+- cted
+- 鍗�
+- 鍤�
+- 鍞�
+- inte
+- 閽�
+- 鎶�
+- 閮�
+- 鐗�
+- 钄�
+- 閭�
+- 鎳�
+- 閭�
+- 鑼�
+- 杈�
+- 绉�
+- 榛�
+- hard
+- 鎮�
+- 缁�
+- ved
+- oo
+- used
+- 婧�
+- 钘�
+- 澹�
+- 鐚�
+- 鑺�
+- char
+- we
+- 鍛�
+- ss
+- 宀�
+- 妗�
+- 杩�
+- 鑽�
+- 鍝�
+- 浠�
+- 鎶�
+- 娣�
+- il
+- 鐢�
+- 瑜�
+- 鏋�
+- bir
+- ges
+- 鏆�
+- 妞�
+- 鍚�
+- sal
+- 鐛�
+- 鐓�
+- 绠�
+- 鐖�
+- 鍛�
+- 閭�
+- 灏�
+- 姘�
+- sh
+- fore
+- mat
+- 娴�
+- 鏋�
+- 绋�
+- 鑹�
+- 鍞�
+- can
+- eng
+- know
+- 妗�
+- 浜�
+- tou
+- 浼�
+- 铏�
+- 缍�
+- sha
+- 鑵�
+- 钄�
+- 鑾�
+- 鎴�
+- 閶�
+- 閰�
+- 鐠�
+- 闃�
+- 璐�
+- 杩�
+- 楠�
+- dre
+- 璧�
+- 鎯�
+- 瑁�
+- 鏅�
+- 閾�
+- 鎰�
+- 闅�
+- 绁�
+- 璨�
+- 鐗�
+- al
+- 鎶�
+- 鏌�
+- 鎺�
+- 鍠�
+- 锜�
+- 娼�
+- 鎵�
+- 鑳�
+- 閮�
+- 钃�
+- 绋�
+- 鍌�
+- 鍨�
+- 鐢�
+- 闈�
+- chic
+- 鏁�
+- 鏍�
+- 榻�
+- 浠�
+- 鎷�
+- har
+- 浠�
+- 鐮�
+- i
+- 鑸�
+- 鍚�
+- 鎱�
+- 鑺�
+- why
+- tic
+- 鍝�
+- 浣�
+- 閾�
+- 濡�
+- x
+- 鍒�
+- 娈�
+- 韬�
+- 鑾�
+- 瀹�
+- 鎷�
+- 钁�
+- 鍝�
+- 濠�
+- ance
+- 妫�
+- 娌�
+- 宕�
+- 鏇�
+- 瀹�
+- 鏂�
+- 鎺�
+- 閱�
+- 搴�
+- 榛�
+- 涔�
+- 鏃�
+- 閾�
+- 瀵�
+- 閽�
+- 娌�
+- gh
+- 鍚�
+- 瀣�
+- 鍘�
+- 绠�
+- ah
+- 璀�
+- 椴�
+- 钂�
+- 閵�
+- 瑙�
+- 鑴�
+- 澶�
+- who
+- 姣�
+- 璧�
+- 鍢�
+- 韫�
+- 椐�
+- avenue
+- 鍛�
+- that's
+- 瑷�
+- 钂�
+- 灏�
+- 鎶�
+- 闇�
+- que
+- 鐗�
+- 鍙�
+- 缁�
+- 鎹�
+- 鍩�
+- 铔�
+- 杩�
+- ir
+- 鏈�
+- 涓�
+- 楣�
+- 閽�
+- 鏅�
+- 閽�
+- 宀�
+- 鐑�
+- 鎺�
+- 绗�
+- 棣�
+- 鐢�
+- 鍟�
+- 璧�
+- 锠�
+- 鑽�
+- 婵�
+- 鎽�
+- 鍓�
+- 娴�
+- 鐡�
+- 娑�
+- 闃�
+- eng
+- 澧�
+- 椴�
+- 鑰�
+- 鎷�
+- 杞�
+- 寮�
+- 绉�
+- ken
+- 鐪�
+- 绌�
+- 璺�
+- 鑺�
+- 鍓�
+- 婀�
+- 鍚�
+- 鍠�
+- 鍊�
+- 浼�
+- 鍜�
+- 鍣�
+- 鍓�
+- 鐣�
+- 鍥�
+- 姣�
+- 鐖�
+- 绠�
+- ans
+- 'no'
+- 缂�
+- fic
+- 蹇�
+- 绀�
+- 瑙�
+- 渚�
+- 涔�
+- 缂�
+- 姣�
+- 娈�
+- 绂�
+- 韫�
+- 鑼�
+- 娌�
+- 鑵�
+- 鏇�
+- 鍊�
+- 楠�
+- 淇�
+- 涓�
+- 宸�
+- 绯�
+- there
+- 绗�
+- 娉�
+- 铏�
+- 闅�
+- 瀹�
+- 璋�
+- 娣�
+- even
+- 鍢�
+- 鎺�
+- 杩�
+- 鏍�
+- 闅�
+- 閽�
+- 楗�
+- 瑁�
+- 鎼�
+- 鏈�
+- 鍤�
+- 鍨�
+- 鍊�
+- sy
+- 钂�
+- 瑷�
+- 鐏�
+- 钁�
+- 韪�
+- only
+- den
+- 鑳�
+- 鏇�
+- 姹�
+- 濂�
+- 闄�
+- 鏅�
+- 鑷�
+- 璧�
+- 铓�
+- 渚�
+- 钘�
+- 閸�
+- 绌�
+- 灏�
+- find
+- 鍋�
+- 椤�
+- 瀣�
+- 娴�
+- area
+- 鐨�
+- 韫�
+- af
+- 鏇�
+- ger
+- 琚�
+- 娓�
+- 鍖�
+- 鎯�
+- 鏋�
+- 妲�
+- 璺�
+- 姹�
+- 瀚�
+- 宕�
+- 棰�
+- 涓�
+- 涓�
+- 鍝�
+- 椴�
+- 浣�
+- 鐤�
+- 渚�
+- 褰�
+- 浠�
+- 楦�
+- 寮�
+- 缂�
+- 妾�
+- 娓�
+- 灏�
+- comm
+- 鐦�
+- 鍥�
+- 閿�
+- 鎯�
+- 琛�
+- 钄�
+- 榫�
+- 閰�
+- ina
+- 灏�
+- 瀛�
+- 钄�
+- 甯�
+- 寮�
+- 杩�
+- 瑷�
+- 鎭�
+- 绱�
+- 鍚�
+- 瑙�
+- 鍗�
+- need
+- 鍙�
+- 鑼�
+- 姹�
+- 閭�
+- 纾�
+- 鐒�
+- 铚�
+- 绫�
+- 淇�
+- ath
+- 铔�
+- 缁�
+- 澹�
+- 璇�
+- ing
+- 甯�
+- 鑼�
+- 鐮�
+- has
+- 铦�
+- 鐭�
+- 鎷�
+- 涔�
+- 娴�
+- another
+- 杈�
+- 鏈�
+- 娈�
+- 澹�
+- 鐏�
+- 绀�
+- 閽�
+- 鐡�
+- 搴�
+- 璇�
+- 姣�
+- 闈�
+- 楦�
+- 澧�
+- 鐠�
+- 鍜�
+- 鎯�
+- 鍖�
+- 鑵�
+- 鑻�
+- 鑻�
+- 涓�
+- 鑺�
+- 鍥�
+- 娣�
+- 棣�
+- 姒�
+- 鑽�
+- 鎽�
+- 閱�
+- 缂�
+- 甯�
+- 铔�
+- 鏇�
+- 钀�
+- 鑾�
+- 鐘�
+- 鎷�
+- 鐗�
+- 钑�
+- 骞�
+- 鍐�
+- 鍩�
+- 鑼�
+- 浣�
+- 鍣�
+- ked
+- port
+- 鏌�
+- 鍚�
+- 绔�
+- 闉�
+- 绯�
+- 鏍�
+- 瑜�
+- 鏉�
+- 闄�
+- shi
+- 鏈�
+- 鍗�
+- 閽�
+- 鎷�
+- walk
+- 閽�
+- 宀�
+- 琛�
+- 鑻�
+- 鐕�
+- 澧�
+- 鎴�
+- ations
+- 璇�
+- 鍐�
+- 寮�
+- 鎺�
+- 鑵�
+- 娣�
+- 榧�
+- 濡�
+- 浜�
+- 淇�
+- 楣�
+- 鍗�
+- 浣�
+- 妫�
+- does
+- tes
+- 鎷�
+- 鍔�
+- 缁�
+- ren
+- 璐�
+- g
+- 娣�
+- 閽�
+- 妫�
+- 澧�
+- 鐤�
+- 楠�
+- 鎽�
+- 绁�
+- 鍏�
+- 鍧�
+- int
+- use
+- 娉�
+- 璧�
+- 鐢�
+- 钁�
+- 杈�
+- 鐐�
+- 鏃�
+- 楦�
+- 鑼�
+- 铻�
+- 鑹�
+- 鏅�
+- 閽�
+- 鍕�
+- 鍢�
+- 榫�
+- 钑�
+- 娓�
+- 閽�
+- 鍐�
+- 寮�
+- 棰�
+- 鐏�
+- 鍩�
+- 椴�
+- 浜�
+- 鐭�
+- 杞�
+- a
+- 鍗�
+- 瑙�
+- 鍛�
+- 绁�
+- rec
+- 閫�
+- 鎲�
+- 钂�
+- 鍏�
+- 涔�
+- 楦�
+- 鍗�
+- 搴�
+- 浠�
+- how
+- 閾�
+- 韪�
+- 闅�
+- 閬�
+- 璞�
+- low
+- ak
+- 鍔�
+- 鍝�
+- 澶�
+- proble
+- es
+- 璇�
+- 鍝�
+- 鎶�
+- 绁�
+- 鍋�
+- 鎻�
+- 鐨�
+- 鐩�
+- 鍒�
+- 楠�
+- 鑽�
+- 楗�
+- 鑰�
+- 鑾�
+- just
+- 绨�
+- 鐜�
+- pl
+- 绫�
+- 鐝�
+- 钑�
+- 鑷�
+- 闂�
+- 宕�
+- gra
+- 鐞�
+- 鍦�
+- 鐡�
+- 璧�
+- 闀�
+- 琚�
+- 鍏�
+- 鑺�
+- 铓�
+- stu
+- mee
+- 娌�
+- 浼�
+- 瑙�
+- 绛�
+- 搴�
+- still
+- 棰�
+- wat
+- '4'
+- 缁�
+- 鍏�
+- 浜�
+- sho
+- 鐝�
+- 楗�
+- 榛�
+- than
+- good
+- l
+- 姊�
+- 蹇�
+- 鑽�
+- 韬�
+- 韫�
+- 鍛�
+- 鍦�
+- 鍞�
+- 闄�
+- ue
+- 椴�
+- 纰�
+- 鎬�
+- 椋�
+- country
+- 绮�
+- 鎬�
+- 椋�
+- 鐑�
+- 鍚�
+- 宓�
+- 椹�
+- 绾�
+- in
+- 闂�
+- 棣�
+- 姒�
+- 绐�
+- 娉�
+- 纭�
+- 韬�
+- th
+- 鑰�
+- 璐�
+- wom
+- 鎺�
+- 绠�
+- 缁�
+- 鑸�
+- 鐒�
+- 鎸�
+- 闀�
+- thirty
+- 闂�
+- 鎽�
+- 鍫�
+- 鐗�
+- 鏍�
+- 鍫�
+- 棣�
+- 鐩�
+- t
+- 鏃�
+- 鍑�
+- 娲�
+- 閷�
+- 闊�
+- por
+- 瀛�
+- 鑼�
+- 闂�
+- 娣�
+- 鍧�
+- 鐑�
+- 娲�
+- gre
+- 鏁�
+- 鍝�
+- 鍒�
+- ding
+- 閬�
+- 閽�
+- 澹�
+- lo
+- 绾�
+- 鐮�
+- 鐏�
+- lee
+- 鐜�
+- up
+- 姊�
+- 鏃�
+- 浣�
+- 绔�
+- 缁�
+- 鐮�
+- 閰�
+- 鑻�
+- 鐒�
+- 绁�
+- 鑻�
+- 鍡�
+- ail
+- 娈�
+- om
+- 妫�
+- 缈�
+- 澧�
+- 钀�
+- 鍨�
+- 纰�
+- cts
+- 娓�
+- 鐭�
+- 鎺�
+- best
+- 閿�
+- 璋�
+- 鍠�
+- 闆�
+- 杈�
+- 鍟�
+- 鍡�
+- 璋�
+- 鐤�
+- 鐜�
+- 鍞�
+- 鍏�
+- 褰�
+- 婧�
+- 涓�
+- 妫�
+- 妗�
+- 妯�
+- 璺�
+- 铦�
+- 鍝�
+- 鍟�
+- 鏇�
+- 涔�
+- 绠�
+- 鍩�
+- 鏈�
+- ear
+- 椴�
+- ship
+- 鍚�
+- 绮�
+- 鑸�
+- 浼�
+- 瑙�
+- 鐕�
+- 閾�
+- 纭�
+- 鎾�
+- 鐦�
+- thanks
+- 閿�
+- 鍦�
+- contin
+- 渚�
+- 娴�
+- 妫�
+- 姝�
+- ici
+- 鐝�
+- 瑜�
+- 鍒�
+- 婕�
+- 绠�
+- 缂�
+- 绡�
+- 椴�
+- 鐮�
+- 鍚�
+- 閮�
+- 蹇�
+- 鏍�
+- 鐦�
+- 楗�
+- tw
+- 鎷�
+- 鐩�
+- 澹�
+- 妗�
+- 鍡�
+- 闉�
+- 鐢�
+- 閿�
+- 娑�
+- 鍐�
+- 閯�
+- 娣�
+- 杈�
+- 鍞�
+- 鏆�
+- 铓�
+- 璺�
+- 閮�
+- 闀�
+- ku
+- 璧�
+- 濮�
+- 璇�
+- 纭�
+- 鑰�
+- 娑�
+- day
+- 绗�
+- away
+- 绋�
+- 楣�
+- 鐝�
+- 姣�
+- 閰�
+- 姹�
+- 姊�
+- 瀚�
+- 鍑�
+- ces
+- 宸�
+- 鏅�
+- 鑲�
+- 鑾�
+- 鐥�
+- 缂�
+- 鎬�
+- 閯�
+- 鎼�
+- fri
+- 浠�
+- 璇�
+- 瀹�
+- 鐝�
+- 鍦�
+- 寮�
+- 鎮�
+- buil
+- 缁�
+- 澶�
+- 澶�
+- 绁�
+- 钃�
+- 鍦�
+- 渚�
+- 璺�
+- 璋�
+- 鑾�
+- 楂�
+- 杩�
+- 楣�
+- 鍗�
+- 鍡�
+- 浣�
+- 鎰�
+- 濯�
+- 娈�
+- 榻�
+- 濡�
+- 娈�
+- 鍡�
+- 閽�
+- court
+- 韪�
+- 鑴�
+- 鑿�
+- 鐞�
+- 鍌�
+- 涓�
+- 铏�
+- 鍞�
+- 閫�
+- 椴�
+- 闀�
+- 鑲�
+- 寮�
+- 鏉�
+- 灞�
+- 鎭�
+- 钂�
+- 澶�
+- 鐜�
+- 鍧�
+- gar
+- ans
+- 鍢�
+- 宓�
+- 鍛�
+- 閰�
+- 灞�
+- 閳�
+- 楹�
+- 鏃�
+- 鏃�
+- 鐤�
+- with
+- 瑙�
+- 灞�
+- 瓒�
+- 锠�
+- 瀵�
+- 鐬�
+- 灞�
+- 鏄�
+- 榄�
+- 琛�
+- 閽�
+- 閰�
+- 槌�
+- 鎴�
+- 鑺�
+- 鍗�
+- gu
+- 闄�
+- 榛�
+- 缂�
+- ch
+- 鎽�
+- 姊�
+- 鑳�
+- 鎾�
+- 鐤�
+- 鐮�
+- el
+- 鍞�
+- 鑺�
+- 閫�
+- ol
+- 缁�
+- 棣�
+- 鐘�
+- bal
+- 椴�
+- 闊�
+- 鐒�
+- 鑳�
+- 绮�
+- 鏋�
+- 宓�
+- 鍙�
+- she
+- through
+- 绛�
+- 缈�
+- 鍗�
+- se
+- 榄�
+- 鏍�
+- mes
+- 鏅�
+- 璺�
+- 鎺�
+- 闃�
+- 鏅�
+- 鏉�
+- st
+- 闈�
+- 鏂�
+- 绮�
+- 鑸�
+- 鐎�
+- 妫�
+- 鑼�
+- 闊�
+- 閻�
+- 鐏�
+- 榫�
+- 鍗�
+- lion
+- 鏇�
+- 鍝�
+- 鐨�
+- 鍝�
+- pe
+- 鎮�
+- 閫�
+- 娑�
+- 铔�
+- 浣�
+- 鐚�
+- 鐙�
+- nine
+- 鍥�
+- 椋�
+- 鎬�
+- 鎱�
+- 鎱�
+- 淇�
+- 姹�
+- 鑲�
+- 闅�
+- 鍧�
+- 璧�
+- 娴�
+- 鐧�
+- my
+- 濮�
+- ill
+- 鑳�
+- 瑕�
+- 鑷�
+- 鐭�
+- 鐐�
+- 琛�
+- 鍓�
+- 棣�
+- 浼�
+- 鑹�
+- 濡�
+- 鑲�
+- 妾�
+- soon
+- 姘�
+- 鐞�
+- 璋�
+- light
+- 澶�
+- seven
+- 鏃�
+- 鑺�
+- en
+- 鐑�
+- 璇�
+- 鏀�
+- 闄�
+- 瑙�
+- 閾�
+- 姘�
+- 浣�
+- night
+- hou
+- 楣�
+- 鑶�
+- 鐐�
+- 鎶�
+- 鐝�
+- ses
+- 婕�
+- both
+- 妗�
+- 绗�
+- 閹�
+- led
+- 鎶�
+- 鑶�
+- 铚�
+- 鑿�
+- 濞�
+- 鍥�
+- 鎻�
+- 娴�
+- f
+- 鑺�
+- 鍚�
+- sion
+- 閬�
+- 鐡�
+- 鎱�
+- 浠�
+- 閯�
+- 鐭�
+- thou
+- 娌�
+- 鍞�
+- 鍖�
+- 璁�
+- 宓�
+- 楂�
+- ine
+- 鎭�
+- 绐�
+- -
+- 琛�
+- 鑶�
+- 纰�
+- national
+- it
+- 鐦�
+- ci
+- 渚�
+- l
+- 闄�
+- 鐓�
+- 鍘�
+- 鍘�
+- 鎮�
+- 绛�
+- 鐘�
+- 缃�
+- 缁�
+- 鏁�
+- ys
+- 鎹�
+- 娈�
+- 濉�
+- 鎸�
+- tal
+- 钀�
+- 鍗�
+- 鐢�
+- 鍚�
+- 铚�
+- 甯�
+- 绗�
+- new
+- 鏄�
+- 璇�
+- 瑗�
+- 鐭�
+- 钘�
+- 鑻�
+- th
+- 鍝�
+- its
+- 铓�
+- ran
+- 婢�
+- eight
+- 璐�
+- 鍌�
+- 浜�
+- 缁�
+- 妲�
+- 绛�
+- 杩�
+- 閱�
+- 閷�
+- 韫�
+- 鐜�
+- look
+- 鍦�
+- 棰�
+- 鏃�
+- 鍦�
+- 缁�
+- op
+- 鍜�
+- 妲�
+- 鍐�
+- 涔�
+- 楦�
+- 鏌�
+- 铓�
+- 鎿�
+- 閿�
+- 濮�
+- 鎯�
+- 绠�
+- 濂�
+- ra
+- 鐬�
+- 渚�
+- 鎭�
+- 宀�
+- 鍠�
+- 鎽�
+- 鍗�
+- wee
+- 缇�
+- cep
+- fron
+- 濡�
+- 寰�
+- 绂�
+- 宸�
+- 澶�
+- 閯�
+- re
+- 鍔�
+- 杩�
+- 鐙�
+- 鐟�
+- 妫�
+- 灞�
+- 鐨�
+- 闄�
+- 娈�
+- 浠�
+- 铦�
+- 缂�
+- 闀�
+- 鍝�
+- ase
+- row
+- 缂�
+- stance
+- 浜�
+- custom
+- 鎶�
+- 闉�
+- 铔�
+- 鍩�
+- 婕�
+- 鍑�
+- 铏�
+- 濮�
+- 鐥�
+- 閭�
+- ss
+- 鎴�
+- 鎮�
+- spe
+- 缇�
+- 鍚�
+- 閿�
+- 婀�
+- 绔�
+- 娣�
+- 瀛�
+- '3'
+- 閮�
+- 鍗�
+- fif
+- 婵�
+- 灏�
+- 绨�
+- 閿�
+- 鍟�
+- 鎳�
+- 闇�
+- 妫�
+- 绨�
+- 鎬�
+- g
+- 姣�
+- 鐘�
+- ffe
+- 闀�
+- 闁�
+- 鍚�
+- 椹�
+- are
+- 鍩�
+- 蹇�
+- 婕�
+- 濞�
+- 渚�
+- 钄�
+- 鍊�
+- 椴�
+- fr
+- ful
+- 鍢�
+- 婊�
+- 楱�
+- ility
+- 鍠�
+- 鎮�
+- 鍡�
+- 浜�
+- 纭�
+- mer
+- 纾�
+- 闇�
+- 楹�
+- 鏇�
+- 铚�
+- police
+- 闀�
+- 绗�
+- 鑻�
+- 闈�
+- 鍑�
+- 澶�
+- 璐�
+- 缂�
+- 鍡�
+- 娌�
+- ened
+- 鑺�
+- 璇�
+- 灏�
+- 鐝�
+- 宕�
+- ld
+- x
+- 鎻�
+- 绾�
+- 缂�
+- 杞�
+- 搴�
+- 娓�
+- 楦�
+- 涔�
+- 鎯�
+- peop
+- 姝�
+- 鍞�
+- 妯�
+- 鑶�
+- fro
+- 鍝�
+- 鑵�
+- 闇�
+- 鍧�
+- 闇�
+- 鎺�
+- 濞�
+- 闀�
+- 宸�
+- 纰�
+- 钘�
+- 娲�
+- 鑽�
+- 鍥�
+- 缂�
+- 閽�
+- 椋�
+- 鍎�
+- 鑻�
+- 璨�
+- 鏌�
+- 钃�
+- 闃�
+- 璐�
+- 纰�
+- 鐙�
+- 鑹�
+- 鍠�
+- 绋�
+- 鏄�
+- i'm
+- 璋�
+- 铚�
+- 杞�
+- 涔�
+- 瀵�
+- 鍟�
+- 铏�
+- ster
+- 鍨�
+- 鍡�
+- 璁�
+- ves
+- again
+- 闅�
+- 甯�
+- 鍡�
+- 缁�
+- 鍏�
+- 鍗�
+- 鎶�
+- 浠�
+- 浠�
+- 鏍�
+- 鎷�
+- 钀�
+- him
+- 鑽�
+- 娣�
+- '7'
+- 閶�
+- 鏁�
+- 棰�
+- ment
+- 瀚�
+- 妫�
+- show
+- 璺�
+- out
+- 姹�
+- 杩�
+- 姒�
+- 鏆�
+- 绯�
+- 鏅�
+- '9'
+- 绋�
+- 鏇�
+- 钂�
+- ture
+- 楝�
+- 閫�
+- 宀�
+- 鑺�
+- 鏄�
+- 鍩�
+- 骞�
+- 鐚�
+- 浼�
+- pub
+- 鍗�
+- 鑽�
+- 瓒�
+- 娣�
+- 鍢�
+- 鎮�
+- 钘�
+- 浜�
+- 娓�
+- pool
+- 绨�
+- 璋�
+- 鍣�
+- 绐�
+- 绁�
+- 闃�
+- 娑�
+- 鎺�
+- 鐧�
+- 鐤�
+- 鎼�
+- 婕�
+- 閿�
+- 閽�
+- 鑰�
+- 韪�
+- 楠�
+- 绋�
+- 閿�
+- 绻�
+- 缂�
+- 鍔�
+- 鍟�
+- 钑�
+- 浠�
+- 鏄�
+- 涓�
+- 婊�
+- 鏌�
+- 闀�
+- 鍝�
+- 鍑�
+- 鍣�
+- 鐟�
+- 鍡�
+- 绠�
+- 铚�
+- 鏈�
+- 璞�
+- ap
+- 鍟�
+- 缈�
+- 鎰�
+- peri
+- 铓�
+- 寮�
+- 绂�
+- 铓�
+- 鍧�
+- 鎹�
+- 绾�
+- 韫�
+- for
+- 鑽�
+- 鑽�
+- 鎳�
+- 濂�
+- 槌�
+- 鐤�
+- 鎼�
+- 涓�
+- 骞�
+- 宀�
+- 鐗�
+- 铦�
+- 绲�
+- 绉�
+- 缂�
+- at
+- 缃�
+- 鍡�
+- 宀�
+- 缁�
+- 鍦�
+- 鎭�
+- 鍙�
+- 鏂�
+- 鐧�
+- 鐓�
+- 闆�
+- 灏�
+- 閻�
+- 楹�
+- 榛�
+- 娈�
+- 閮�
+- 鍒�
+- 鍓�
+- 鎻�
+- 姣�
+- 鍣�
+- fi
+- 姘�
+- 娉�
+- 妯�
+- 杩�
+- 瀚�
+- 榻�
+- 娈�
+- 澧�
+- 褰�
+- 鏅�
+- 鍜�
+- 鑺�
+- 鐫�
+- 鏌�
+- 鎴�
+- 灞�
+- 铏�
+- 闊�
+- 娑�
+- 濮�
+- 閶�
+- sure
+- 鏃�
+- 娑�
+- 鐢�
+- 鍢�
+- 纭�
+- som
+- 鎯�
+- 鐙�
+- 鍫�
+- 灞�
+- 鎰�
+- li
+- 琛�
+- 璋�
+- 宥�
+- 宄�
+- 纰�
+- 鏆�
+- h
+- 閺�
+- 鐦�
+- 铚�
+- 娴�
+- 钀�
+- 鑱�
+- 铓�
+- 姹�
+- people
+- 濡�
+- 楦�
+- 鏄�
+- 鑺�
+- 鎸�
+- 褰�
+- 绔�
+- 娲�
+- 鐑�
+- 楣�
+- 鑳�
+- 鍏�
+- 鍜�
+- 妤�
+- 甯�
+- 濡�
+- ant
+- 鑷�
+- 妗�
+- 鍒�
+- 鍥�
+- na
+- 闉�
+- 妤�
+- 閫�
+- 鎯�
+- 鍗�
+- 闂�
+- 鑰�
+- 閭�
+- 绔�
+- 閽�
+- 鐜�
+- 閮�
+- 鏄�
+- 涔�
+- 閽�
+- 鏅�
+- 绗�
+- 鏍�
+- 鑺�
+- 灏�
+- 蹇�
+- 鍖�
+- as
+- 棰�
+- our
+- that
+- 绋�
+- 閿�
+- 闊�
+- 鍒�
+- 寤�
+- k
+- teen
+- 骞�
+- pic
+- 鍙�
+- 楠�
+- 妫�
+- 缇�
+- 鍨�
+- 鐘�
+- 濯�
+- 鍏�
+- 灏�
+- 涔�
+- 椴�
+- 鍒�
+- 澹�
+- 鑺�
+- hotel
+- 浣�
+- 姘�
+- 鎮�
+- 棰�
+- 姹�
+- 缂�
+- 骞�
+- 绔�
+- 鍠�
+- 鐤�
+- long
+- 骞�
+- 闀�
+- 閰�
+- ings
+- ood
+- 鏌�
+- 鍞�
+- 杈�
+- 绋�
+- 瑗�
+- 璁�
+- 绡�
+- 鍧�
+- 琚�
+- 鍗�
+- 鑷�
+- 姝�
+- 鏄�
+- 鎽�
+- gh
+- 鑱�
+- 鐘�
+- 鏁�
+- 鐗�
+- 鏃�
+- 閿�
+- 鐜�
+- 浣�
+- 閯�
+- 婕�
+- 鍙�
+- 宸�
+- 鍛�
+- where
+- 鎴�
+- 鍡�
+- 鐞�
+- 鍘�
+- 绐�
+- cas
+- 鑸�
+- 鐢�
+- 鍑�
+- 璋�
+- 鏃�
+- 娌�
+- 鐙�
+- 婧�
+- 缁�
+- 鍔�
+- 婊�
+- in
+- 瑜�
+- fam
+- 妤�
+- 閲�
+- 纾�
+- see
+- 鏂�
+- 浣�
+- 澹�
+- 澧�
+- 璇�
+- around
+- 缇�
+- 娴�
+- 鐭�
+- 閾�
+- provi
+- 钘�
+- 浼�
+- 闃�
+- 鍝�
+- 娼�
+- 绮�
+- 鍍�
+- 鍑�
+- 鐞�
+- 绉�
+- 娑�
+- 璞�
+- 闀�
+- 钂�
+- 鍙�
+- 棰�
+- 鑾�
+- 闃�
+- 鐥�
+- 鐖�
+- 瀣�
+- 婊�
+- 鐗�
+- 娌�
+- 鐠�
+- 绐�
+- 婀�
+- 鍠�
+- 寰�
+- 鑰�
+- 浠�
+- 铔�
+- 鍚�
+- 鏍�
+- 閮�
+- 璋�
+- 鑶�
+- 鍨�
+- 鎭�
+- 绛�
+- 娣�
+- 鍓�
+- vie
+- 浼�
+- first
+- 娓�
+- 槎�
+- 缂�
+- 韪�
+- 鍛�
+- 瀹�
+- 棰�
+- 钄�
+- 鎸�
+- 浜�
+- 澧�
+- 鍊�
+- 姊�
+- 鐚�
+- 椤�
+- 娉�
+- 楦�
+- 璧�
+- 鑱�
+- 楝�
+- 闅�
+- 鑳�
+- 椹�
+- 涓�
+- 閭�
+- 椴�
+- 闊�
+- 濠�
+- sed
+- it
+- 瀹�
+- 灞�
+- 浼�
+- 缈�
+- 閾�
+- 纾�
+- 閱�
+- 鐦�
+- 浣�
+- 闇�
+- 鑷�
+- 鍧�
+- 淇�
+- 鑸�
+- 杈�
+- 璋�
+- 鐢�
+- 绁�
+- tell
+- 鍟�
+- ace
+- 瀹�
+- 楠�
+- 娴�
+- 鍐�
+- 鑲�
+- im
+- win
+- 鐢�
+- 韫�
+- 绮�
+- 鑴�
+- 閬�
+- next
+- expe
+- 姒�
+- 韫�
+- 閭�
+- stru
+- 娌�
+- 瀹�
+- 鏃�
+- 閿�
+- 渚�
+- 鎷�
+- 杈�
+- 浠�
+- be
+- 娲�
+- 鎽�
+- 寰�
+- 棰�
+- 寰�
+- 楝�
+- 鎸�
+- 鎴�
+- 鍢�
+- 鏉�
+- 楠�
+- 鍔�
+- 鍝�
+- 闆�
+- 鎿�
+- 榇�
+- 琛�
+- 鑺�
+- 鎹�
+- 闇�
+- com
+- 淇�
+- 浼�
+- ory
+- 杞�
+- 鍗�
+- 璋�
+- 瀛�
+- te
+- 閿�
+- 鐬�
+- 瀵�
+- 绯�
+- 鍫�
+- 涔�
+- 鎼�
+- 閺�
+- 浣�
+- con
+- 鐞�
+- art
+- 寰�
+- 濉�
+- 璁�
+- 鐬�
+- rence
+- 婧�
+- 鍗�
+- 閫�
+- 闃�
+- 闃�
+- 濠�
+- mil
+- 涓�
+- 濮�
+- 娴�
+- 搴�
+- sing
+- 鍡�
+- 鍝�
+- 鐮�
+- 鍚�
+- 闂�
+- 璐�
+- 灞�
+- 濞�
+- ce
+- 鍥�
+- 妤�
+- 鍏�
+- 椴�
+- 鍡�
+- tely
+- 甯�
+- 閭�
+- 鎳�
+- 娆�
+- ong
+- 閮�
+- 涓�
+- breakfast
+- 宕�
+- 姗�
+- 鍋�
+- 娌�
+- under
+- tion
+- 闈�
+- 鍫�
+- 浠�
+- 閾�
+- 闅�
+- 铔�
+- 鐙�
+- 鎵�
+- 鐔�
+- over
+- 妾�
+- 楦�
+- 妾�
+- 褰�
+- 锜�
+- 鑵�
+- 妲�
+- 娉�
+- 鑸�
+- 鐥�
+- 绉�
+- 姘�
+- 缃�
+- 鍟�
+- 鍟�
+- 鍐�
+- 鐤�
+- 宓�
+- 鍚�
+- 鑸�
+- 閲�
+- mb
+- 瑁�
+- 妾�
+- 杈�
+- 鐪�
+- 婧�
+- can
+- 鍞�
+- 濯�
+- 浣�
+- 鐙�
+- 浠�
+- rent
+- 娌�
+- 璇�
+- 闇�
+- 濠�
+- 闆�
+- eigh
+- 鐧�
+- 鐬�
+- 婢�
+- 娲�
+- 闃�
+- ta
+- 渚�
+- 韬�
+- 鑾�
+- 楠�
+- 瀹�
+- 绺�
+- birth
+- 钀�
+- men
+- 绉�
+- 杞�
+- 鍒�
+- fl
+- 楦�
+- 蹇�
+- 鑳�
+- 鏀�
+- 纾�
+- 椋�
+- 椴�
+- 閭�
+- 闃�
+- 鐨�
+- 鍗�
+- 鎼�
+- 闈�
+- 鍞�
+- 鎾�
+- 鏌�
+- 鍨�
+- 鑶�
+- 杈�
+- 姒�
+- 閭�
+- 閿�
+- 鎴�
+- 钄�
+- 绮�
+- 缈�
+- 鏀�
+- 鎮�
+- 娑�
+- 濯�
+- 淇�
+- 闀�
+- 姊�
+- 钃�
+- 鑹�
+- guest
+- 椤�
+- 璀�
+- 鍏�
+- 鍥�
+- 鍊�
+- 閬�
+- 鏈�
+- such
+- 绡�
+- 鍥�
+- 瀹�
+- 鎴�
+- 宓�
+- 濞�
+- 绠�
+- 妾�
+- 鐜�
+- please
+- 褰�
+- 铚�
+- 鎬�
+- 鐧�
+- 鎬�
+- 闀�
+- 杩�
+- 璇�
+- 搴�
+- 寮�
+- 鑺�
+- 鍗�
+- 閫�
+- 棰�
+- 鏉�
+- 铇�
+- 妤�
+- 棰�
+- 鍚�
+- 鍫�
+- 婢�
+- 寮�
+- 娴�
+- 鍫�
+- 涓�
+- 鐒�
+- 'on'
+- 绾�
+- 宸�
+- 纭�
+- 瀛�
+- 绱�
+- 娌�
+- 鎴�
+- 灞�
+- 鑳�
+- 鍔�
+- 鐝�
+- 淇�
+- 妯�
+- 璁�
+- 璞�
+- 鍙�
+- 閽�
+- 闇�
+- 鐎�
+- 绯�
+- 鍣�
+- 璞�
+- 婀�
+- 娲�
+- 鑿�
+- 鎮�
+- ree
+- 鍑�
+- 寰�
+- 閮�
+- today
+- 鍕�
+- 瀣�
+- 铻�
+- 鎴�
+- 鐠�
+- 蹇�
+- 淇�
+- 瑷�
+- 搴�
+- 璐�
+- 璐�
+- kind
+- 鎵�
+- 楠�
+- 鍜�
+- 鍑�
+- 璁�
+- 鎺�
+- ated
+- 鑻�
+- 槌�
+- 鍣�
+- 鑼�
+- govern
+- 绛�
+- 棰�
+- 鎰�
+- 娓�
+- 韪�
+- 缃�
+- 姹�
+- 韪�
+- 闄�
+- 鐤�
+- 闂�
+- 鎴�
+- 钂�
+- 缂�
+- 鏇�
+- 濠�
+- 鍐�
+- 闇�
+- 鏌�
+- sent
+- 姘�
+- et
+- 鍜�
+- 鍦�
+- 鍜�
+- site
+- sti
+- 姊�
+- water
+- 鑸�
+- 鍤�
+- 铚�
+- 閫�
+- 婀�
+- 鏍�
+- 鍒�
+- 钖�
+- ally
+- 璇�
+- 钘�
+- 閽�
+- 浼�
+- 鑾�
+- 纭�
+- 绐�
+- sa
+- 鎰�
+- 锜�
+- 绉�
+- 闆�
+- 瑜�
+- 楣�
+- 娉�
+- ner
+- ast
+- 鑿�
+- 鏅�
+- 鏋�
+- 鍋�
+- 鍤�
+- 閬�
+- su
+- 鎺�
+- 鍗�
+- 棣�
+- 鍔�
+- 鑳�
+- rep
+- 娑�
+- ther
+- 瀛�
+- 娑�
+- 鐙�
+- 涓�
+- 宸�
+- 鑴�
+- 鐢�
+- 鐝�
+- 閮�
+- 钄�
+- 鐤�
+- 搴�
+- 纭�
+- 瑁�
+- 楠�
+- sequ
+- 杩�
+- 鐩�
+- 鍣�
+- 灏�
+- 鍜�
+- 铚�
+- 搴�
+- 闀�
+- 铦�
+- 瀹�
+- 鍗�
+- 寮�
+- 瀛�
+- 鐚�
+- 鐘�
+- 濡�
+- 钁�
+- every
+- 铻�
+- 棣�
+- ating
+- 澹�
+- 鐔�
+- rela
+- 鍡�
+- 绾�
+- 閿�
+- 鍛�
+- 鎶�
+- 纾�
+- 鐤�
+- 缇�
+- 缁�
+- 閾�
+- 鎺�
+- 瀹�
+- 鑽�
+- 鐜�
+- ser
+- 鍟�
+- 姘�
+- 鐩�
+- 鐤�
+- 楝�
+- 缁�
+- 閿�
+- 楝�
+- 鐡�
+- 楹�
+- 鏃�
+- 濞�
+- 鏁�
+- 璺�
+- 鐑�
+- 锠�
+- te
+- 璇�
+- 閲�
+- 缈�
+- 鐝�
+- 鎱�
+- 椴�
+- 鍕�
+- 琚�
+- 鐟�
+- tly
+- 搴�
+- government
+- 鑽�
+- 闃�
+- 鐑�
+- 鍊�
+- 淇�
+- 楦�
+- 鍊�
+- ound
+- co
+- 绔�
+- 鑵�
+- 鏄�
+- 婵�
+- 鍟�
+- 鎻�
+- 娉�
+- 閮�
+- 鍨�
+- 杞�
+- 鏌�
+- 閰�
+- 鏉�
+- 鏅�
+- 寤�
+- 濂�
+- 閱�
+- 闀�
+- 璁�
+- 缂�
+- 榫�
+- 鐣�
+- 鑴�
+- ma
+- 閱�
+- 鍡�
+- 涓�
+- 娈�
+- 榄�
+- 鐔�
+- wr
+- 鍤�
+- 褰�
+- 鏍�
+- americ
+- 璋�
+- 娉�
+- 鏌�
+- 楂�
+- 鐩�
+- 璇�
+- 鐦�
+- 钀�
+- 鍠�
+- 濯�
+- 蹇�
+- 闃�
+- 鏇�
+- 瑁�
+- 閿�
+- something
+- 鐘�
+- 鐖�
+- 鐓�
+- 鎻�
+- 鑻�
+- 鍢�
+- 鑳�
+- 閾�
+- 灞�
+- 棰�
+- 閿�
+- 楠�
+- 娓�
+- 閭�
+- 鑴�
+- 婊�
+- 缃�
+- 鏁�
+- 榧�
+- 鐖�
+- 鎽�
+- 杈�
+- 鐢�
+- 鑻�
+- 鏁�
+- 鍐�
+- 钑�
+- 閮�
+- 鑰�
+- 闂�
+- 閫�
+- 鎷�
+- 灏�
+- fe
+- 椴�
+- 棰�
+- '0'
+- room
+- 鑳�
+- 婢�
+- 濯�
+- 寤�
+- 閲�
+- 璐�
+- 闀�
+- nine
+- 鎭�
+- 楣�
+- 鏃�
+- 閾�
+- 鍫�
+- 鍓�
+- ket
+- 鏀�
+- 澧�
+- 娲�
+- 淇�
+- ors
+- 璇�
+- 榛�
+- 鐝�
+- 璺�
+- 娴�
+- 瀛�
+- mp
+- 鐙�
+- 韪�
+- 娓�
+- 閫�
+- 棰�
+- 鍔�
+- 缇�
+- 缇�
+- 闃�
+- 璺�
+- 瑜�
+- 涔�
+- 鎷�
+- 鐩�
+- 楦�
+- 鎶�
+- 閫�
+- 瑜�
+- 鎬�
+- cou
+- 锜�
+- cen
+- 鍌�
+- 鐮�
+- 椴�
+- 韫�
+- restaurant
+- 鏉�
+- 鑼�
+- 灏�
+- 鑰�
+- 娣�
+- 韬�
+- 铚�
+- 鍢�
+- 璋�
+- 铦�
+- 鍫�
+- 濮�
+- happ
+- 閾�
+- 妤�
+- park
+- 鍔�
+- 娈�
+- 鐣�
+- say
+- 缁�
+- 绾�
+- 婧�
+- 铏�
+- 绡�
+- 铚�
+- 鎯�
+- 璺�
+- 鍟�
+- 婧�
+- 楗�
+- 瑁�
+- 鍕�
+- 鏌�
+- 鎯�
+- 闄�
+- 娈�
+- 瀹�
+- 寰�
+- 绾�
+- 鐥�
+- 璋�
+- 鍙�
+- 鐘�
+- 閯�
+- 绮�
+- 鐪�
+- 鍧�
+- 濡�
+- 鍓�
+- 闆�
+- 鍌�
+- 瀚�
+- 鍢�
+- 鍜�
+- 鎰�
+- 绨�
+- 璧�
+- 绠�
+- 榫�
+- 鏀�
+- 鑿�
+- 鍧�
+- 濂�
+- 榛�
+- 鐞�
+- 娼�
+- 鏈�
+- tually
+- 閾�
+- 涔�
+- 閱�
+- 娼�
+- 妲�
+- 绾�
+- 鐎�
+- 璇�
+- 鎱�
+- 濂�
+- 鍢�
+- 鎻�
+- 鏄�
+- 闈�
+- 鍙�
+- ce
+- 瓒�
+- 閱�
+- 纰�
+- 鍣�
+- 婢�
+- 鐭�
+- '&'
+- 鍘�
+- 鍟�
+- 鏅�
+- 鐐�
+- ook
+- 鏂�
+- 鐩�
+- 濡�
+- 濞�
+- 鐕�
+- 绐�
+- 娉�
+- also
+- 鑿�
+- 鐐�
+- 閫�
+- 鍩�
+- 鍑�
+- 婊�
+- 閾�
+- 瑙�
+- 鑾�
+- le
+- 鍖�
+- 韪�
+- 閰�
+- 楣�
+- enjoy
+- 闂�
+- 姣�
+- 绁�
+- 韬�
+- 閮�
+- 绗�
+- 涔�
+- 鐢�
+- like
+- 鍚�
+- 鍘�
+- 鐖�
+- 鐗�
+- 璧�
+- 鍍�
+- 閽�
+- 娴�
+- 蹇�
+- get
+- 鍐�
+- 妯�
+- 璇�
+- 韫�
+- 閼�
+- 鐣�
+- 婊�
+- 鎺�
+- 灏�
+- war
+- 妗�
+- 鐡�
+- 姣�
+- 鑶�
+- 楂�
+- 閽�
+- 妗�
+- 纰�
+- 杈�
+- 鍞�
+- 绔�
+- 鐥�
+- 鑶�
+- 閿�
+- 鍢�
+- 闆�
+- 鐗�
+- 鏄�
+- 鏁�
+- 钑�
+- 浼�
+- 寰�
+- 瑗�
+- 鐩�
+- 鎲�
+- 娑�
+- different
+- after
+- 铏�
+- 浣�
+- 鐓�
+- 瀹�
+- ori
+- 鏀�
+- 鍐�
+- 闆�
+- 妯�
+- 鍝�
+- 鐡�
+- 棣�
+- 鍞�
+- 鐐�
+- 鏃�
+- 鍘�
+- 钁�
+- 鐥�
+- 宸�
+- 闈�
+- 鍑�
+- 鏈�
+- 鐑�
+- 浠�
+- 鎭�
+- 鍠�
+- 璐�
+- 閮�
+- 楗�
+- 閾�
+- 婊�
+- 锠�
+- 钖�
+- 榻�
+- 瑜�
+- 榛�
+- 娑�
+- n't
+- 纾�
+- 鍖�
+- 娌�
+- 闀�
+- nice
+- 娴�
+- 鎮�
+- 娣�
+- 鎹�
+- 绠�
+- 鐬�
+- 鍕�
+- 灞�
+- 韫�
+- 韫�
+- 妲�
+- 绫�
+- 鏍�
+- 鑴�
+- 椤�
+- 妗�
+- 鑴�
+- 娆�
+- 铦�
+- 鍕�
+- 鍧�
+- 閰�
+- 鍞�
+- 绺�
+- 娆�
+- 鑶�
+- 璇�
+- 鍏�
+- 妤�
+- 濞�
+- 濞�
+- 闄�
+- ple
+- 灏�
+- 骞�
+- 璞�
+- 妗�
+- 婊�
+- 楹�
+- 缃�
+- 鏈�
+- 鑰�
+- 姹�
+- 鐧�
+- 杈�
+- 閫�
+- 钄�
+- 璧�
+- 绉�
+- 鍜�
+- 鏂�
+- 璺�
+- 鑸�
+- 鑾�
+- 闂�
+- 姘�
+- 楠�
+- 妲�
+- 鐡�
+- 椁�
+- 鐬�
+- 娌�
+- 鏈�
+- had
+- 椴�
+- 宓�
+- nothing
+- 瀚�
+- 鎭�
+- 琛�
+- 杞�
+- 鏉�
+- 璧�
+- 娲�
+- red
+- 鎷�
+- 鐙�
+- 鍗�
+- 铓�
+- 鐤�
+- 鎹�
+- 濠�
+- 鍥�
+- 姊�
+- 鍨�
+- 閫�
+- 楂�
+- 鏅�
+- 楠�
+- 瀛�
+- 鐐�
+- 鍠�
+- 鍨�
+- 铚�
+- sts
+- 闃�
+- 鏈�
+- 鐔�
+- 缁�
+- 鍧�
+- 鑻�
+- are
+- 瀵�
+- 鍚�
+- 鐨�
+- 鎹�
+- 鐐�
+- b
+- the
+- 鍝�
+- 闉�
+- ir
+- 鎺�
+- tation
+- 鍣�
+- 閰�
+- 蹇�
+- 纭�
+- 鑰�
+- 鍋�
+- 闆�
+- 纾�
+- 閿�
+- over
+- 渚�
+- 濠�
+- 鍚�
+- 绔�
+- man
+- 涔�
+- 鑻�
+- 缁�
+- 鍐�
+- 鍛�
+- 瀛�
+- 缂�
+- 鍩�
+- 鐘�
+- 楹�
+- 钃�
+- 鎶�
+- 澶�
+- tri
+- 鏉�
+- 浣�
+- 鑾�
+- di
+- 姣�
+- 璐�
+- 鐚�
+- 妗�
+- so
+- 姘�
+- 鍙�
+- 鍝�
+- 婵�
+- 婀�
+- 鍏�
+- 璇�
+- 鐩�
+- 閽�
+- 楝�
+- 涓�
+- 椴�
+- 璨�
+- 閶�
+- 鍫�
+- 鑼�
+- 鎷�
+- 鐗�
+- 铦�
+- 闀�
+- 鍤�
+- 鏉�
+- 鍠�
+- 褰�
+- col
+- 浠�
+- 娑�
+- 铓�
+- 濡�
+- 绠�
+- 闅�
+- 浜�
+- 鐤�
+- 瀣�
+- 鍠�
+- 鏀�
+- am
+- 鍚�
+- fe
+- zero
+- 鐥�
+- 璇�
+- 骞�
+- 鐤�
+- 瀣�
+- 闄�
+- 绛�
+- 涓�
+- sm
+- 楂�
+- 鍔�
+- 钀�
+- 璇�
+- 闀�
+- 鍧�
+- 鍜�
+- 姹�
+- 姒�
+- 宀�
+- 宕�
+- ined
+- chu
+- 绯�
+- 绁�
+- 鐚�
+- 濠�
+- 鎼�
+- 鍜�
+- 绠�
+- 杈�
+- 鎮�
+- 鑽�
+- 鎸�
+- 鎵�
+- drive
+- 鎾�
+- 鑾�
+- 閯�
+- 鍐�
+- 绋�
+- 鎴�
+- ould
+- 閾�
+- 缈�
+- try
+- 鍦�
+- 鑹�
+- 妗�
+- 鍏�
+- 鎻�
+- 鍫�
+- 妗�
+- ink
+- email
+- 鐘�
+- 閾�
+- 鎷�
+- 槌�
+- 鏁�
+- 闆�
+- 濂�
+- 瑁�
+- 鎴�
+- 鍝�
+- 鏍�
+- 鍝�
+- ds
+- 娴�
+- 宀�
+- 鎸�
+- 鑾�
+- 鑵�
+- 鏉�
+- 淇�
+- 鏂�
+- 铔�
+- 璇�
+- 鐐�
+- 鐠�
+- 鎬�
+- 绀�
+- 鍜�
+- 鐝�
+- 闊�
+- 閭�
+- 绁�
+- 鐨�
+- 娓�
+- 鍥�
+- 娼�
+- 娣�
+- ter
+- che
+- 濡�
+- 钀�
+- 鐔�
+- 鎵�
+- 浜�
+- 浜�
+- 鐦�
+- 鑸�
+- 鐨�
+- 璇�
+- 铏�
+- 娆�
+- 鎺�
+- 妫�
+- 宀�
+- 姣�
+- 鍗�
+- 缃�
+- 鐤�
+- ali
+- 鏁�
+- i
+- 绱�
+- mu
+- 娑�
+- 鎼�
+- 濂�
+- cri
+- 棣�
+- 缈�
+- 鎶�
+- 绛�
+- 鐜�
+- 闇�
+- 閾�
+- 婢�
+- 瀚�
+- 澶�
+- 鍜�
+- 鍥�
+- 寰�
+- 鐚�
+- pre
+- 閫�
+- 绯�
+- 婕�
+- 鑱�
+- 鍙�
+- 鐦�
+- 缁�
+- 妫�
+- 绗�
+- 閾�
+- 閲�
+- 鎭�
+- 鐧�
+- 鑻�
+- 鏂�
+- 閱�
+- 妞�
+- 楸�
+- 娑�
+- z
+- 鐩�
+- par
+- 鎾�
+- 鎴�
+- 鍋�
+- 宸�
+- 鎵�
+- busine
+- 妗�
+- 搴�
+- 鍧�
+- problem
+- 鑸�
+- centr
+- fifty
+- 姊�
+- 皤柉
+- restaurants
+- beau
+- fac
+- 璋�
+- 鑵�
+- 闃�
+- 瀛�
+- 绉�
+- 搴�
+- 鐣�
+- 渚�
+- ang
+- 骞�
+- 涓�
+- 婧�
+- 褰�
+- 榄�
+- 璇�
+- 楦�
+- 閭�
+- 鐦�
+- ay
+- 鑳�
+- 鍕�
+- 妲�
+- 姗�
+- 鐢�
+- ide
+- 娌�
+- 閿�
+- 绁�
+- 鎱�
+- 浣�
+- 鑻�
+- 鏆�
+- 鏃�
+- 鎾�
+- 淇�
+- 灞�
+- 鎶�
+- 瀚�
+- 娴�
+- 椴�
+- 鏆�
+- 绐�
+- 鐐�
+- 婧�
+- 楂�
+- 绨�
+- 宥�
+- 宄�
+- si
+- 瑙�
+- 鍗�
+- 鑱�
+- ty
+- 鐚�
+- 鑵�
+- 鏅�
+- 琚�
+- 鎴�
+- 娉�
+- 鍟�
+- 鎴�
+- 钂�
+- 楹�
+- 姹�
+- 鑼�
+- 鍩�
+- 铔�
+- 寰�
+- 楦�
+- 鑹�
+- 椤�
+- 楂�
+- 寰�
+- 璞�
+- 纰�
+- 娆�
+- 钖�
+- hi
+- 鍛�
+- 鑸�
+- 鍔�
+- 褰�
+- ''''
+- 娓�
+- 缇�
+- 鏋�
+- 妯�
+- 寮�
+- 鍧�
+- 寰�
+- 棣�
+- 鍒�
+- 鐠�
+- 閿�
+- 閽�
+- 鍥�
+- 鎷�
+- 鍓�
+- 鎻�
+- 婊�
+- 缂�
+- ach
+- 鏂�
+- 璇�
+- 瀵�
+- 瑁�
+- 鍚�
+- should
+- 钂�
+- 绐�
+- 绀�
+- 鎴�
+- 褰�
+- 鎭�
+- 鍩�
+- 杩�
+- 閭�
+- 鐔�
+- 娉�
+- 涔�
+- 绡�
+- center
+- 鐧�
+- 鏄�
+- 鐗�
+- 婵�
+- 鍣�
+- 鑻�
+- ang
+- give
+- 濮�
+- 绁�
+- 鎼�
+- 鎮�
+- 鐡�
+- 瀚�
+- wit
+- 鐣�
+- 绁�
+- 婀�
+- 鎵�
+- 妗�
+- 閱�
+- 璋�
+- 铏�
+- 鐜�
+- 鏂�
+- 瀵�
+- 鏀�
+- 蹇�
+- man
+- 鍍�
+- money
+- 椋�
+- 鍥�
+- 鍚�
+- 鎵�
+- j
+- 骞�
+- 娉�
+- 鎽�
+- 鍖�
+- 寤�
+- 閫�
+- 琚�
+- 妤�
+- 鎼�
+- 缂�
+- ft
+- 宀�
+- scho
+- 涓�
+- 铚�
+- 钃�
+- 鍗�
+- 鍊�
+- 鍠�
+- 铔�
+- 娈�
+- 缁�
+- chan
+- 闇�
+- 妫�
+- 娼�
+- 钁�
+- 鍤�
+- ed
+- 婕�
+- 闃�
+- 淇�
+- 寰�
+- 妲�
+- 閰�
+- 鍊�
+- 姗�
+- send
+- 璋�
+- 鍡�
+- 缃�
+- 璐�
+- 鎼�
+- 鍧�
+- 韫�
+- 绶�
+- 闃�
+- 姝�
+- 闁�
+- 宀�
+- 鍊�
+- 璇�
+- 鍋�
+- 閱�
+- 鐥�
+- 绀�
+- 鍑�
+- 鐣�
+- 鎽�
+- 杩�
+- 鎺�
+- 鎸�
+- 濯�
+- same
+- 铓�
+- 鎿�
+- 婢�
+- 鐪�
+- 鍓�
+- 瀚�
+- 楣�
+- 濞�
+- 鍑�
+- 鍘�
+- 鑷�
+- 鏋�
+- 鐐�
+- 鐑�
+- 鎺�
+- 娲�
+- 鍍�
+- velo
+- 瑙�
+- 閫�
+- have
+- 鐨�
+- 閰�
+- 鎬�
+- 鍙�
+- 缁�
+- thir
+- 浠�
+- 绾�
+- les
+- 鏈�
+- 寰�
+- 寰�
+- come
+- 韬�
+- 姝�
+- 鐢�
+- 璺�
+- br
+- 閹�
+- 璁�
+- 鐜�
+- 琛�
+- 楹�
+- 鏋�
+- 鎷�
+- 鑵�
+- reas
+- 鑺�
+- 楣�
+- more
+- 鍎�
+- 鎰�
+- 娣�
+- 韬�
+- 鐜�
+- 灞�
+- 姘�
+- 鍝�
+- 铏�
+- sit
+- 瀹�
+- 澶�
+- 姊�
+- 绱�
+- 鐦�
+- recei
+- 鍖�
+- 鍙�
+- from
+- 鍐�
+- 鐣�
+- 璋�
+- 鑾�
+- 閿�
+- pas
+- 椹�
+- 鏄�
+- 鍠�
+- v
+- 鍛�
+- '''re'
+- 闃�
+- 韪�
+- 璁�
+- 璁�
+- 榛�
+- 绛�
+- seven
+- 浠�
+- 鑸�
+- 鎻�
+- 濡�
+- 绀�
+- 楂�
+- 浼�
+- 棣�
+- 璋�
+- 绡�
+- 閫�
+- 鐜�
+- 鐪�
+- 椹�
+- all
+- 鍡�
+- 浜�
+- 娣�
+- 瑁�
+- 鍜�
+- 鐒�
+- 姘�
+- ari
+- 宕�
+- 鎺�
+- 娌�
+- 瀵�
+- 杞�
+- 鏃�
+- 缁�
+- test
+- 楹�
+- 鏋�
+- 鎸�
+- 鍙�
+- 椹�
+- 绛�
+- 鍧�
+- 椤�
+- 榧�
+- 鐐�
+- 鎺�
+- 濮�
+- 婧�
+- 搴�
+- 绠�
+- 鐕�
+- 鎷�
+- 鍛�
+- 鎰�
+- 琚�
+- 鐫�
+- 鐪�
+- 鍐�
+- 钁�
+- but
+- 鐫�
+- 鎺�
+- 鍞�
+- 闄�
+- 鍠�
+- 鏅�
+- 婧�
+- 鑹�
+- 鑻�
+- 鑵�
+- 锜�
+- 鐕�
+- 鍊�
+- 浼�
+- 钀�
+- 姹�
+- 6
+- 鍍�
+- 铇�
+- 鐦�
+- 涔�
+- 鎺�
+- 涔�
+- 婧�
+- 鐥�
+- lo
+- 铦�
+- 鍟�
+- 缂�
+- part
+- 鍙�
+- 鏂�
+- 楦�
+- 閹�
+- 鎮�
+- 璇�
+- 瀛�
+- 鐛�
+- 鐜�
+- 姹�
+- ary
+- 鍚�
+- 鎷�
+- 璇�
+- 鎵�
+- 榧�
+- 鐝�
+- 姗�
+- 闅�
+- close
+- 濮�
+- ty
+- 杩�
+- 楠�
+- 宕�
+- go
+- 鍕�
+- 鍊�
+- ble
+- 鐩�
+- 鍓�
+- qu
+- 鍦�
+- 鑽�
+- 鑽�
+- 鐤�
+- 閰�
+- 绋�
+- 骞�
+- 浜�
+- 铔�
+- 娉�
+- 瀹�
+- q
+- 妤�
+- 鍒�
+- 浣�
+- 娼�
+- 铻�
+- 閾�
+- 宸�
+- 鍛�
+- 鍨�
+- 鍟�
+- 鎴�
+- 浣�
+- 瀵�
+- 缈�
+- 鍟�
+- fa
+- w
+- 缁�
+- 鎶�
+- 璎�
+- 蹇�
+- 瓒�
+- 浣�
+- 铻�
+- last
+- 杩�
+- 鐖�
+- 纰�
+- 寤�
+- 鍟�
+- 鏌�
+- 閰�
+- 鐑�
+- 鐓�
+- 绮�
+- 姹�
+- 妤�
+- 鑰�
+- 鎸�
+- 瑙�
+- 鑽�
+- 鑽�
+- 鐤�
+- 閬�
+- 浠�
+- 鐟�
+- 鎵�
+- 鎭�
+- 钘�
+- 缇�
+- 鍛�
+- 缂�
+- 浜�
+- 鏋�
+- 瀹�
+- 娴�
+- back
+- 鎵�
+- 绡�
+- sou
+- 娑�
+- son
+- 缈�
+- 璁�
+- 鐞�
+- 鐜�
+- 鏍�
+- 闂�
+- 榫�
+- 浜�
+- 妗�
+- sil
+- 骞�
+- 濠�
+- 鑴�
+- 鐤�
+- 姝�
+- 鍏�
+- 鏌�
+- 闀�
+- 娑�
+- 闇�
+- 寰�
+- 蹇�
+- 璀�
+- 绉�
+- 鑴�
+- 鐞�
+- 鐪�
+- own
+- 閱�
+- 缁�
+- ices
+- 宥�
+- 鑳�
+- u
+- 宄�
+- 绠�
+- 涓�
+- 鐥�
+- 琛�
+- 閿�
+- 瀹�
+- 鐨�
+- 鎰�
+- 鐭�
+- 鐖�
+- 浜�
+- 瑁�
+- 楣�
+- 鎴�
+- 鎹�
+- 涔�
+- 鑳�
+- 褰�
+- kil
+- 钁�
+- 姘�
+- 鎹�
+- 鎴�
+- 鐗�
+- '5'
+- dis
+- 鍥�
+- 瀛�
+- 鏌�
+- 姒�
+- 鍠�
+- 鎮�
+- 淇�
+- c
+- 缂�
+- 瀵�
+- je
+- 绔�
+- way
+- 鎷�
+- 鐗�
+- 闃�
+- 绌�
+- 閽�
+- 婵�
+- 鐨�
+- 闂�
+- 鍛�
+- 闉�
+- 鐕�
+- 鑲�
+- 铻�
+- 绡�
+- 褰�
+- 娲�
+- 娓�
+- 鐮�
+- 杈�
+- 鎱�
+- 宕�
+- 璇�
+- 鎰�
+- 濮�
+- 椋�
+- 鍣�
+- 閾�
+- 瀵�
+- 鍜�
+- 浼�
+- 娈�
+- 瀹�
+- 铓�
+- 鏌�
+- 鏋�
+- 鐟�
+- 鎸�
+- 缁�
+- 鏈�
+- 缁�
+- 闅�
+- 鍍�
+- 绾�
+- 鎻�
+- 鑰�
+- 鑸�
+- tom
+- 鏃�
+- 缈�
+- products
+- 缂�
+- 鑲�
+- 闂�
+- 纭�
+- 鐮�
+- 铚�
+- bu
+- 閸�
+- 鏅�
+- 闃�
+- 琛�
+- 铦�
+- 涓�
+- 楂�
+- 鐓�
+- 涔�
+- 绐�
+- 鑺�
+- 娅�
+- ru
+- deta
+- 璋�
+- 淇�
+- 妗�
+- 鑺�
+- 娉�
+- 浼�
+- 绮�
+- 鎻�
+- 棰�
+- qui
+- 閯�
+- 鑲�
+- 灏�
+- 婊�
+- 妞�
+- 鐧�
+- 鐢�
+- local
+- fol
+- 鍋�
+- 涓�
+- 娑�
+- 閾�
+- du
+- 鍣�
+- 鍊�
+- 鐝�
+- 鍦�
+- 澧�
+- car
+- 鎾�
+- 鏍�
+- 璇�
+- 铻�
+- 铇�
+- 鎵�
+- 鎸�
+- 杞�
+- 娈�
+- 閰�
+- high
+- ach
+- 椴�
+- 鍟�
+- think
+- don't
+- 琛�
+- cho
+- 鐩�
+- 鍟�
+- 鎬�
+- 婊�
+- 鑹�
+- 寰�
+- 鎾�
+- 娣�
+- 鑼�
+- 纭�
+- 妗�
+- 榄�
+- 娼�
+- at
+- 浠�
+- 楣�
+- 娌�
+- 鎿�
+- 鐐�
+- 鐮�
+- 涔�
+- 璁�
+- 鍚�
+- 鐤�
+- 钁�
+- 澶�
+- 璺�
+- 铇�
+- 椴�
+- 婊�
+- 鑿�
+- 婊�
+- 鍔�
+- 鍊�
+- 浣�
+- 鑼�
+- 楂�
+- 闆�
+- ile
+- 娆�
+- 宕�
+- 鍥�
+- 鍍�
+- 瀹�
+- 杈�
+- 妫�
+- se
+- 鎹�
+- 鎵�
+- 閾�
+- 閾�
+- 浜�
+- 鐔�
+- 娲�
+- 濞�
+- the
+- 鐥�
+- 绠�
+- 椹�
+- 绀�
+- 閮�
+- 璋�
+- ack
+- 鏅�
+- res
+- lot
+- 绾�
+- 璇�
+- ways
+- 璋�
+- 鐓�
+- 缁�
+- 鐐�
+- 鍗�
+- 灞�
+- 鐣�
+- 鎷�
+- ar
+- 閬�
+- 鍛�
+- 椹�
+- 鑲�
+- 璺�
+- 鑾�
+- 绾�
+- 鍜�
+- hu
+- 鍟�
+- 姝�
+- 蹇�
+- 涓�
+- many
+- 鍕�
+- ned
+- their
+- 鐫�
+- 鍐�
+- 涔�
+- 宸�
+- 鍫�
+- 娲�
+- 閮�
+- 钁�
+- this
+- 姣�
+- 閱�
+- ton
+- 璇�
+- 鏃�
+- 铔�
+- 瑙�
+- 鐞�
+- 楱�
+- 鎹�
+- imp
+- 灏�
+- 鎷�
+- e
+- 姝�
+- 鍒�
+- 璺�
+- three
+- 閿�
+- 渚�
+- 鎷�
+- 鎵�
+- 楗�
+- 鍓�
+- 娼�
+- 淇�
+- 鍡�
+- 绌�
+- 鍦�
+- 鎯�
+- 鎲�
+- 鎯�
+- 濂�
+- inclu
+- mo
+- 閾�
+- 鑷�
+- 娴�
+- 绁�
+- really
+- 甯�
+- dri
+- 妤�
+- 閭�
+- 寮�
+- 绡�
+- 鍏�
+- up
+- 鎱�
+- 鑵�
+- 纰�
+- 鎸�
+- 璁�
+- 璞�
+- 婊�
+- indi
+- into
+- 瑜�
+- 閰�
+- t
+- 绗�
+- 褰�
+- 娑�
+- body
+- ins
+- 婕�
+- 宄�
+- down
+- 缂�
+- 鍓�
+- 绂�
+- 鍙�
+- wa
+- 澶�
+- 闅�
+- 鎬�
+- 鎵�
+- great
+- 娉�
+- 濞�
+- 瀛�
+- 褰�
+- 姣�
+- 铔�
+- 钀�
+- 娉�
+- 鑸�
+- dge
+- 鐬�
+- vo
+- 鍜�
+- 绁�
+- 鏋�
+- 鎲�
+- 鏂�
+- gro
+- 鍊�
+- 楣�
+- all
+- 妲�
+- 蹇�
+- 绲�
+- 瑁�
+- two
+- 鍏�
+- 鏄�
+- 閬�
+- 绐�
+- 鍚�
+- it's
+- 鍚�
+- 濡�
+- 閮�
+- ies
+- centre
+- 鍚�
+- 浜�
+- 闆�
+- ask
+- 鍙�
+- 濯�
+- 铓�
+- 濂�
+- 鍨�
+- 妞�
+- 绡�
+- 鑱�
+- 绐�
+- 鍨�
+- diffe
+- 璐�
+- 瑜�
+- 璞�
+- 鑵�
+- 鍜�
+- 鐪�
+- 鏁�
+- 榧�
+- 鏄�
+- 椤�
+- 鐞�
+- 鐮�
+- 鍍�
+- 涔�
+- 鐬�
+- 鎾�
+- 缇�
+- 绾�
+- 璧�
+- 鐒�
+- 姗�
+- 宓�
+- 闄�
+- 妤�
+- 鐗�
+- 楣�
+- 甯�
+- may
+- 鍋�
+- 涓�
+- 鍏�
+- 鐮�
+- 婵�
+- 鐛�
+- 濂�
+- 鑵�
+- 闈�
+- 鏈�
+- 鍞�
+- 绾�
+- 瑁�
+- cl
+- hund
+- 浼�
+- 鐚�
+- 鐒�
+- 宀�
+- 杈�
+- 涓�
+- 鏌�
+- 绉�
+- 鑳�
+- 槌�
+- 宀�
+- 鍠�
+- 榛�
+- 瑙�
+- forty
+- 绾�
+- 娉�
+- 椹�
+- 閾�
+- 钀�
+- 绛�
+- 铔�
+- 鐞�
+- 楦�
+- 杩�
+- 閾�
+- read
+- 鍨�
+- 韪�
+- 濂�
+- 鍒�
+- 杞�
+- 鐩�
+- 榛�
+- 绾�
+- chil
+- 鎵�
+- 鐙�
+- 璐�
+- 璐�
+- 槌�
+- 绡�
+- der
+- 婊�
+- 宓�
+- 濡�
+- 鑷�
+- 璋�
+- 澶�
+- 闊�
+- 鎯�
+- 妫�
+- 宄�
+- ms
+- 鑳�
+- 鎭�
+- 鏋�
+- 楣�
+- 鍒�
+- 璁�
+- 鍞�
+- 姊�
+- 鎹�
+- 鐢�
+- 蹇�
+- 杈�
+- 姊�
+- 姝�
+- al
+- 娣�
+- 鑸�
+- 鎭�
+- 鐛�
+- 澶�
+- 鍠�
+- 锜�
+- 浼�
+- sh
+- '''s'
+- 濮�
+- 鐥�
+- ck
+- 淇�
+- 楂�
+- 铚�
+- 鎰�
+- 鍛�
+- mer
+- 妗�
+- 楝�
+- 鍨�
+- 鍥�
+- 鍝�
+- 榫�
+- 槌�
+- 楠�
+- 鐮�
+- 娉�
+- 鐚�
+- 鑼�
+- 鑼�
+- cha
+- small
+- 鍊�
+- 涔�
+- 濂�
+- 楣�
+- 鐥�
+- 濂�
+- 杈�
+- sting
+- 鍐�
+- 璧�
+- 娉�
+- 缇�
+- 鎮�
+- 鐒�
+- 鐬�
+- 绱�
+- 鍗�
+- take
+- 槌�
+- 鑿�
+- 濮�
+- 杈�
+- 鍢�
+- fast
+- 鍥�
+- 鍑�
+- 绾�
+- 鍜�
+- 绠�
+- well
+- 閽�
+- he
+- 杈�
+- 楣�
+- unk
+- 璇�
+- 娉�
+- 涓�
+- 闀�
+- 楠�
+- ve
+- 瀵�
+- pay
+- 榇�
+- 鐫�
+- 姘�
+- 鐗�
+- 娲�
+- 灏�
+- quo
+- 妞�
+- 鑺�
+- 闄�
+- 鑰�
+- 鏌�
+- 鐗�
+- 铓�
+- much
+- most
+- now
+- singapore
+- 涓�
+- 娲�
+- 鐕�
+- 鍟�
+- 鑳�
+- 濯�
+- 纰�
+- 婵�
+- 闄�
+- 鎵�
+- 鍔�
+- 鎱�
+- 绗�
+- 娉�
+- 閯�
+- hote
+- 缃�
+- 娌�
+- 钁�
+- 锠�
+- 浜�
+- 椴�
+- 鍑�
+- 钀�
+- 绫�
+- 鑳�
+- 鍝�
+- 渚�
+- 鏅�
+- 鍜�
+- 鏉�
+- 閭�
+- 鐢�
+- 鑳�
+- 鍘�
+- 鍢�
+- 鍠�
+- is
+- 闇�
+- 浠�
+- 槌�
+- 閴�
+- 鐚�
+- 绾�
+- 鍐�
+- 闃�
+- 楗�
+- 瀹�
+- 璞�
+- 鍎�
+- 鐚�
+- 鐎�
+- 鑽�
+- 婊�
+- 鑼�
+- 閰�
+- 榧�
+- 鏃�
+- 灞�
+- 姒�
+- s
+- 鍛�
+- 椹�
+- 钖�
+- 楣�
+- 棣�
+- 楣�
+- 绾�
+- thank
+- 杞�
+- 娓�
+- 闀�
+- cost
+- 姹�
+- 璋�
+- 鐢�
+- publi
+- 鍌�
+- 鎶�
+- 鏉�
+- 瑁�
+- 姝�
+- 閺�
+- 鐘�
+- 韫�
+- 鎰�
+- 鏂�
+- able
+- ind
+- 鐙�
+- 闆�
+- 濡�
+- 宸�
+- 鍢�
+- 绛�
+- 鍠�
+- 鑸�
+- 鍒�
+- fi
+- company
+- 閮�
+- 鐬�
+- 瑜�
+- 娓�
+- 宸�
+- 鑻�
+- 鍐�
+- 鏆�
+- 鍨�
+- 鎴�
+- 婧�
+- 鍓�
+- 钁�
+- 鐚�
+- 钄�
+- 宀�
+- 閭�
+- any
+- pri
+- 鍗�
+- 鍩�
+- 棣�
+- 鍑�
+- 鏌�
+- 婧�
+- 鍚�
+- 铚�
+- 鐐�
+- 寰�
+- inve
+- 妗�
+- frien
+- 閾�
+- 搴�
+- 寤�
+- 瑾�
+- 鍡�
+- 蹇�
+- i'
+- 铻�
+- 鑳�
+- un
+- 閾�
+- 閰�
+- 榉�
+- wi
+- 鐪�
+- 璺�
+- 姹�
+- 閽�
+- 閽�
+- 闆�
+- 闄�
+- 闈�
+- 铔�
+- 榛�
+- 鑴�
+- 鍙�
+- 鏋�
+- 鏁�
+- sk
+- 缂�
+- 鑿�
+- 鍕�
+- 姹�
+- 鑳�
+- 娲�
+- 娲�
+- ers
+- 楂�
+- 瀹�
+- 鑱�
+- 宓�
+- 鍥�
+- 娣�
+- 鍨�
+- 娆�
+- oms
+- 鎻�
+- before
+- 鐣�
+- 楗�
+- 绉�
+- 鑾�
+- 鍔�
+- 鐐�
+- 鍖�
+- 闄�
+- 钑�
+- 铦�
+- po
+- 鍖�
+- year
+- 姗�
+- pping
+- 娉�
+- 鎵�
+- 鐞�
+- 鍙�
+- 鐜�
+- 褰�
+- 缂�
+- 鎬�
+- 閬�
+- 鍧�
+- sta
+- 鐥�
+- 鍩�
+- 鍙�
+- 姘�
+- 鑴�
+- 鏄�
+- ments
+- 瀹�
+- 鐩�
+- 鎯�
+- 棣�
+- 缁�
+- 閬�
+- 鎵�
+- 钄�
+- 宕�
+- 闃�
+- 钁�
+- 娈�
+- 娼�
+- 姣�
+- ro
+- 闈�
+- 涓�
+- tional
+- 闂�
+- 娑�
+- 绗�
+- n
+- 闀�
+- 铚�
+- 鑳�
+- 琛�
+- 鐚�
+- 楣�
+- 淇�
+- 璞�
+- 浜�
+- 鐥�
+- 钃�
+- 楠�
+- 鎵�
+- 缁�
+- 绮�
+- 鏈�
+- 鍑�
+- 榄�
+- 閲�
+- ars
+- 閫�
+- clo
+- 甯�
+- 閬�
+- 璁�
+- 瑾�
+- 鐗�
+- 鍒�
+- 鎵�
+- 宄�
+- 9
+- 鏄�
+- 杞�
+- 瀣�
+- them
+- 绐�
+- 韫�
+- 楠�
+- 婀�
+- 鍫�
+- 宸�
+- 濠�
+- 楠�
+- 铻�
+- 閰�
+- ag
+- 闂�
+- 婊�
+- 閮�
+- 鏈�
+- supp
+- 鍊�
+- 鑵�
+- 瑜�
+- 鏋�
+- 婕�
+- 浣�
+- 鐓�
+- 娌�
+- 鑲�
+- experi
+- 浜�
+- 鍫�
+- 瀵�
+- 绉�
+- every
+- 閾�
+- 铔�
+- 鎿�
+- a
+- 槌�
+- 淇�
+- ze
+- 寮�
+- 瓒�
+- 缈�
+- 娑�
+- 鐩�
+- 閬�
+- 閬�
+- 灞�
+- 缁�
+- cont
+- 閮�
+- den
+- 浠�
+- 璇�
+- 绮�
+- 闀�
+- 婧�
+- 鍟�
+- 璐�
+- 杞�
+- 鐚�
+- 钖�
+- 鑳�
+- 鎶�
+- than
+- king
+- 鐕�
+- 绗�
+- 楦�
+- 钃�
+- 宄�
+- ent
+- 鎼�
+- 纾�
+- 鏄�
+- 濡�
+- 鐜�
+- 鍙�
+- 娓�
+- 褰�
+- lim
+- 瀛�
+- 鍥�
+- we
+- 缁�
+- 椹�
+- 鏋�
+- 鏂�
+- 璧�
+- 纰�
+- 婵�
+- 鍐�
+- 鍚�
+- 浣�
+- because
+- 鏅�
+- 椁�
+- 鐬�
+- 鏍�
+- 韫�
+- 鑽�
+- 灏�
+- 渚�
+- 姘�
+- 鐐�
+- 鎶�
+- 涓�
+- 娼�
+- 娼�
+- mon
+- 鍩�
+- 鏉�
+- 鍏�
+- 鎹�
+- 鎬�
+- 姝�
+- f
+- near
+- 鍒�
+- 鎼�
+- 楦�
+- 淇�
+- 鑰�
+- 鐗�
+- 鐬�
+- 楝�
+- 鎯�
+- bro
+- 娌�
+- 娴�
+- pe
+- 閲�
+- vi
+- jal
+- 鎷�
+- 鍐�
+- 鏆�
+- 榄�
+- 鐥�
+- 鐩�
+- 鏋�
+- 绉�
+- 鏉�
+- 閮�
+- 鍍�
+- 鏄�
+- 瓒�
+- sen
+- family
+- 宀�
+- 姒�
+- 铔�
+- 缁�
+- 鏉�
+- 楣�
+- 閽�
+- 鎷�
+- dly
+- 鑻�
+- 閿�
+- 璺�
+- 纰�
+- 閽�
+- 娣�
+- 槌�
+- 娴�
+- 鏌�
+- 妗�
+- 鎰�
+- 瑁�
+- 閭�
+- 鎭�
+- 妤�
+- 娌�
+- ban
+- ssi
+- 槌�
+- 瑜�
+- 閽�
+- y
+- 鎼�
+- 楠�
+- 鍧�
+- 濞�
+- 閯�
+- 琚�
+- 鍖�
+- 鍘�
+- 鑹�
+- 闅�
+- 鑸�
+- 韫�
+- 鏉�
+- 鑳�
+- 鐥�
+- 鍑�
+- 鎲�
+- ll
+- 鐫�
+- 绐�
+- 鐮�
+- 闆�
+- 鍊�
+- 绫�
+- 楗�
+- 浠�
+- 绗�
+- 闉�
+- 绛�
+- 鏍�
+- 鎻�
+- 娲�
+- 鍨�
+- 妾�
+- 鐪�
+- 鍋�
+- 闂�
+- 濮�
+- 灏�
+- 鐮�
+- 鎬�
+- 鎵�
+- 鎭�
+- 宀�
+- 浜�
+- 鍨�
+- 韪�
+- 娉�
+- 鎴�
+- 杞�
+- 鐑�
+- 澶�
+- 姝�
+- 宸�
+- 澹�
+- 鍙�
+- 缁�
+- 宸�
+- 閭�
+- 妞�
+- 纭�
+- 鐣�
+- 甯�
+- 娌�
+- 姒�
+- 蹇�
+- 濡�
+- 鐨�
+- 璁�
+- 閾�
+- 杞�
+- 琛�
+- 钂�
+- 钑�
+- things
+- 鑵�
+- 鍧�
+- 闉�
+- 姒�
+- 闇�
+- 鍨�
+- ous
+- 绮�
+- 鏋�
+- 钖�
+- 鑳�
+- 榫�
+- 缁�
+- ning
+- med
+- 鎸�
+- 璇�
+- 璇�
+- 浣�
+- 鍗�
+- 杈�
+- 鑺�
+- 鍙�
+- 缈�
+- 灏�
+- 闈�
+- 閰�
+- 鍜�
+- 鏆�
+- 濮�
+- 璨�
+- 澧�
+- 鍖�
+- been
+- 澧�
+- 鑺�
+- sc
+- 濡�
+- 鐙�
+- 宀�
+- ants
+- 璋�
+- 闉�
+- 濠�
+- one
+- 韪�
+- 璁�
+- land
+- 骞�
+- 杌�
+- 鏅�
+- wi
+- 鐟�
+- 渚�
+- 鍟�
+- 鏈�
+- 鏋�
+- 濂�
+- 鍜�
+- 瑷�
+- 鑷�
+- 纭�
+- 甯�
+- dent
+- 鎴�
+- 鐦�
+- 娈�
+- 鍌�
+- oun
+- 鏋�
+- 灞�
+- 閲�
+- 鑺�
+- 璇�
+- 閽�
+- 鍫�
+- uring
+- 璇�
+- 琛�
+- 棣�
+- 榫�
+- 璁�
+- stre
+- 濡�
+- 椹�
+- 钁�
+- 妲�
+- 娲�
+- side
+- 姘�
+- 瀛�
+- 鐙�
+- 姝�
+- 鍥�
+- 鎰�
+- 杩�
+- pu
+- right
+- 璧�
+- 闃�
+- mber
+- 璋�
+- town
+- 鐩�
+- 鏁�
+- 楗�
+- loc
+- spa
+- 姗�
+- 鍓�
+- 钂�
+- 濮�
+- 鍘�
+- 閾�
+- 濂�
+- 寮�
+- 鏍�
+- 楣�
+- 绯�
+- 楱�
+- 闆�
+- 瑁�
+- 鎷�
+- 榧�
+- some
+- 鎼�
+- 鍧�
+- 钃�
+- ice
+- 鍗�
+- ad
+- 鐦�
+- 婀�
+- 姘�
+- 璋�
+- 缂�
+- ard
+- 鏍�
+- 杩�
+- 娴�
+- 绉�
+- 钀�
+- always
+- 姹�
+- 韪�
+- 鏉�
+- ded
+- 楂�
+- 绉�
+- 婀�
+- 绮�
+- 灞�
+- 妲�
+- 鍨�
+- 璧�
+- 鎯�
+- lar
+- 钖�
+- 杩�
+- p
+- 濉�
+- ga
+- ey
+- ho
+- 鎭�
+- 鏅�
+- 娉�
+- 鑼�
+- 鍡�
+- 缁�
+- lit
+- 娓�
+- 閬�
+- 娲�
+- 鏅�
+- 婧�
+- ying
+- 鏋�
+- 鏃�
+- 閫�
+- 鐝�
+- 绾�
+- 缃�
+- ons
+- 宥�
+- 鐟�
+- 瑁�
+- 鎼�
+- '6'
+- 鍜�
+- 鍞�
+- 鐟�
+- 鐫�
+- 鏂�
+- din
+- 浼�
+- 楠�
+- 鍑�
+- 鐦�
+- 鍍�
+- 闉�
+- 鍏�
+- 浼�
+- 鏌�
+- 甯�
+- 宄�
+- 閯�
+- 鎶�
+- an
+- 鐗�
+- loca
+- 绌�
+- 鎳�
+- '2'
+- amer
+- 绋�
+- 鍙�
+- bra
+- 鎷�
+- str
+- res
+- 娌�
+- 楗�
+- pen
+- 鍘�
+- 鍌�
+- ality
+- 寤�
+- 寮�
+- 澶�
+- 鐫�
+- 闆�
+- 鏅�
+- 鍓�
+- 璧�
+- 鐤�
+- 鐮�
+- 褰�
+- min
+- 寰�
+- 闃�
+- tions
+- 濂�
+- 钀�
+- 閭�
+- 鍋�
+- 浼�
+- er
+- 绛�
+- 铔�
+- business
+- 椤�
+- 鐦�
+- 娲�
+- 閽�
+- 瑙�
+- 鏀�
+- 鍔�
+- 鎾�
+- break
+- 姘�
+- 婕�
+- sor
+- 鍧�
+- 甯�
+- 澧�
+- ation
+- 鐬�
+- 绋�
+- 鎱�
+- 鑲�
+- 韬�
+- 绯�
+- 缇�
+- 璞�
+- us
+- 鐐�
+- 楗�
+- 鎸�
+- 鎸�
+- 鏋�
+- dr
+- 閾�
+- 濂�
+- 妲�
+- 缁�
+- 鎿�
+- 铏�
+- 閽�
+- 鑳�
+- 鍑�
+- 杈�
+- 铏�
+- 閾�
+- 鏉�
+- 鐮�
+- 璧�
+- 娼�
+- 鍚�
+- 榘�
+- 鍚�
+- 杩�
+- 婊�
+- 寮�
+- ff
+- 鐥�
+- 铔�
+- 瑙�
+- 鏉�
+- 鍢�
+- 鍋�
+- 璧�
+- 鐑�
+- 瀛�
+- 閽�
+- 榫�
+- 閽�
+- 鐫�
+- 鎾�
+- big
+- 绗�
+- 淇�
+- 閿�
+- 鍛�
+- 娆�
+- cle
+- 鐝�
+- 娴�
+- 澧�
+- hel
+- 褰�
+- 濮�
+- 鎷�
+- 闆�
+- hi
+- 閽�
+- 澹�
+- 骞�
+- 鍗�
+- 鐥�
+- 婢�
+- 闀�
+- 鑸�
+- 鍏�
+- 缁�
+- 妫�
+- 灏�
+- 绐�
+- 鐕�
+- 鑶�
+- 闀�
+- 瑁�
+- 鍔�
+- resta
+- 寤�
+- 瀹�
+- 钃�
+- 濞�
+- 鐚�
+- 闃�
+- 锜�
+- 鎭�
+- 璁�
+- 铚�
+- of
+- 鎬�
+- 绗�
+- 鐬�
+- 璧�
+- 宀�
+- 绂�
+- 鏄�
+- 濂�
+- 閽�
+- 鎱�
+- 婊�
+- ack
+- 璺�
+- 璐�
+- 宸�
+- 瀵�
+- 閾�
+- pro
+- 鏈�
+- mor
+- 绠�
+- 涓�
+- ready
+- 姝�
+- 璺�
+- 缂�
+- 妗�
+- 璁�
+- 鎽�
+- 鐘�
+- 椹�
+- 鍜�
+- 閾�
+- 缁�
+- 鍙�
+- 閬�
+- 閿�
+- 閲�
+- 绗�
+- 缂�
+- 宓�
+- 鑶�
+- 甯�
+- 娉�
+- 槌�
+- 瀚�
+- 浠�
+- 鎹�
+- 寰�
+- each
+- vil
+- 濯�
+- 琚�
+- 瑁�
+- 浣�
+- 璧�
+- 鏈�
+- 绗�
+- 鍟�
+- 杞�
+- 鎺�
+- 褰�
+- 姘�
+- got
+- 缂�
+- 锠�
+- 鏀�
+- 瀵�
+- 楠�
+- 鍣�
+- ar
+- 妫�
+- 鐤�
+- 璞�
+- 绁�
+- 琛�
+- 鍖�
+- 鐜�
+- 瑙�
+- 搴�
+- 浜�
+- 绡�
+- 閾�
+- 鍜�
+- 绂�
+- 鑹�
+- 鍝�
+- hope
+- 绯�
+- 濞�
+- 绐�
+- 楂�
+- 闃�
+- 鎳�
+- 鍙�
+- 绂�
+- 绌�
+- 娓�
+- 閭�
+- 鑲�
+- 鏉�
+- 鑲�
+- 鍨�
+- 骞�
+- ct
+- 鑻�
+- 闅�
+- wh
+- 瀹�
+- 鐓�
+- 鏌�
+- 闀�
+- 鑰�
+- kes
+- 婵�
+- 鐥�
+- work
+- 鍚�
+- 褰�
+- 鏍�
+- 绛�
+- 璐�
+- 灏�
+- ere
+- 娼�
+- 鐭�
+- 绺�
+- 鐟�
+- 娌�
+- 婧�
+- 婕�
+- rooms
+- 闇�
+- 鍜�
+- e
+- 鐝�
+- 浠�
+- 鐪�
+- 閱�
+- 绁�
+- 榧�
+- 绐�
+- ia
+- hundred
+- 鐡�
+- 灏�
+- no
+- 瑗�
+- 娼�
+- 妤�
+- 鍌�
+- 铚�
+- 'yes'
+- 铔�
+- sig
+- gs
+- 绛�
+- 鍥�
+- 璋�
+- bur
+- 璺�
+- 椴�
+- 鎯�
+- 鑲�
+- dy
+- 铻�
+- 闇�
+- 鐪�
+- 閮�
+- 瀹�
+- 鐣�
+- 鍒�
+- 缈�
+- 璇�
+- 璺�
+- or
+- 棰�
+- 璺�
+- 娣�
+- 鑷�
+- 妤�
+- 灞�
+- 璧�
+- 缇�
+- you
+- 鍜�
+- als
+- 鏂�
+- 鐣�
+- 鏅�
+- 蹇�
+- 鑲�
+- 瀹�
+- 鍑�
+- 瑜�
+- ba
+- 鎽�
+- 鐔�
+- 绡�
+- 涔�
+- 缁�
+- 娌�
+- 鐬�
+- 鍝�
+- 鑸�
+- 鐢�
+- 浜�
+- 鐟�
+- 韪�
+- 榛�
+- 椴�
+- cal
+- 鐠�
+- enty
+- 鍓�
+- 娲�
+- 瀛�
+- 绁�
+- 澶�
+- 宀�
+- 鎷�
+- nineteen
+- cause
+- 鎰�
+- 缈�
+- 鐬�
+- 鏄�
+- 寮�
+- 鎽�
+- 涔�
+- 鍜�
+- 鍕�
+- 浼�
+- 鐟�
+- 姗�
+- 涓�
+- 閾�
+- 鎬�
+- 缁�
+- 鑶�
+- 5
+- 鎵�
+- to
+- 鐚�
+- 鐞�
+- 缇�
+- 鍙�
+- 鐙�
+- 鐪�
+- 鐑�
+- 铓�
+- 鍎�
+- 鎻�
+- ber
+- '1'
+- 鎺�
+- 瑁�
+- 钘�
+- r
+- squ
+- 椴�
+- 娼�
+- 鏅�
+- 娑�
+- 楦�
+- 瑙�
+- 璇�
+- 鍗�
+- as
+- 寮�
+- 鑴�
+- 閾�
+- 鍐�
+- 鎳�
+- 鍟�
+- 娑�
+- 鎵�
+- 鍨�
+- 妫�
+- 璧�
+- 鍨�
+- 闀�
+- 鎬�
+- 宄�
+- 鎯�
+- 缂�
+- ass
+- 璇�
+- tive
+- never
+- 鏄�
+- ence
+- 鑰�
+- 寮�
+- 鏈�
+- 鏋�
+- 瑗�
+- 濂�
+- 绁�
+- 楠�
+- 缇�
+- 璞�
+- 鐦�
+- 璐�
+- 绂�
+- 鍝�
+- 绁�
+- 铻�
+- 閿�
+- 瀛�
+- st
+- 蹇�
+- 鎺�
+- 鏄�
+- 鍝�
+- 楣�
+- 鐤�
+- 閬�
+- 锠�
+- 鎭�
+- 鐙�
+- 鎲�
+- 鎮�
+- 寮�
+- 鑴�
+- soci
+- 鐖�
+- home
+- 濂�
+- 鎳�
+- 鐖�
+- 甯�
+- 瑗�
+- 闄�
+- ters
+- ted
+- 鏅�
+- 鏄�
+- eas
+- ven
+- 韫�
+- ac
+- 姝�
+- fas
+- 鎶�
+- 鐗�
+- 娴�
+- 姒�
+- lan
+- 宸�
+- 鍒�
+- secon
+- 椤�
+- 涓�
+- 椴�
+- 鍡�
+- out
+- 鍑�
+- 瑜�
+- 鐟�
+- 鐝�
+- 鎯�
+- 鏂�
+- 鐩�
+- 鍦�
+- did
+- date
+- 闄�
+- 鐪�
+- 婀�
+- 璋�
+- 鎾�
+- 绀�
+- 鎶�
+- 鏃�
+- 鍊�
+- 锜�
+- produ
+- 蹇�
+- free
+- 鐡�
+- 搴�
+- 鏅�
+- 鍋�
+- 楹�
+- 缂�
+- 鍞�
+- 鏉�
+- 鎴�
+- 鍧�
+- 鐨�
+- 鎷�
+- 绨�
+- 鎬�
+- 璐�
+- 闄�
+- 鍩�
+- 鐨�
+- 绱�
+- 閿�
+- 鑻�
+- 鑿�
+- 鍤�
+- 鍏�
+- 瀚�
+- 闈�
+- d
+- 灏�
+- 棰�
+- 鑻�
+- de
+- inc
+- 绛�
+- 娴�
+- 绐�
+- 涔�
+- 澹�
+- pi
+- 鑽�
+- 鎰�
+- 鐮�
+- 宸�
+- ke
+- 鏃�
+- 鍜�
+- 鐭�
+- 濠�
+- 缁�
+- 绾�
+- 濂�
+- 琚�
+- 鍠�
+- 璐�
+- 閸�
+- 閫�
+- days
+- 鐤�
+- 绛�
+- 楂�
+- 閿�
+- 淇�
+- 缂�
+- 閿�
+- 楠�
+- belie
+- 璺�
+- 宀�
+- 鐫�
+- 铚�
+- 閾�
+- 闂�
+- 楹�
+- 瓒�
+- 閿�
+- 褰�
+- 缂�
+- 鐢�
+- 鑻�
+- 閽�
+- ph
+- ef
+- 鐏�
+- 閵�
+- 濉�
+- 娣�
+- 楦�
+- 鎾�
+- 娴�
+- though
+- 鎴�
+- av
+- 鑽�
+- 瑕�
+- road
+- wn
+- 绾�
+- 甯�
+- 瑁�
+- 棰�
+- end
+- 姹�
+- 椹�
+- '8'
+- 妞�
+- 鐗�
+- ties
+- 渚�
+- 妯�
+- 鎵�
+- 鍚�
+- 钑�
+- 鍨�
+- 鑼�
+- 娉�
+- wor
+- 鑹�
+- 娆�
+- 鐝�
+- 寤�
+- 鍒�
+- 鐩�
+- 绨�
+- bus
+- ten
+- check
+- 纭�
+- 姒�
+- 琚�
+- 鍘�
+- 棣�
+- 姗�
+- 娑�
+- 鎶�
+- 鍏�
+- 棰�
+- 鎵�
+- 娓�
+- 搴�
+- 鐛�
+- 鐦�
+- 鍠�
+- 鏈�
+- 韪�
+- 闀�
+- 绯�
+- 缈�
+- 鍏�
+- 鑻�
+- gi
+- 椤�
+- 铓�
+- 鐞�
+- 鍘�
+- 妲�
+- 婊�
+- 閽�
+- ple
+- 鍙�
+- 鍥�
+- 鐓�
+- 鏉�
+- 閾�
+- 涓�
+- 浠�
+- 鍜�
+- 鑾�
+- 璇�
+- 鍠�
+- 鏃�
+- 鏋�
+- 棰�
+- 鍦�
+- 闇�
+- 鑰�
+- 闃�
+- 宓�
+- 楸�
+- 鍊�
+- me
+- 鍜�
+- sing
+- 鐪�
+- 缇�
+- 浜�
+- 姗�
+- 鑿�
+- 宸�
+- 鏌�
+- 璧�
+- 鏆�
+- d
+- 鎹�
+- world
+- 闅�
+- 閼�
+- 鎹�
+- 杞�
+- 鎸�
+- 鍟�
+- 鑱�
+- 鐐�
+- 婀�
+- 瑭�
+- 楗�
+- 鏌�
+- 鎭�
+- 鎺�
+- 鐖�
+- 鏂�
+- 绐�
+- 姝�
+- 鍒�
+- 瀹�
+- 椁�
+- 榈�
+- 鏄�
+- 姒�
+- 閫�
+- 绮�
+- 淇�
+- 闊�
+- 浠�
+- 缃�
+- 鐮�
+- 钃�
+- 铇�
+- 缂�
+- going
+- 閬�
+- your
+- ever
+- 鍒�
+- 娣�
+- 鍐�
+- 濡�
+- 閽�
+- 鎬�
+- vern
+- z
+- 寮�
+- 鐮�
+- 铚�
+- 楗�
+- 鍐�
+- 琛�
+- 闊�
+- 瀵�
+- 铏�
+- 鐜�
+- 3
+- tho
+- 闂�
+- 鏃�
+- 鐓�
+- 宥�
+- 娑�
+- 榫�
+- ko
+- ge
+- 閬�
+- 閮�
+- 瑜�
+- 钖�
+- 閭�
+- 瀚�
+- 鍙�
+- 瀚�
+- 婕�
+- gue
+- 钘�
+- 鑷�
+- 灞�
+- ily
+- 鏃�
+- 鐥�
+- 鍩�
+- 鍥�
+- 淇�
+- ours
+- 钃�
+- 娓�
+- 閿�
+- 鏈�
+- 闄�
+- 璧�
+- make
+- 鐏�
+- 姹�
+- 鍩�
+- 鍚�
+- 鏍�
+- 缈�
+- 纾�
+- 姹�
+- 妞�
+- 閿�
+- 鎼�
+- 钂�
+- 闊�
+- 鎾�
+- 绾�
+- 绗�
+- order
+- 鍧�
+- 浼�
+- val
+- 鐑�
+- 姊�
+- 鐟�
+- 闄�
+- go
+- 缁�
+- 鐞�
+- 婀�
+- 纭�
+- 鐔�
+- 瑙�
+- 渚�
+- 寰�
+- 鐗�
+- 绉�
+- 鐭�
+- 绁�
+- 璺�
+- age
+- 鍒�
+- 缂�
+- 澶�
+- 闃�
+- 鍒�
+- am
+- 鑻�
+- 姝�
+- cess
+- 钃�
+- he
+- 鍠�
+- 鐣�
+- 閯�
+- 璁�
+- 闆�
+- 鍣�
+- sel
+- 閽�
+- 鑳�
+- 韫�
+- help
+- 鎹�
+- 鑺�
+- 鑲�
+- 鎺�
+- 鏃�
+- 璎�
+- 鐢�
+- ort
+- 鎼�
+- 鍝�
+- 楠�
+- 鍚�
+- 鍙�
+- 鍊�
+- 鑲�
+- 鎲�
+- 绔�
+- product
+- 妫�
+- 榛�
+- 鐮�
+- 鎰�
+- 鏄�
+- 闃�
+- 榄�
+- 鎸�
+- 鐚�
+- pa
+- 鍑�
+- 鍘�
+- 浣�
+- 椴�
+- 绮�
+- 楣�
+- 濮�
+- 鎶�
+- 绫�
+- part
+- found
+- 鍡�
+- 鎻�
+- 铔�
+- 绫�
+- 鐩�
+- 瑁�
+- 鏆�
+- 濡�
+- 鍠�
+- 鎯�
+- wha
+- 绐�
+- ks
+- 姗�
+- 缁�
+- 鑿�
+- 楗�
+- ste
+- 鐚�
+- 鑺�
+- 鎼�
+- tain
+- street
+- 鍛�
+- 鎳�
+- 琚�
+- 鑲�
+- on
+- 浜�
+- 璋�
+- 妞�
+- 鑾�
+- 鏍�
+- 韬�
+- 棰�
+- 鍦�
+- 鍖�
+- mail
+- 绋�
+- 骞�
+- 鑴�
+- 閮�
+- 鎷�
+- fo
+- 閼�
+- 鏀�
+- la
+- 杞�
+- 鎺�
+- 鑺�
+- can't
+- 褰�
+- 瑙�
+- 閭�
+- 鍩�
+- 鑾�
+- 浼�
+- 鑵�
+- 闃�
+- tre
+- 鐪�
+- 璐�
+- 闈�
+- 閽�
+- point
+- 鍍�
+- 搴�
+- 鍏�
+- 杩�
+- 绌�
+- 閫�
+- 婧�
+- 鍜�
+- 浠�
+- 缁�
+- 鍙�
+- 鍡�
+- 閫�
+- 渚�
+- 鎶�
+- 缇�
+- 娆�
+- 鑰�
+- 閿�
+- 缃�
+- 璺�
+- 姹�
+- 璧�
+- 绱�
+- 鎺�
+- 鍟�
+- 鑶�
+- 鎹�
+- 绂�
+- 宕�
+- 鏍�
+- 鎺�
+- 璁�
+- y
+- 鐟�
+- 璐�
+- 鎸�
+- 楣�
+- 鍎�
+- 槌�
+- 楠�
+- 閲�
+- 鐭�
+- 娼�
+- ort
+- 鐥�
+- 鏃�
+- o
+- 鎲�
+- 闀�
+- 铚�
+- 澹�
+- 钁�
+- 鑱�
+- 鐏�
+- 绔�
+- 鎬�
+- m
+- 璇�
+- 韬�
+- 娉�
+- would
+- 缂�
+- 妯�
+- 杩�
+- 铔�
+- ls
+- 楣�
+- 椴�
+- 鑼�
+- 缃�
+- ques
+- 鐣�
+- 鍘�
+- 缂�
+- 鍔�
+- 鎷�
+- 钀�
+- 鍊�
+- 鍋�
+- 闀�
+- 韪�
+- 瓒�
+- 鎵�
+- 妞�
+- 鐥�
+- 鍙�
+- 绉�
+- thing
+- q
+- fre
+- 鍓�
+- beach
+- 鑸�
+- 榛�
+- 鎵�
+- 楗�
+- 娆�
+- 楠�
+- 钃�
+- 閭�
+- 鐤�
+- ''''
+- 鍚�
+- 鎾�
+- 鑻�
+- 铚�
+- mar
+- 妗�
+- made
+- 鐓�
+- 瑗�
+- 鍨�
+- 濮�
+- 閫�
+- 鏌�
+- 鍩�
+- 鎬�
+- 铓�
+- little
+- 娈�
+- 鐐�
+- 铔�
+- 浼�
+- 绂�
+- 鐫�
+- 鎹�
+- 棰�
+- 鐚�
+- 锜�
+- 宀�
+- o
+- 绾�
+- 濡�
+- 闃�
+- 鐜�
+- 钂�
+- 瀵�
+- 瀛�
+- 鎯�
+- 娲�
+- 娌�
+- 閾�
+- 闀�
+- 娼�
+- 鏉�
+- tal
+- 闀�
+- 铏�
+- 槎�
+- 妞�
+- 绨�
+- 閿�
+- 椤�
+- 浠�
+- 棰�
+- 鎺�
+- 鑰�
+- 鑽�
+- 鎾�
+- 璋�
+- 娌�
+- 鎯�
+- 纰�
+- 鐨�
+- 宄�
+- 鐐�
+- 鍘�
+- 鎷�
+- 寰�
+- 甯�
+- 铚�
+- 鑾�
+- 杩�
+- 璇�
+- 鍠�
+- sequence
+- 鎰�
+- ure
+- 鏋�
+- thous
+- 妲�
+- 绉�
+- 鑿�
+- 鑿�
+- ten
+- 鐧�
+- 鐝�
+- 婀�
+- 鐡�
+- 鍚�
+- 濡�
+- 鍐�
+- 鍌�
+- 铓�
+- 鑽�
+- 瀛�
+- 鍖�
+- 鍫�
+- 鑽�
+- 缁�
+- 鍒�
+- des
+- 1
+- 鐣�
+- 瑁�
+- 璧�
+- ving
+- 鍏�
+- ly
+- 鐫�
+- 閬�
+- 鐞�
+- 淇�
+- 宀�
+- 铔�
+- 钂�
+- nor
+- 閽�
+- 楹�
+- 绾�
+- coun
+- 鏉�
+- 瀹�
+- 鍚�
+- 铦�
+- 婀�
+- 鎯�
+- enjo
+- 鍗�
+- 鐗�
+- 璇�
+- 璋�
+- 鎽�
+- 铻�
+- 鐒�
+- 鐩�
+- mple
+- 钀�
+- 鐏�
+- 鐣�
+- when
+- 鐑�
+- 淇�
+- 琛�
+- 榫�
+- 璋�
+- 鍞�
+- 妤�
+- 鐥�
+- 鏇�
+- 缇�
+- 缃�
+- 鑴�
+- 寤�
+- sur
+- 鎷�
+- 娲�
+- 婀�
+- 楦�
+- 鍐�
+- 璺�
+- school
+- 灏�
+- 鎸�
+- 鐤�
+- 璁�
+- self
+- 鎷�
+- 婧�
+- 鍨�
+- 鐬�
+- 绗�
+- thousand
+- 涔�
+- 鍣�
+- lie
+- 娌�
+- 淇�
+- 鐏�
+- 璧�
+- 鐮�
+- 閫�
+- 鏍�
+- loo
+- 缃�
+- 槌�
+- 缈�
+- 閾�
+- 璇�
+- 閰�
+- 浣�
+- 姝�
+- 鐟�
+- 鐪�
+- 鐠�
+- 鎷�
+- u
+- 棰�
+- 绗�
+- ur
+- 铦�
+- 铔�
+- number
+- 棣�
+- place
+- 浼�
+- 鐑�
+- ver
+- 娣�
+- 鍤�
+- 榧�
+- 璎�
+- 瓒�
+- 鎭�
+- 鎼�
+- 浼�
+- 鏉�
+- 鐞�
+- 宕�
+- 鑻�
+- 鍏�
+- 鐔�
+- 鍦�
+- 娉�
+- 姣�
+- very
+- 鐞�
+- 瀛�
+- 鎵�
+- 鎭�
+- t's
+- 鍩�
+- do
+- 鏀�
+- 鏄�
+- 闈�
+- feel
+- 閰�
+- 钑�
+- der
+- 鐒�
+- gen
+- 杈�
+- 瑙�
+- 骞�
+- 妞�
+- 鑰�
+- 榛�
+- 瀵�
+- 閱�
+- 鑰�
+- 鎾�
+- 钄�
+- 鏇�
+- 璋�
+- 鐑�
+- 鏃�
+- kno
+- 鐖�
+- 绲�
+- 娆�
+- 鍦�
+- 鑷�
+- ch
+- 璋�
+- 鑽�
+- 宸�
+- 鎿�
+- 宓�
+- 闇�
+- ames
+- wel
+- 鍙�
+- 鍌�
+- tra
+- 闅�
+- 鍞�
+- 婢�
+- 鐒�
+- 绐�
+- 鍓�
+- 鑿�
+- 閿�
+- 閿�
+- 鏋�
+- 濞�
+- 渚�
+- 姗�
+- 鎰�
+- 娉�
+- 鑼�
+- 鑽�
+- 閬�
+- 锜�
+- 鎹�
+- 鎸�
+- 韫�
+- 鏆�
+- ity
+- 鐝�
+- 椹�
+- 鍡�
+- 璐�
+- 鎵�
+- 纭�
+- 娌�
+- 楦�
+- 钑�
+- 浠�
+- looking
+- 鍐�
+- 閭�
+- 璧�
+- 宓�
+- chi
+- 鍏�
+- 妗�
+- about
+- 鑼�
+- es
+- 缂�
+- 钑�
+- 闂�
+- 鐙�
+- 楠�
+- 灏�
+- 椴�
+- und
+- 璇�
+- 濮�
+- 鑴�
+- 鑵�
+- 鎷�
+- 濠�
+- 鏉�
+- 淇�
+- 缁�
+- they
+- 璺�
+- 鍍�
+- 杈�
+- 楠�
+- 鐫�
+- 澶�
+- door
+- 鎹�
+- 闈�
+- 搴�
+- 鍔�
+- 鑸�
+- 楗�
+- 骞�
+- ry
+- 鎮�
+- 鐣�
+- 鍏�
+- 鐣�
+- ye
+- those
+- 鑿�
+- 姒�
+- 閭�
+- city
+- 娉�
+- 澶�
+- better
+- 鍚�
+- 閰�
+- 钑�
+- 鐘�
+- 纾�
+- 閿�
+- 楣�
+- 楣�
+- you
+- 鑰�
+- 姊�
+- 楦�
+- 姒�
+- 绗�
+- 鐫�
+- 姝�
+- 閬�
+- 椋�
+- 绗�
+- 钃�
+- 浜�
+- 娆�
+- 涓�
+- 鐞�
+- 瀹�
+- ki
+- tle
+- 鐩�
+- 婕�
+- years
+- 鐑�
+- 鎹�
+- 棣�
+- re
+- 璇�
+- tur
+- deci
+- 鍔�
+- 浣�
+- bl
+- of
+- 鍣�
+- 闃�
+- 姊�
+- 瀛�
+- 鏉�
+- 瑕�
+- 鐬�
+- 鐟�
+- 瑙�
+- 寰�
+- 娉�
+- six
+- 闆�
+- cust
+- 鐦�
+- 灏�
+- 楸�
+- 铏�
+- 鏉�
+- other
+- 琛�
+- 鑺�
+- 鍗�
+- 鎿�
+- 閿�
+- 鐜�
+- lu
+- 缂�
+- 鐎�
+- 缂�
+- 瀚�
+- 鍋�
+- 杩�
+- 鍘�
+- 椤�
+- 鐠�
+- 閿�
+- 鎮�
+- 'off'
+- 鑻�
+- 鎭�
+- 鍥�
+- 閿�
+- 鑸�
+- 绮�
+- 涓�
+- 瀛�
+- 閰�
+- 椴�
+- 璞�
+- 鍘�
+- 鐞�
+- 璋�
+- 绨�
+- 鐧�
+- 铓�
+- 閫�
+- 鎴�
+- 浜�
+- 棰�
+- cur
+- 鑵�
+- 鍛�
+- 娌�
+- 楠�
+- 宕�
+- 鎵�
+- 鎬�
+- app
+- 瀛�
+- 瀹�
+- 鑹�
+- 韪�
+- 娣�
+- 鍡�
+- 鐛�
+- 鎬�
+- 鐮�
+- 闀�
+- 姣�
+- table
+- '@'
+- 椴�
+- 瀵�
+- 妗�
+- 鍢�
+- 楂�
+- 鍝�
+- 缁�
+- 鍡�
+- 楹�
+- let
+- 鍋�
+- 鑻�
+- 鐕�
+- rou
+- 鍝�
+- 閯�
+- 缈�
+- 閰�
+- 鍜�
+- 鍡�
+- 钂�
+- 铏�
+- 鍥�
+- 鏉�
+- 楣�
+- 鎸�
+- per
+- 鐭�
+- 鐝�
+- 鍥�
+- 鍐�
+- 浣�
+- 娲�
+- 婕�
+- 鑿�
+- 闀�
+- 灏�
+- low
+- 鑻�
+- 鍓�
+- could
+- 鍩�
+- 閾�
+- 缁�
+- 渚�
+- 鏄�
+- then
+- 鍩�
+- 宄�
+- 鏅�
+- 鐫�
+- 鎼�
+- 鏅�
+- 閾�
+- 鍋�
+- might
+- 姘�
+- 鍛�
+- 娣�
+- ge
+- 缂�
+- 鑴�
+- 鑶�
+- 鑻�
+- pla
+- 妗�
+- 鐢�
+- 椤�
+- 璁�
+- 鑹�
+- tic
+- 鐮�
+- 鑽�
+- 瀹�
+- 娑�
+- 闉�
+- 娼�
+- 閮�
+- 瀹�
+- 鐮�
+- 锜�
+- 宀�
+- 璧�
+- 铔�
+- 绮�
+- 椹�
+- 鑶�
+- 鍨�
+- nee
+- 閫�
+- 鑺�
+- 铻�
+- 瓒�
+- change
+- j
+- 閿�
+- 绠�
+- 绗�
+- 寮�
+- 鐣�
+- 鍒�
+- 鏀�
+- 铔�
+- 鐑�
+- 鑴�
+- 鏄�
+- 鍜�
+- 缇�
+- 鑺�
+- 鍗�
+- 鍣�
+- 椋�
+- 铚�
+- some
+- 宕�
+- 瀣�
+- 浠�
+- 鏌�
+- 楣�
+- 澧�
+- 涔�
+- 瀚�
+- 楠�
+- 宸�
+- 鎲�
+- 鍒�
+- 椤�
+- 纾�
+- 浼�
+- 瀛�
+- 纰�
+- 鍫�
+- 绁�
+- 槌�
+- 姹�
+- ab
+- 琚�
+- 璇�
+- 鍓�
+- 閿�
+- 閿�
+- 鐩�
+- 绠�
+- 鐦�
+- restaur
+- 楣�
+- 鐦�
+- 鐭�
+- his
+- 鐏�
+- 鍕�
+- 鎷�
+- 鍛�
+- 婕�
+- 缃�
+- 鍤�
+- 鐒�
+- tru
+- 璧�
+- 浼�
+- 鍨�
+- 鎿�
+- 闀�
+- 鎯�
+- 璐�
+- 鎯�
+- 鎹�
+- sto
+- 鐘�
+- were
+- 鑹�
+- 浣�
+- 閰�
+- 鑰�
+- 鐚�
+- 閬�
+- 璞�
+- 鎬�
+- 钃�
+- 娉�
+- 灞�
+- 鐒�
+- 绛�
+- 缂�
+- 鏌�
+- 铚�
+- 鐝�
+- 鎬�
+- 娣�
+- .
+- 闈�
+- 杞�
+- 鍫�
+- 娼�
+- 鎳�
+- 閭�
+- 鎯�
+- 绮�
+- 鏂�
+- 婕�
+- re's
+- 绨�
+- 淇�
+- 姊�
+- tr
+- 鑲�
+- 钁�
+- 椴�
+- 鍛�
+- 娓�
+- 寤�
+- 鏀�
+- 槌�
+- 瑕�
+- 鐞�
+- 鐔�
+- 缈�
+- 姘�
+- 椋�
+- 铻�
+- 濯�
+- 杈�
+- 鏃�
+- 姒�
+- 鐟�
+- 鍡�
+- 娓�
+- ic
+- 鐔�
+- 鏆�
+- 鏌�
+- 楝�
+- 鏂�
+- 澶�
+- 鏅�
+- 姘�
+- 鐐�
+- dn't
+- 鎶�
+- 鎾�
+- 鐑�
+- 鍋�
+- 璨�
+- 鍐�
+- 鑳�
+- 绯�
+- 娑�
+- 鍜�
+- 鍚�
+- 鎽�
+- 閷�
+- 鍨�
+- 鎻�
+- 鑸�
+- 宥�
+- 绛�
+- 浣�
+- 閰�
+- 闀�
+- 宸�
+- 鍡�
+- 閾�
+- 鏆�
+- off
+- 鑼�
+- 楂�
+- 鎸�
+- 鐬�
+- 浼�
+- 蹇�
+- 鍍�
+- 钀�
+- 鏅�
+- that
+- 鐖�
+- 绋�
+- 鐡�
+- 宕�
+- um
+- 纾�
+- 鎶�
+- 鐣�
+- 缂�
+- 妤�
+- 鎮�
+- 鍒�
+- 绛�
+- 棣�
+- 鑺�
+- 閬�
+- 榛�
+- 杩�
+- 铦�
+- 鑵�
+- 鎵�
+- 宄�
+- 鎮�
+- 绂�
+- 鎯�
+- five
+- 灞�
+- 鐮�
+- ver
+- 鎭�
+- 鏂�
+- 韫�
+- 鐘�
+- 缁�
+- ep
+- 鍠�
+- 浜�
+- 宓�
+- 姹�
+- 鐙�
+- 杩�
+- 鐩�
+- line
+- 濡�
+- 韬�
+- 鐞�
+- 璧�
+- le
+- 鐤�
+- 绌�
+- 鐒�
+- 璺�
+- 鍖�
+- feat
+- 楣�
+- 鐠�
+- 钄�
+- 鐧�
+- 鎰�
+- 鑺�
+- 鍚�
+- 璋�
+- 鐔�
+- 鍝�
+- ad
+- 杞�
+- 绁�
+- 娉�
+- 鐦�
+- 鍒�
+- 绨�
+- 璋�
+- lea
+- 鍨�
+- 鍟�
+- 婊�
+- 楗�
+- 鐬�
+- 鐛�
+- 鍊�
+- 鑲�
+- 瀹�
+- 娑�
+- 绀�
+- 鏁�
+- 灞�
+- 婵�
+- 锜�
+- 涓�
+- 瑭�
+- 瑜�
+- 鐪�
+- 鑿�
+- 鐦�
+- 铦�
+- 鐪�
+- 缇�
+- 纭�
+- 鑳�
+- 鑺�
+- 姝�
+- 绾�
+- 琛�
+- 娣�
+- 姗�
+- 妯�
+- 绾�
+- 鍦�
+- stay
+- 鍐�
+- 绔�
+- 鐜�
+- 瀹�
+- 鎬�
+- 钖�
+- 娈�
+- 鐪�
+- 鍓�
+- 妫�
+- 鐦�
+- be
+- 瑁�
+- 绡�
+- 甯�
+- 鍔�
+- 楠�
+- 閿�
+- 寤�
+- 搴�
+- 缁�
+- 鍔�
+- 铦�
+- 閿�
+- 婵�
+- 绛�
+- 榄�
+- 閰�
+- 闃�
+- 寮�
+- 鎮�
+- 濮�
+- 鍌�
+- which
+- 鑲�
+- 婀�
+- 璐�
+- 鐩�
+- 鑼�
+- 鑽�
+- 閽�
+- 鑷�
+- 娌�
+- 鍙�
+- 妗�
+- 浣�
+- 鐝�
+- 鏄�
+- 鍩�
+- 鍓�
+- 绠�
+- 宀�
+- 娑�
+- 鏌�
+- 涓�
+- 鍕�
+- 姘�
+- 绋�
+- 鍖�
+- ting
+- 鐦�
+- 缂�
+- 楣�
+- 闆�
+- 铔�
+- 闀�
+- 榄�
+- 璎�
+- me
+- set
+- 璞�
+- h
+- 绉�
+- 鑽�
+- 璧�
+- 娌�
+- 鍚�
+- 鐭�
+- 韪�
+- 杩�
+- 鍙�
+- 鎶�
+- 闀�
+- 鍤�
+- 鍙�
+- 閿�
+- ex
+- 杈�
+- 璋�
+- 缍�
+- ws
+- 鍚�
+- 婀�
+- ll
+- 甯�
+- 濞�
+- 鍫�
+- 璺�
+- 澶�
+- 鍘�
+- 鏈�
+- 绗�
+- 鎮�
+- 瀛�
+- 鍢�
+- 鎭�
+- 鐨�
+- 濉�
+- who
+- 濡�
+- 鏍�
+- 鐡�
+- 绋�
+- 纰�
+- 鍝�
+- pol
+- 寰�
+- s
+- 閭�
+- 鎭�
+- 妯�
+- 榧�
+- 鎷�
+- 璋�
+- 澧�
+- 鍖�
+- speci
+- ha
+- 搴�
+- 楣�
+- ak
+- 闀�
+- m
+- 鍞�
+- 宸�
+- 绛�
+- 鎬�
+- 楂�
+- 鍝�
+- 闀�
+- 璐�
+- and
+- 鑿�
+- 濮�
+- 鑱�
+- tion
+- 鍌�
+- 閱�
+- 鍞�
+- 闆�
+- 鍫�
+- 鑺�
+- 鎿�
+- 鍖�
+- 鎮�
+- 閽�
+- gy
+- 寰�
+- 娓�
+- 瑾�
+- ti
+- 鍐�
+- 閾�
+- 鍜�
+- 娌�
+- 鍏�
+- 鐭�
+- 鑲�
+- 闊�
+- 閭�
+- 璋�
+- 鑰�
+- 娴�
+- public
+- 鑳�
+- 鐚�
+- six
+- 榛�
+- being
+- 寮�
+- 鎹�
+- ical
+- 鐧�
+- 鍫�
+- 浼�
+- 鍓�
+- 寮�
+- la
+- 榧�
+- 閫�
+- 鍫�
+- cap
+- 棣�
+- 璋�
+- 鐦�
+- 纭�
+- 閮�
+- 鍑�
+- fu
+- b
+- 鐫�
+- 楠�
+- 浣�
+- 绡�
+- 璐�
+- 閾�
+- act
+- 鍛�
+- 绔�
+- 鑾�
+- 闄�
+- 椋�
+- 榛�
+- 闀�
+- 灞�
+- 浠�
+- 鏁�
+- 椹�
+- 鍩�
+- 鍨�
+- 闃�
+- 鑰�
+- 绗�
+- 钃�
+- 濡�
+- 璁�
+- see
+- 鍚�
+- 韪�
+- 寮�
+- 姊�
+- 鍧�
+- 娓�
+- au
+- 璧�
+- 鍧�
+- 鑻�
+- don
+- 瀵�
+- 淇�
+- fir
+- 婵�
+- 鍧�
+- 娴�
+- 鍞�
+- 濠�
+- 鎭�
+- 闅�
+- 楹�
+- ca
+- 鑾�
+- 寰�
+- 闉�
+- 绂�
+- 琛�
+- 娼�
+- 鑼�
+- ven
+- 鐬�
+- 楦�
+- 鍑�
+- 鍜�
+- 楗�
+- 鑰�
+- uni
+- 鐟�
+- 鐓�
+- 鑸�
+- 灏�
+- 甯�
+- 榧�
+- 瀚�
+- 濠�
+- 璧�
+- 妫�
+- 娌�
+- 绉�
+- 绮�
+- roo
+- 铓�
+- 浜�
+- 澶�
+- 鍘�
+- 鎮�
+- 鍤�
+- fin
+- 浜�
+- 鐧�
+- 璺�
+- 鑾�
+- 闂�
+- 缃�
+- 鐐�
+- 閽�
+- 楣�
+- 鍙�
+- 鍌�
+- 绯�
+- service
+- 閭�
+- 绐�
+- 鎵�
+- 椴�
+- 鐩�
+- 鐜�
+- 鍗�
+- ne
+- 鐪�
+- 楦�
+- 鍏�
+- 鏉�
+- 纰�
+- 绡�
+- 瑁�
+- 閽�
+- 澶�
+- 浠�
+- 鎻�
+- 鐮�
+- 鍗�
+- 寮�
+- 棰�
+- 瀚�
+- 浠�
+- 浼�
+- 鎮�
+- 鑰�
+- 绠�
+- 鐣�
+- 鐤�
+- sp
+- 鑱�
+- 鐎�
+- 鎸�
+- 鎷�
+- 姘�
+- 鐠�
+- 鑲�
+- tan
+- ul
+- 锠�
+- 榫�
+- 瀹�
+- 鎸�
+- must
+- 韫�
+- 妯�
+- 浠�
+- 楗�
+- buy
+- 蹇�
+- 濞�
+- 鎵�
+- house
+- 璐�
+- 鍢�
+- 鑾�
+- 鑼�
+- 瀹�
+- 鍒�
+- 鍌�
+- rest
+- 鍘�
+- 娲�
+- 鐗�
+- 鏋�
+- 閾�
+- 瑙�
+- 瀣�
+- 鑺�
+- 鍏�
+- 绲�
+- 闄�
+- zer
+- 钘�
+- air
+- 璐�
+- 瓒�
+- 瀛�
+- 琚�
+- 姘�
+- 绔�
+- 鏌�
+- 鎭�
+- 楹�
+- 鑳�
+- came
+- 浼�
+- 鑸�
+- 绫�
+- 鎯�
+- 娉�
+- 棣�
+- 鐢�
+- wer
+- 楣�
+- 鍜�
+- 宕�
+- 宓�
+- 铦�
+- ils
+- 婀�
+- 鎶�
+- 铚�
+- 鍓�
+- 鏍�
+- 鎭�
+- 鍛�
+- 閮�
+- 鎶�
+- 绐�
+- 鐧�
+- xt
+- 姒�
+- 鍌�
+- 鐢�
+- 鎾�
+- 铓�
+- 鑿�
+- 宕�
+- 閰�
+- 瑗�
+- cu
+- 鍛�
+- 甯�
+- 鐐�
+- 鎻�
+- 鍟�
+- 椴�
+- ve
+- 鑹�
+- 鍡�
+- ili
+- 鍑�
+- 鐞�
+- 楝�
+- 鑽�
+- 缁�
+- 浜�
+- 閱�
+- 鐕�
+- 槌�
+- 渚�
+- 绨�
+- 娲�
+- 鍢�
+- 绋�
+- '''t'
+- 姘�
+- 鏆�
+- 鎸�
+- 娉�
+- 鎬�
+- offer
+- 绯�
+- 鑶�
+- 姣�
+- 韫�
+- 鐓�
+- 妗�
+- 浠�
+- 娴�
+- 鍏�
+- 娑�
+- 璺�
+- 绁�
+- 婀�
+- 鑴�
+- 鎻�
+- pr
+- 鐙�
+- 璧�
+- 鐮�
+- 琚�
+- 棣�
+- 鏋�
+- 纰�
+- 濞�
+- 杈�
+- 棰�
+- 鎹�
+- 宄�
+- 鑱�
+- 鍠�
+- 璇�
+- 鐤�
+- 鍧�
+- 纰�
+- 缂�
+- 榧�
+- 閽�
+- 鑵�
+- 婧�
+- 鑴�
+- 娈�
+- 鑲�
+- 娉�
+- 鎹�
+- 妲�
+- 鍚�
+- 娌�
+- 婧�
+- 搴�
+- 娈�
+- 楠�
+- 楗�
+- cour
+- 鑷�
+- 绌�
+- 2
+- 鍏�
+- 鏋�
+- 钂�
+- or
+- 瓒�
+- 鑿�
+- 鏍�
+- view
+- possi
+- 浠�
+- 槌�
+- 鎽�
+- 瀵�
+- 韬�
+- 婀�
+- 鍚�
+- 鍒�
+- 鎻�
+- 寰�
+- 瀵�
+- 鍧�
+- 鎸�
+- 涓�
+- 姘�
+- 鑷�
+- 楗�
+- 涓�
+- 褰�
+- 娉�
+- 鏇�
+- 琛�
+- ei
+- 鍛�
+- 鐙�
+- 鐢�
+- 绂�
+- 钖�
+- shipping
+- ght
+- 鎱�
+- 骞�
+- 鍖�
+- 鐐�
+- 璁�
+- four
+- 绉�
+- 妯�
+- 闄�
+- 鑴�
+- 鏋�
+- 纾�
+- 鍝�
+- 鍚�
+- 鑴�
+- 鐏�
+- 鍥�
+- 绱�
+- 鐠�
+- 閭�
+- 鎰�
+- 钖�
+- 杈�
+- 鐝�
+- 闆�
+- 楝�
+- 璧�
+- 鏋�
+- 閭�
+- 浣�
+- 棰�
+- 瀵�
+- 鎶�
+- 椴�
+- 鍒�
+- 绲�
+- 鎴�
+- 婧�
+- 琚�
+- less
+- 閫�
+- 灏�
+- food
+- 鍖�
+- 鍏�
+- 灏�
+- 鐨�
+- 槌�
+- 鎼�
+- 骞�
+- 杩�
+- 鏉�
+- 韫�
+- 褰�
+- 铏�
+- 鎬�
+- 闆�
+- 閽�
+- 婢�
+- 鎵�
+- 閿�
+- 妯�
+- 榛�
+- 鑳�
+- 钖�
+- 闃�
+- 澧�
+- 瑙�
+- 鍒�
+- 姹�
+- 缇�
+- 鏉�
+- 娣�
+- 钃�
+- 鑵�
+- 鍝�
+- was
+- 閽�
+- 鑳�
+- 鑹�
+- 鎾�
+- 铏�
+- 鐓�
+- 椋�
+- throu
+- 闂�
+- 鐜�
+- 鐞�
+- 鐓�
+- 閰�
+- 濠�
+- serv
+- 纭�
+- 鍑�
+- 澶�
+- 鎾�
+- 闋�
+- want
+- 鐘�
+- 鍘�
+- p
+- en
+- 鏂�
+- 灞�
+- 鐜�
+- life
+- 娼�
+- 楠�
+- 鑷�
+- 璋�
+- 娈�
+- 鍘�
+- 鎽�
+- 纾�
+- v
+- dress
+- 缁�
+- 鍥�
+- 鐪�
+- 蹇�
+- 澹�
+- 鍜�
+- 鎼�
+- 鑲�
+- 榄�
+- 鑺�
+- 绐�
+- 鎷�
+- 绾�
+- 妤�
+- 钄�
+- 瀵�
+- 濡�
+- 鏁�
+- 淇�
+- 棰�
+- 纰�
+- 浜�
+- 濂�
+- 鎲�
+- 鐐�
+- 韫�
+- 鑱�
+- 鍛�
+- 鍛�
+- 鐬�
+- il
+- 鎭�
+- 闃�
+- 鍗�
+- mi
+- 绂�
+- 妞�
+- yo
+- 甯�
+- 閱�
+- 甯�
+- 闅�
+- 蹇�
+- 鍝�
+- 鍔�
+- 妤�
+- 榧�
+- 濉�
+- 鑻�
+- 铚�
+- 鍋�
+- 閱�
+- ju
+- 鏂�
+- 绋�
+- 鑼�
+- 鐞�
+- 鍑�
+- 鎻�
+- 鍖�
+- 鐮�
+- 绂�
+- 缃�
+- 鍕�
+- 鎿�
+- 鐣�
+- 妗�
+- 娉�
+- 鏋�
+- 娌�
+- 鍋�
+- 绻�
+- 鍡�
+- 鍛�
+- 蹇�
+- so
+- 婧�
+- 鏇�
+- spon
+- 鐙�
+- 鍊�
+- 濞�
+- 娼�
+- 韪�
+- 鏅�
+- 鍚�
+- 琚�
+- 鍠�
+- 娲�
+- 鐐�
+- 绾�
+- 鎶�
+- 绨�
+- c
+- 涔�
+- 鍚�
+- 淇�
+- 姊�
+- 鍙�
+- 绁�
+- 鐑�
+- 鑽�
+- 鐪�
+- <unk>
+split_with_space: true
+init: null
+input_size: null
+ctc_conf:
+ dropout_rate: 0.0
+ ctc_type: builtin
+ reduce: true
+ ignore_nan_grad: true
+joint_net_conf: null
+use_preprocessor: true
+token_type: char
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+frontend: wav_frontend
+frontend_conf:
+ fs: 16000
+ window: hamming
+ n_mels: 80
+ frame_length: 25
+ frame_shift: 10
+ lfr_m: 7
+ lfr_n: 6
+specaug: specaug_lfr
+specaug_conf:
+ apply_time_warp: false
+ time_warp_window: 5
+ time_warp_mode: bicubic
+ apply_freq_mask: true
+ freq_mask_width_range:
+ - 0
+ - 30
+ lfr_rate: 6
+ num_freq_mask: 1
+ apply_time_mask: true
+ time_mask_width_range:
+ - 0
+ - 12
+ num_time_mask: 1
+normalize: null
+normalize_conf: {}
+model: paraformer_online
+model_conf:
+ ctc_weight: 0.0
+ lsm_weight: 0.1
+ length_normalized_loss: true
+ predictor_weight: 1.0
+ predictor_bias: 1
+ sampling_ratio: 0.75
+preencoder: null
+preencoder_conf: {}
+encoder: sanm_chunk_opt
+encoder_conf:
+ output_size: 512
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 50
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.1
+ input_layer: pe_online
+ pos_enc_class: SinusoidalPositionEncoder
+ normalize_before: true
+ kernel_size: 11
+ sanm_shfit: 0
+ selfattention_layer_type: sanm
+ chunk_size:
+ - 12
+ - 15
+ stride:
+ - 8
+ - 10
+ pad_left:
+ - 0
+ - 0
+ encoder_att_look_back_factor:
+ - 4
+ - 4
+ decoder_att_look_back_factor:
+ - 1
+ - 1
+postencoder: null
+postencoder_conf: {}
+decoder: paraformer_decoder_sanm
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 16
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.1
+ src_attention_dropout_rate: 0.1
+ att_layer_num: 16
+ kernel_size: 11
+ sanm_shfit: 5
+predictor: cif_predictor_v2
+predictor_conf:
+ idim: 512
+ threshold: 1.0
+ l_order: 1
+ r_order: 1
+ tail_threshold: 0.45
+gpu_id: 1
+required:
+- output_dir
+- token_list
+distributed: false
+version: '202211'
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/am.mvn b/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/am.mvn
new file mode 100755
index 0000000..681910c
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/am.mvn
@@ -0,0 +1,8 @@
+<Nnet>
+<Splice> 560 560
+[ 0 ]
+<AddShift> 560 560
+<LearnRateCoef> 0 [ -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 
-13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 
-8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 ]
+<Rescale> 560 560
+<LearnRateCoef> 0 [ 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 
0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 
0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 ]
+</Nnet>
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/config.pbtxt b/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/config.pbtxt
new file mode 100755
index 0000000..43bc83f
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/config.pbtxt
@@ -0,0 +1,85 @@
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+name: "lfr_cmvn_pe"
+backend: "onnxruntime"
+default_model_filename: "lfr_cmvn_pe.onnx"
+
+max_batch_size: 128
+
+sequence_batching{
+ max_sequence_idle_microseconds: 15000000
+ oldest {
+ max_candidate_sequences: 1024
+ preferred_batch_size: [32, 64, 128]
+ max_queue_delay_microseconds: 300
+ }
+ control_input [
+ ]
+ state [
+ {
+ input_name: "cache"
+ output_name: "r_cache"
+ data_type: TYPE_FP32
+ dims: [10, 560]
+ initial_state: {
+ data_type: TYPE_FP32
+ dims: [10, 560]
+ zero_data: true
+ name: "initial state"
+ }
+ },
+ {
+ input_name: "offset"
+ output_name: "r_offset"
+ data_type: TYPE_INT32
+ dims: [1]
+ initial_state: {
+ data_type: TYPE_INT32
+ dims: [1]
+ zero_data: true
+ name: "initial state"
+ }
+ }
+ ]
+}
+input [
+ {
+ name: "chunk_xs"
+ data_type: TYPE_FP32
+ dims: [61, 80]
+ }
+]
+output [
+ {
+ name: "chunk_xs_out"
+ data_type: TYPE_FP32
+ dims: [-1, 560]
+ },
+ {
+ name: "chunk_xs_out_len"
+ data_type: TYPE_INT32
+ dims: [-1]
+ }
+]
+instance_group [
+ {
+ count: 1
+ kind: KIND_GPU
+ }
+]
+
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/export_lfr_cmvn_pe_onnx.py b/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/export_lfr_cmvn_pe_onnx.py
new file mode 100755
index 0000000..c3328fd
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/lfr_cmvn_pe/export_lfr_cmvn_pe_onnx.py
@@ -0,0 +1,131 @@
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+import torch
+import numpy as np
+import math
+import torch.nn.functional as F
+
+
+class LFR_CMVN_PE(torch.nn.Module):
+    """Streaming front-end: low frame rate (LFR) stacking, CMVN and sinusoidal
+    position encoding (PE), fused into one exportable module.
+
+    Args:
+        mean: shift added to the stacked features (must broadcast against them).
+        istd: scale multiplied in after the shift; same shape as `mean`.
+        m, n: window length and hop, in input frames, of the LFR stacking.
+        max_len: number of rows in the position-encoding table.
+        encoder_input_size: width of one position-encoding row (and LFR frame).
+        encoder_output_size: features are multiplied by its square root.
+    """
+    def __init__(self, mean: torch.Tensor, istd: torch.Tensor, m: int = 7, n: int = 6,
+                 max_len: int = 5000, encoder_input_size: int = 560, encoder_output_size: int = 512):
+        super().__init__()
+        # LFR
+        self.m, self.n = m, n
+        self.subsample = (m - 1) // 2
+
+        # CMVN
+        assert mean.shape == istd.shape
+        self.register_buffer("mean", mean)
+        self.register_buffer("istd", istd)
+
+        # PE table; a non-persistent buffer follows .to(device) without adding a state_dict key.
+        self.encoder_input_size = encoder_input_size
+        self.encoder_output_size = encoder_output_size
+        self.max_len = max_len
+        half = self.encoder_input_size / 2
+        position = torch.arange(0, self.max_len, dtype=torch.float32).unsqueeze(1)
+        div_term = torch.exp(torch.arange(half, dtype=torch.float32) * -(math.log(10000.0) / (half - 1)))
+        pe = torch.zeros(self.max_len, self.encoder_input_size)
+        pe[:, 0::1] = torch.cat((torch.sin(position * div_term), torch.cos(position * div_term)), dim=1)
+        self.register_buffer("pe", pe, persistent=False)
+
+    def forward(self, x, cache, offset):
+        """Process one chunk; `cache` and `offset` carry state between calls.
+
+        Args:
+            x: (batch, frames, feat_dim) features of the current chunk.
+            cache: (batch, T_prev, feat_dim * m) `r_cache` returned by the previous call.
+            offset: (batch, 1) int32 count of LFR frames already consumed.
+        Returns:
+            r_x (cache followed by this chunk), r_x_len ((batch, 1) int32 length of r_x),
+            r_cache (this chunk, normalized and position-encoded), r_offset (advanced offset).
+        """
+        B, _, D = x.size()
+        # (B, T, D, m) -> (B, T, m, D) -> (B, T, m * D): each row is m stacked frames.
+        x = x.unfold(1, self.m, step=self.n).transpose(2, 3).view(B, -1, D * self.m)
+        x = (x + self.mean) * self.istd * (self.encoder_output_size ** 0.5)
+
+        steps = torch.arange(1, x.size(1) + 1, device=x.device).to(dtype=torch.int32)
+        pos_emb = F.embedding(offset + steps, self.pe)  # B X T X d_model
+        r_cache = x + pos_emb
+        r_x = torch.cat((cache, r_cache), dim=1)
+        r_offset = offset + x.size(1)
+        r_x_len = torch.ones((B, 1), dtype=torch.int32, device=x.device) * r_x.size(1)
+        return r_x, r_x_len, r_cache, r_offset
+
+
+def load_cmvn(cmvn_file):
+    """Parse a Kaldi-nnet style CMVN file (e.g. am.mvn) into two float32 tensors.
+
+    The line after `<AddShift>` / `<Rescale>` must look like
+    `<LearnRateCoef> 0 [ v1 ... vN ]`; the values between the brackets are
+    returned as the shift (first) and the scale (second).
+
+    Raises:
+        ValueError: if either component is missing from the file.
+    """
+    with open(cmvn_file, 'r', encoding='utf-8') as f:
+        # Drop blank lines up front so indexing field 0 below can never IndexError.
+        rows = [fields for fields in (line.split() for line in f) if fields]
+
+    found = {}
+    # Pair every row with its successor: the values live on the line after the tag.
+    for tag_row, value_row in zip(rows, rows[1:]):
+        if tag_row[0] in ('<AddShift>', '<Rescale>') and value_row[0] == '<LearnRateCoef>':
+            # Skip "<LearnRateCoef> 0 [" and the trailing "]".
+            found[tag_row[0]] = value_row[3:-1]
+
+    if '<AddShift>' not in found or '<Rescale>' not in found:
+        raise ValueError(f"{cmvn_file}: missing <AddShift> or <Rescale> values")
+    means = torch.from_numpy(np.array(found['<AddShift>']).astype(np.float32))
+    scales = torch.from_numpy(np.array(found['<Rescale>']).astype(np.float32))
+    return means, scales
+
+if __name__ == "__main__":
+    import os
+
+    # load_cmvn returns one shift row and one scale row; tile them to the 10 LFR
+    # frames that a 61-frame chunk yields with the default m=7, n=6.
+    means, scales = load_cmvn("am.mvn")
+    means = torch.tile(means, (10, 1))
+    scales = torch.tile(scales, (10, 1))
+
+    model = LFR_CMVN_PE(means, scales)
+    model.eval()
+
+    input_names = ['chunk_xs', 'cache', 'offset']
+    output_names = ['chunk_xs_out', 'chunk_xs_out_len', 'r_cache', 'r_offset']
+    # Mark axis 0 (batch) of every input and output as dynamic.
+    dynamic_axes = {name: {0: 'B'} for name in input_names + output_names}
+
+    # Dummy inputs for tracing. Offsets index the position-encoding table, so they
+    # must be non-negative; random ints could go out of range and abort the export.
+    input_data1 = torch.randn(4, 61, 80).to(torch.float32)
+    input_data2 = torch.randn(4, 10, 560).to(torch.float32)
+    input_data3 = torch.zeros(4, 1, dtype=torch.int32)
+
+    onnx_path = "./1/lfr_cmvn_pe.onnx"
+    # The export target lives in the version sub-directory "1"; create it if absent.
+    os.makedirs(os.path.dirname(onnx_path), exist_ok=True)
+    torch.onnx.export(model, (input_data1, input_data2, input_data3), onnx_path,
+                      export_params=True, opset_version=11, do_constant_folding=True,
+                      input_names=input_names, output_names=output_names,
+                      dynamic_axes=dynamic_axes, verbose=False)
+
+    print("export to onnx model succeed!")
+
+
+
diff --git a/runtime/triton_gpu/model_repo_paraformer_large_online/streaming_paraformer/config.pbtxt b/runtime/triton_gpu/model_repo_paraformer_large_online/streaming_paraformer/config.pbtxt
new file mode 100755
index 0000000..d0ac745
--- /dev/null
+++ b/runtime/triton_gpu/model_repo_paraformer_large_online/streaming_paraformer/config.pbtxt
@@ -0,0 +1,122 @@
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Created on 2024-01-01
+# Author: GuAn Zhu
+
+name: "streaming_paraformer"
+platform: "ensemble"
+max_batch_size: 128 #MAX_BATCH
+
+input [
+ {
+ name: "WAV"
+ data_type: TYPE_FP32
+ dims: [-1]
+ },
+ {
+ name: "WAV_LENS"
+ data_type: TYPE_INT32
+ dims: [1]
+ }
+]
+
+output [
+ {
+ name: "TRANSCRIPTS"
+ data_type: TYPE_STRING
+ dims: [1]
+ }
+]
+
+ensemble_scheduling {
+ step [
+ {
+ model_name: "feature_extractor"
+ model_version: -1
+ input_map {
+ key: "wav"
+ value: "WAV"
+ }
+ input_map {
+ key: "wav_lens"
+ value: "WAV_LENS"
+ }
+ output_map {
+ key: "speech"
+ value: "SPEECH"
+ }
+ },
+ {
+ model_name: "lfr_cmvn_pe"
+ model_version: -1
+ input_map {
+ key: "chunk_xs"
+ value: "SPEECH"
+ }
+ output_map {
+ key: "chunk_xs_out"
+ value: "CHUNK_XS_OUT"
+ }
+ output_map {
+ key: "chunk_xs_out_len"
+ value: "CHUNK_XS_OUT_LEN"
+ }
+ },
+ {
+ model_name: "encoder"
+ model_version: -1
+ input_map {
+ key: "speech"
+ value: "CHUNK_XS_OUT"
+ }
+ input_map {
+ key: "speech_lengths"
+ value: "CHUNK_XS_OUT_LEN"
+ }
+ output_map {
+ key: "enc"
+ value: "ENC"
+ }
+ output_map {
+ key: "enc_len"
+ value: "ENC_LEN"
+ }
+ output_map {
+ key: "alphas"
+ value: "ALPHAS"
+ }
+ },
+ {
+ model_name: "cif_search"
+ model_version: -1
+ input_map {
+ key: "enc"
+ value: "ENC"
+ }
+ input_map {
+ key: "enc_len"
+ value: "ENC_LEN"
+ }
+ input_map {
+ key: "alphas"
+ value: "ALPHAS"
+ }
+ output_map {
+ key: "transcripts"
+ value: "TRANSCRIPTS"
+ }
+ }
+ ]
+}
diff --git a/runtime/websocket/bin/funasr-wss-client-2pass.cpp b/runtime/websocket/bin/funasr-wss-client-2pass.cpp
index e2cce28..0cbd10e 100644
--- a/runtime/websocket/bin/funasr-wss-client-2pass.cpp
+++ b/runtime/websocket/bin/funasr-wss-client-2pass.cpp
@@ -122,7 +122,7 @@
// This method will block until the connection is complete
void run(const std::string& uri, const std::vector<string>& wav_list,
- const std::vector<string>& wav_ids, std::string asr_mode,
+ const std::vector<string>& wav_ids, int audio_fs, std::string asr_mode,
std::vector<int> chunk_size, const std::unordered_map<std::string, int>& hws_map,
bool is_record=false, int use_itn=1) {
// Create a new connection to the given URI
@@ -148,7 +148,7 @@
if(is_record){
send_rec_data(asr_mode, chunk_size, hws_map, use_itn);
}else{
- send_wav_data(wav_list[0], wav_ids[0], asr_mode, chunk_size, hws_map, use_itn);
+ send_wav_data(wav_list[0], wav_ids[0], audio_fs, asr_mode, chunk_size, hws_map, use_itn);
}
WaitABit();
@@ -183,20 +183,20 @@
m_done = true;
}
// send wav to server
- void send_wav_data(string wav_path, string wav_id, std::string asr_mode,
+ void send_wav_data(string wav_path, string wav_id, int audio_fs, std::string asr_mode,
std::vector<int> chunk_vector, const std::unordered_map<std::string, int>& hws_map,
int use_itn) {
uint64_t count = 0;
std::stringstream val;
funasr::Audio audio(1);
- int32_t sampling_rate = 16000;
+ int32_t sampling_rate = audio_fs;
std::string wav_format = "pcm";
if (funasr::IsTargetFile(wav_path.c_str(), "wav")) {
- int32_t sampling_rate = -1;
- if (!audio.LoadWav(wav_path.c_str(), &sampling_rate)) return;
+ if (!audio.LoadWav(wav_path.c_str(), &sampling_rate, false))
+ return;
} else if (funasr::IsTargetFile(wav_path.c_str(), "pcm")) {
- if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate)) return;
+ if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate, false)) return;
} else {
wav_format = "others";
if (!audio.LoadOthers2Char(wav_path.c_str())) return;
@@ -238,6 +238,7 @@
jsonbegin["chunk_size"] = chunk_size;
jsonbegin["wav_name"] = wav_id;
jsonbegin["wav_format"] = wav_format;
+ jsonbegin["audio_fs"] = sampling_rate;
jsonbegin["is_speaking"] = true;
jsonbegin["itn"] = true;
if(use_itn == 0){
@@ -360,6 +361,7 @@
}
websocketpp::lib::error_code ec;
+ float sample_rate = 16000;
nlohmann::json jsonbegin;
nlohmann::json chunk_size = nlohmann::json::array();
chunk_size.push_back(chunk_vector[0]);
@@ -369,6 +371,7 @@
jsonbegin["chunk_size"] = chunk_size;
jsonbegin["wav_name"] = "record";
jsonbegin["wav_format"] = "pcm";
+ jsonbegin["audio_fs"] = sample_rate;
jsonbegin["is_speaking"] = true;
jsonbegin["itn"] = true;
if(use_itn == 0){
@@ -408,7 +411,6 @@
param.suggestedLatency = info->defaultLowInputLatency;
param.hostApiSpecificStreamInfo = nullptr;
- float sample_rate = 16000;
PaStream *stream;
std::vector<float> buffer;
@@ -473,6 +475,10 @@
};
int main(int argc, char* argv[]) {
+#ifdef _WIN32
+ #include <windows.h>
+ SetConsoleOutputCP(65001);
+#endif
google::InitGoogleLogging(argv[0]);
FLAGS_logtostderr = true;
@@ -486,6 +492,7 @@
"the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: "
"asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)",
false, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs_("", "audio-fs", "the sample rate of audio", false, 16000, "int32_t");
TCLAP::ValueArg<int> record_(
"", "record",
"record is 1 means use record", false, 0,
@@ -511,6 +518,7 @@
cmd.add(server_ip_);
cmd.add(port_);
cmd.add(wav_path_);
+ cmd.add(audio_fs_);
cmd.add(asr_mode_);
cmd.add(record_);
cmd.add(chunk_size_);
@@ -558,6 +566,7 @@
funasr::ExtractHws(hotword_path, hws_map);
}
+ int audio_fs = audio_fs_.getValue();
if(is_record == 1){
std::vector<string> tmp_wav_list;
std::vector<string> tmp_wav_ids;
@@ -567,11 +576,11 @@
c.m_client.set_tls_init_handler(bind(&OnTlsInit, ::_1));
- c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, hws_map, true, use_itn);
+ c.run(uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, hws_map, true, use_itn);
} else {
WebsocketClient<websocketpp::config::asio_client> c(is_ssl);
- c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, hws_map, true, use_itn);
+ c.run(uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, hws_map, true, use_itn);
}
}else{
@@ -612,17 +621,17 @@
tmp_wav_ids.emplace_back(wav_ids[wav_i + i]);
client_threads.emplace_back(
- [uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, is_ssl, hws_map, use_itn]() {
+ [uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, is_ssl, hws_map, use_itn]() {
if (is_ssl == 1) {
WebsocketClient<websocketpp::config::asio_tls_client> c(is_ssl);
c.m_client.set_tls_init_handler(bind(&OnTlsInit, ::_1));
- c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, hws_map, false, use_itn);
+ c.run(uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, hws_map, false, use_itn);
} else {
WebsocketClient<websocketpp::config::asio_client> c(is_ssl);
- c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, hws_map, false, use_itn);
+ c.run(uri, tmp_wav_list, tmp_wav_ids, audio_fs, asr_mode, chunk_size, hws_map, false, use_itn);
}
});
}
diff --git a/runtime/websocket/bin/funasr-wss-client.cpp b/runtime/websocket/bin/funasr-wss-client.cpp
index ad68c63..1dc9e3e 100644
--- a/runtime/websocket/bin/funasr-wss-client.cpp
+++ b/runtime/websocket/bin/funasr-wss-client.cpp
@@ -98,11 +98,12 @@
switch (msg->get_opcode()) {
case websocketpp::frame::opcode::text:
total_recv=total_recv+1;
- LOG(INFO)<< "Thread: " << this_thread::get_id() <<", on_message = " << payload;
- LOG(INFO)<< "Thread: " << this_thread::get_id() << ", total_recv=" << total_recv << " total_send=" <<total_send;
- if(total_recv==total_send)
+ LOG(INFO)<< "Thread: " << this_thread::get_id() << ", total_recv=" << total_recv <<", on_message = " << payload;
+ std::unique_lock<std::mutex> lock(msg_lock);
+ cv.notify_one();
+ if(close_client)
{
- LOG(INFO)<< "Thread: " << this_thread::get_id() << ", close client";
+ LOG(INFO)<< "Thread: " << this_thread::get_id() << ", close client thread";
websocketpp::lib::error_code ec;
m_client.close(m_hdl, websocketpp::close::status::going_away, "", ec);
if (ec){
@@ -114,7 +115,7 @@
// This method will block until the connection is complete
void run(const std::string& uri, const std::vector<string>& wav_list, const std::vector<string>& wav_ids,
- const std::unordered_map<std::string, int>& hws_map, int use_itn=1) {
+ int audio_fs, const std::unordered_map<std::string, int>& hws_map, int use_itn=1) {
// Create a new connection to the given URI
websocketpp::lib::error_code ec;
typename websocketpp::client<T>::connection_ptr con =
@@ -141,14 +142,17 @@
if (i >= wav_list.size()) {
break;
}
+ if (total_send !=0){
+ std::unique_lock<std::mutex> lock(msg_lock);
+ cv.wait(lock);
+ }
total_send += 1;
- send_wav_data(wav_list[i], wav_ids[i], hws_map, send_hotword, use_itn);
+ send_wav_data(wav_list[i], wav_ids[i], audio_fs, hws_map, send_hotword, use_itn);
if(send_hotword){
send_hotword = false;
}
}
- WaitABit();
-
+ close_client = true;
asio_thread.join();
}
@@ -180,21 +184,20 @@
m_done = true;
}
// send wav to server
- void send_wav_data(string wav_path, string wav_id,
+ void send_wav_data(string wav_path, string wav_id, int audio_fs,
const std::unordered_map<std::string, int>& hws_map,
bool send_hotword, bool use_itn) {
uint64_t count = 0;
std::stringstream val;
funasr::Audio audio(1);
- int32_t sampling_rate = 16000;
+ int32_t sampling_rate = audio_fs;
std::string wav_format = "pcm";
- if(funasr::IsTargetFile(wav_path.c_str(), "wav")){
- int32_t sampling_rate = -1;
- if(!audio.LoadWav(wav_path.c_str(), &sampling_rate))
- return ;
- }else if(funasr::IsTargetFile(wav_path.c_str(), "pcm")){
- if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate))
+ if (funasr::IsTargetFile(wav_path.c_str(), "wav")) {
+ if (!audio.LoadWav(wav_path.c_str(), &sampling_rate, false))
+ return;
+ } else if(funasr::IsTargetFile(wav_path.c_str(), "pcm")){
+ if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate, false))
return ;
}else{
wav_format = "others";
@@ -237,6 +240,7 @@
jsonbegin["chunk_interval"] = 10;
jsonbegin["wav_name"] = wav_id;
jsonbegin["wav_format"] = wav_format;
+ jsonbegin["audio_fs"] = sampling_rate;
jsonbegin["itn"] = true;
if(use_itn == 0){
jsonbegin["itn"] = false;
@@ -334,14 +338,20 @@
private:
websocketpp::connection_hdl m_hdl;
websocketpp::lib::mutex m_lock;
+ websocketpp::lib::mutex msg_lock;
+ websocketpp::lib::condition_variable cv;
bool m_open;
bool m_done;
+ bool close_client=false;
int total_send=0;
int total_recv=0;
};
int main(int argc, char* argv[]) {
-
+#ifdef _WIN32
+ #include <windows.h>
+ SetConsoleOutputCP(65001);
+#endif
google::InitGoogleLogging(argv[0]);
FLAGS_logtostderr = true;
@@ -352,6 +362,7 @@
TCLAP::ValueArg<std::string> wav_path_("", "wav-path",
"the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)",
true, "", "string");
+ TCLAP::ValueArg<std::int32_t> audio_fs_("", "audio-fs", "the sample rate of audio", false, 16000, "int32_t");
TCLAP::ValueArg<int> thread_num_("", "thread-num", "thread-num",
false, 1, "int");
TCLAP::ValueArg<int> is_ssl_(
@@ -366,6 +377,7 @@
cmd.add(server_ip_);
cmd.add(port_);
cmd.add(wav_path_);
+ cmd.add(audio_fs_);
cmd.add(thread_num_);
cmd.add(is_ssl_);
cmd.add(use_itn_);
@@ -420,18 +432,19 @@
wav_ids.emplace_back(default_id);
}
+ int audio_fs = audio_fs_.getValue();
for (size_t i = 0; i < threads_num; i++) {
- client_threads.emplace_back([uri, wav_list, wav_ids, is_ssl, hws_map, use_itn]() {
+ client_threads.emplace_back([uri, wav_list, wav_ids, audio_fs, is_ssl, hws_map, use_itn]() {
if (is_ssl == 1) {
WebsocketClient<websocketpp::config::asio_tls_client> c(is_ssl);
c.m_client.set_tls_init_handler(bind(&OnTlsInit, ::_1));
- c.run(uri, wav_list, wav_ids, hws_map, use_itn);
+ c.run(uri, wav_list, wav_ids, audio_fs, hws_map, use_itn);
} else {
WebsocketClient<websocketpp::config::asio_client> c(is_ssl);
- c.run(uri, wav_list, wav_ids, hws_map, use_itn);
+ c.run(uri, wav_list, wav_ids, audio_fs, hws_map, use_itn);
}
});
}
diff --git a/runtime/websocket/bin/funasr-wss-server-2pass.cpp b/runtime/websocket/bin/funasr-wss-server-2pass.cpp
index 40f8f07..ef27d5b 100644
--- a/runtime/websocket/bin/funasr-wss-server-2pass.cpp
+++ b/runtime/websocket/bin/funasr-wss-server-2pass.cpp
@@ -16,6 +16,7 @@
// hotwords
std::unordered_map<std::string, int> hws_map_;
int fst_inc_wts_=20;
+float global_beam_, lattice_beam_, am_scale_;
using namespace std;
void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key,
@@ -25,6 +26,10 @@
}
int main(int argc, char* argv[]) {
+#ifdef _WIN32
+ #include <windows.h>
+ SetConsoleOutputCP(65001);
+#endif
try {
google::InitGoogleLogging(argv[0]);
FLAGS_logtostderr = true;
@@ -116,6 +121,14 @@
"connection",
false, "../../../ssl_key/server.key", "string");
+ TCLAP::ValueArg<float> global_beam("", GLOB_BEAM, "the decoding beam for beam searching ", false, 3.0, "float");
+ TCLAP::ValueArg<float> lattice_beam("", LAT_BEAM, "the lattice generation beam for beam searching ", false, 3.0, "float");
+ TCLAP::ValueArg<float> am_scale("", AM_SCALE, "the acoustic scale for beam searching ", false, 10.0, "float");
+
+ TCLAP::ValueArg<std::string> lm_dir("", LM_DIR,
+ "the LM model path, which contains compiled models: TLG.fst, config.yaml ", false, "damo/speech_ngram_lm_zh-cn-ai-wesp-fst", "string");
+ TCLAP::ValueArg<std::string> lm_revision(
+ "", "lm-revision", "LM model revision", false, "v1.0.2", "string");
TCLAP::ValueArg<std::string> hotword("", HOTWORD,
"the hotword file, one hotword perline, Format: Hotword Weight (could be: 闃块噷宸村反 20)",
false, "/workspace/resources/hotwords.txt", "string");
@@ -124,6 +137,10 @@
// add file
cmd.add(hotword);
+ cmd.add(fst_inc_wts);
+ cmd.add(global_beam);
+ cmd.add(lattice_beam);
+ cmd.add(am_scale);
cmd.add(certfile);
cmd.add(keyfile);
@@ -142,6 +159,8 @@
cmd.add(punc_quant);
cmd.add(itn_dir);
cmd.add(itn_revision);
+ cmd.add(lm_dir);
+ cmd.add(lm_revision);
cmd.add(listen_ip);
cmd.add(port);
@@ -159,6 +178,7 @@
GetValue(punc_dir, PUNC_DIR, model_path);
GetValue(punc_quant, PUNC_QUANT, model_path);
GetValue(itn_dir, ITN_DIR, model_path);
+ GetValue(lm_dir, LM_DIR, model_path);
GetValue(hotword, HOTWORD, model_path);
GetValue(offline_model_revision, "offline-model-revision", model_path);
@@ -166,6 +186,11 @@
GetValue(vad_revision, "vad-revision", model_path);
GetValue(punc_revision, "punc-revision", model_path);
GetValue(itn_revision, "itn-revision", model_path);
+ GetValue(lm_revision, "lm-revision", model_path);
+
+ global_beam_ = global_beam.getValue();
+ lattice_beam_ = lattice_beam.getValue();
+ am_scale_ = am_scale.getValue();
// Download model form Modelscope
try {
@@ -179,6 +204,7 @@
std::string s_punc_path = model_path[PUNC_DIR];
std::string s_punc_quant = model_path[PUNC_QUANT];
std::string s_itn_path = model_path[ITN_DIR];
+ std::string s_lm_path = model_path[LM_DIR];
std::string python_cmd =
"python -m funasr.utils.runtime_sdk_download_tool --type onnx --quantize True ";
@@ -237,11 +263,18 @@
size_t found = s_offline_asr_path.find("speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404");
if (found != std::string::npos) {
model_path["offline-model-revision"]="v1.2.4";
- } else{
- found = s_offline_asr_path.find("speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404");
- if (found != std::string::npos) {
- model_path["offline-model-revision"]="v1.0.5";
- }
+ }
+
+ found = s_offline_asr_path.find("speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404");
+ if (found != std::string::npos) {
+ model_path["offline-model-revision"]="v1.0.5";
+ }
+
+ found = s_offline_asr_path.find("speech_paraformer-large_asr_nat-en-16k-common-vocab10020");
+ if (found != std::string::npos) {
+ model_path["model-revision"]="v1.0.0";
+ s_itn_path="";
+ s_lm_path="";
}
if (access(s_offline_asr_path.c_str(), F_OK) == 0) {
@@ -328,6 +361,49 @@
LOG(INFO) << "ASR online model is not set, use default.";
}
+ if (!s_lm_path.empty() && s_lm_path != "NONE" && s_lm_path != "none") {
+ std::string python_cmd_lm;
+ std::string down_lm_path;
+ std::string down_lm_model;
+
+ if (access(s_lm_path.c_str(), F_OK) == 0) {
+ // local
+ python_cmd_lm = python_cmd + " --model-name " + s_lm_path +
+ " --export-dir ./ " + " --model_revision " +
+ model_path["lm-revision"] + " --export False ";
+ down_lm_path = s_lm_path;
+ } else {
+ // modelscope
+ LOG(INFO) << "Download model: " << s_lm_path
+ << " from modelscope : ";
+ python_cmd_lm = python_cmd + " --model-name " +
+ s_lm_path +
+ " --export-dir " + s_download_model_dir +
+ " --model_revision " + model_path["lm-revision"]
+ + " --export False ";
+ down_lm_path =
+ s_download_model_dir +
+ "/" + s_lm_path;
+ }
+
+ int ret = system(python_cmd_lm.c_str());
+ if (ret != 0) {
+ LOG(INFO) << "Failed to download model from modelscope. If you set local lm model path, you can ignore the errors.";
+ }
+ down_lm_model = down_lm_path + "/TLG.fst";
+
+ if (access(down_lm_model.c_str(), F_OK) != 0) {
+ LOG(ERROR) << down_lm_model << " do not exists.";
+ exit(-1);
+ } else {
+ model_path[LM_DIR] = down_lm_path;
+ LOG(INFO) << "Set " << LM_DIR << " : " << model_path[LM_DIR];
+ }
+ } else {
+ LOG(INFO) << "LM model is not set, not executed.";
+ model_path[LM_DIR] = "";
+ }
+
if (!s_punc_path.empty()) {
std::string python_cmd_punc;
std::string down_punc_path;
diff --git a/runtime/websocket/bin/funasr-wss-server.cpp b/runtime/websocket/bin/funasr-wss-server.cpp
index f54bc5b..8f2a7ab 100644
--- a/runtime/websocket/bin/funasr-wss-server.cpp
+++ b/runtime/websocket/bin/funasr-wss-server.cpp
@@ -26,6 +26,10 @@
}
int main(int argc, char* argv[]) {
+#ifdef _WIN32
+ #include <windows.h>
+ SetConsoleOutputCP(65001);
+#endif
try {
google::InitGoogleLogging(argv[0]);
@@ -111,7 +115,7 @@
TCLAP::ValueArg<std::string> lm_dir("", LM_DIR,
"the LM model path, which contains compiled models: TLG.fst, config.yaml ", false, "damo/speech_ngram_lm_zh-cn-ai-wesp-fst", "string");
TCLAP::ValueArg<std::string> lm_revision(
- "", "lm-revision", "LM model revision", false, "v1.0.1", "string");
+ "", "lm-revision", "LM model revision", false, "v1.0.2", "string");
TCLAP::ValueArg<std::string> hotword("", HOTWORD,
"the hotword file, one hotword perline, Format: Hotword Weight (could be: 闃块噷宸村反 20)",
false, "/workspace/resources/hotwords.txt", "string");
diff --git a/runtime/websocket/bin/websocket-server-2pass.cpp b/runtime/websocket/bin/websocket-server-2pass.cpp
index 5cfd461..c251e1d 100644
--- a/runtime/websocket/bin/websocket-server-2pass.cpp
+++ b/runtime/websocket/bin/websocket-server-2pass.cpp
@@ -18,6 +18,7 @@
extern std::unordered_map<std::string, int> hws_map_;
extern int fst_inc_wts_;
+extern float global_beam_, lattice_beam_, am_scale_;
context_ptr WebSocketServer::on_tls_init(tls_mode mode,
websocketpp::connection_hdl hdl,
@@ -80,6 +81,19 @@
jsonresult["timestamp"] = tmp_stamp_msg;
}
+ std::string tmp_stamp_sents = FunASRGetStampSents(result);
+ if (tmp_stamp_sents != "") {
+ try{
+ nlohmann::json json_stamp = nlohmann::json::parse(tmp_stamp_sents);
+ LOG(INFO) << "offline stamp_sents : " << json_stamp;
+ jsonresult["stamp_sents"] = json_stamp;
+ }catch (std::exception const &e)
+ {
+ LOG(ERROR)<< tmp_stamp_sents << e.what();
+ jsonresult["stamp_sents"] = "";
+ }
+ }
+
return jsonresult;
}
// feed buffer to asr engine for decoder
@@ -96,7 +110,8 @@
bool itn,
int audio_fs,
std::string wav_format,
- FUNASR_HANDLE& tpass_online_handle) {
+ FUNASR_HANDLE& tpass_online_handle,
+ FUNASR_DEC_HANDLE& decoder_handle) {
// lock for each connection
if(!tpass_online_handle){
scoped_lock guard(thread_lock);
@@ -125,7 +140,7 @@
subvector.data(), subvector.size(),
punc_cache, false, audio_fs,
wav_format, (ASR_TYPE)asr_mode_,
- hotwords_embedding, itn);
+ hotwords_embedding, itn, decoder_handle);
} else {
scoped_lock guard(thread_lock);
@@ -162,7 +177,7 @@
buffer.data(), buffer.size(), punc_cache,
is_final, audio_fs,
wav_format, (ASR_TYPE)asr_mode_,
- hotwords_embedding, itn);
+ hotwords_embedding, itn, decoder_handle);
} else {
scoped_lock guard(thread_lock);
msg["access_num"]=(int)msg["access_num"]-1;
@@ -232,9 +247,12 @@
data_msg->msg["wav_name"] = "wav-default-id";
data_msg->msg["mode"] = "2pass";
data_msg->msg["itn"] = true;
- data_msg->msg["audio_fs"] = 16000;
+ data_msg->msg["audio_fs"] = 16000; // default is 16k
data_msg->msg["access_num"] = 0; // the number of access for this object, when it is 0, we can free it saftly
data_msg->msg["is_eof"]=false; // if this connection is closed
+ FUNASR_DEC_HANDLE decoder_handle =
+ FunASRWfstDecoderInit(tpass_handle, ASR_TWO_PASS, global_beam_, lattice_beam_, am_scale_);
+ data_msg->decoder_handle = decoder_handle;
data_msg->punc_cache =
std::make_shared<std::vector<std::vector<std::string>>>(2);
data_msg->strand_ = std::make_shared<asio::io_context::strand>(io_decoder_);
@@ -261,6 +279,9 @@
// finished and avoid access freed tpass_online_handle
unique_lock guard_decoder(*(data_msg->thread_lock));
if (data_msg->msg["access_num"]==0 && data_msg->msg["is_eof"]==true) {
+ FunWfstDecoderUnloadHwsRes(data_msg->decoder_handle);
+ FunASRWfstDecoderUninit(data_msg->decoder_handle);
+ data_msg->decoder_handle = nullptr;
FunTpassOnlineUninit(data_msg->tpass_online_handle);
data_msg->tpass_online_handle = nullptr;
data_map.erase(hdl);
@@ -318,7 +339,7 @@
data_msg->msg["is_eof"]=true;
guard_decoder.unlock();
to_remove.push_back(hdl);
- LOG(INFO)<<"connection is closed: "<<e.what();
+ LOG(INFO)<<"connection is closed.";
}
iter++;
@@ -425,7 +446,7 @@
nn_hotwords += " " + pair.first;
LOG(INFO) << pair.first << " : " << pair.second;
}
- // FunWfstDecoderLoadHwsRes(msg_data->decoder_handle, fst_inc_wts_, merged_hws_map);
+ FunWfstDecoderLoadHwsRes(msg_data->decoder_handle, fst_inc_wts_, merged_hws_map);
// nn
std::vector<std::vector<float>> new_hotwords_embedding = CompileHotwordEmbedding(tpass_handle, nn_hotwords, ASR_TWO_PASS);
@@ -477,7 +498,8 @@
msg_data->msg["itn"],
msg_data->msg["audio_fs"],
msg_data->msg["wav_format"],
- std::ref(msg_data->tpass_online_handle)));
+ std::ref(msg_data->tpass_online_handle),
+ std::ref(msg_data->decoder_handle)));
msg_data->msg["access_num"]=(int)(msg_data->msg["access_num"])+1;
}
catch (std::exception const &e)
@@ -524,7 +546,8 @@
msg_data->msg["itn"],
msg_data->msg["audio_fs"],
msg_data->msg["wav_format"],
- std::ref(msg_data->tpass_online_handle)));
+ std::ref(msg_data->tpass_online_handle),
+ std::ref(msg_data->decoder_handle)));
msg_data->msg["access_num"]=(int)(msg_data->msg["access_num"])+1;
}
}
diff --git a/runtime/websocket/bin/websocket-server-2pass.h b/runtime/websocket/bin/websocket-server-2pass.h
index 3e78a34..6b2ba32 100644
--- a/runtime/websocket/bin/websocket-server-2pass.h
+++ b/runtime/websocket/bin/websocket-server-2pass.h
@@ -60,7 +60,8 @@
FUNASR_HANDLE tpass_online_handle=NULL;
std::string online_res = "";
std::string tpass_res = "";
- std::shared_ptr<asio::io_context::strand> strand_; // for data execute in order
+ std::shared_ptr<asio::io_context::strand> strand_; // for data execute in order
+ FUNASR_DEC_HANDLE decoder_handle=NULL;
} FUNASR_MESSAGE;
// See https://wiki.mozilla.org/Security/Server_Side_TLS for more details about
@@ -123,7 +124,8 @@
bool itn,
int audio_fs,
std::string wav_format,
- FUNASR_HANDLE& tpass_online_handle);
+ FUNASR_HANDLE& tpass_online_handle,
+ FUNASR_DEC_HANDLE& decoder_handle);
void initAsr(std::map<std::string, std::string>& model_path, int thread_num);
void on_message(websocketpp::connection_hdl hdl, message_ptr msg);
diff --git a/runtime/websocket/bin/websocket-server.cpp b/runtime/websocket/bin/websocket-server.cpp
index 99b150f..026954f 100644
--- a/runtime/websocket/bin/websocket-server.cpp
+++ b/runtime/websocket/bin/websocket-server.cpp
@@ -72,19 +72,23 @@
int num_samples = buffer.size(); // the size of the buf
if (!buffer.empty() && hotwords_embedding.size() > 0) {
- std::string asr_result;
- std::string stamp_res;
+ std::string asr_result="";
+ std::string stamp_res="";
+ std::string stamp_sents="";
try{
FUNASR_RESULT Result = FunOfflineInferBuffer(
asr_handle, buffer.data(), buffer.size(), RASR_NONE, NULL,
hotwords_embedding, audio_fs, wav_format, itn, decoder_handle);
-
- asr_result = ((FUNASR_RECOG_RESULT*)Result)->msg; // get decode result
- stamp_res = ((FUNASR_RECOG_RESULT*)Result)->stamp;
- FunASRFreeResult(Result);
+ if (Result != NULL){
+ asr_result = FunASRGetResult(Result, 0); // get decode result
+ stamp_res = FunASRGetStamp(Result);
+ stamp_sents = FunASRGetStampSents(Result);
+ FunASRFreeResult(Result);
+ } else{
+ LOG(ERROR) << "FUNASR_RESULT is NULL.";
+ }
}catch (std::exception const& e) {
LOG(ERROR) << e.what();
- return;
}
websocketpp::lib::error_code ec;
@@ -94,6 +98,16 @@
jsonresult["is_final"] = false;
if(stamp_res != ""){
jsonresult["timestamp"] = stamp_res;
+ }
+ if(stamp_sents != ""){
+ try{
+ nlohmann::json json_stamp = nlohmann::json::parse(stamp_sents);
+ jsonresult["stamp_sents"] = json_stamp;
+ }catch (std::exception const &e)
+ {
+ LOG(ERROR)<<e.what();
+ jsonresult["stamp_sents"] = "";
+ }
}
jsonresult["wav_name"] = wav_name;
@@ -144,7 +158,7 @@
data_msg->msg["wav_format"] = "pcm";
data_msg->msg["wav_name"] = "wav-default-id";
data_msg->msg["itn"] = true;
- data_msg->msg["audio_fs"] = 16000;
+ data_msg->msg["audio_fs"] = 16000; // default is 16k
data_msg->msg["access_num"] = 0; // the number of access for this object, when it is 0, we can free it saftly
data_msg->msg["is_eof"]=false;
FUNASR_DEC_HANDLE decoder_handle =
@@ -227,7 +241,7 @@
data_msg->msg["is_eof"]=true;
guard_decoder.unlock();
to_remove.push_back(hdl);
- LOG(INFO)<<"connection is closed: "<<e.what();
+ LOG(INFO)<<"connection is closed.";
}
iter++;
diff --git a/runtime/websocket/bin/websocket-server.h b/runtime/websocket/bin/websocket-server.h
index d511071..cacf12d 100644
--- a/runtime/websocket/bin/websocket-server.h
+++ b/runtime/websocket/bin/websocket-server.h
@@ -50,6 +50,7 @@
typedef struct {
std::string msg="";
std::string stamp="";
+ std::string stamp_sents;
std::string tpass_msg="";
float snippet_time=0;
} FUNASR_RECOG_RESULT;
diff --git a/runtime/websocket/readme.md b/runtime/websocket/readme.md
index 610468b..8d4e41f 100644
--- a/runtime/websocket/readme.md
+++ b/runtime/websocket/readme.md
@@ -40,12 +40,12 @@
### Download onnxruntime
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/dep_libs/onnxruntime-win-x64-1.16.1.zip
-Download to d:\ffmpeg-master-latest-win64-gpl-shared
+Download to d:\onnxruntime-win-x64-1.16.1
### Download ffmpeg
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/dep_libs/ffmpeg-master-latest-win64-gpl-shared.zip
-Download to d:\onnxruntime-win-x64-1.16.1
+Download to d:\ffmpeg-master-latest-win64-gpl-shared
### Download openssl
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/dep_libs/openssl-1.1.1w.zip
diff --git a/runtime/websocket/readme_zh.md b/runtime/websocket/readme_zh.md
index 31c457b..d185425 100644
--- a/runtime/websocket/readme_zh.md
+++ b/runtime/websocket/readme_zh.md
@@ -41,12 +41,12 @@
### 下载 onnxruntime
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/dep_libs/onnxruntime-win-x64-1.16.1.zip
-下载并解压到 d:/ffmpeg-master-latest-win64-gpl-shared
+下载并解压到 d:/onnxruntime-win-x64-1.16.1
### 下载 ffmpeg
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/dep_libs/ffmpeg-master-latest-win64-gpl-shared.zip
-下载并解压到 d:/onnxruntime-win-x64-1.16.1
+下载并解压到 d:/ffmpeg-master-latest-win64-gpl-shared
### 编译 openssl
https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/dep_libs/openssl-1.1.1w.zip
diff --git a/web-pages/readme.md b/web-pages/readme.md
index c8eba48..d202286 100644
--- a/web-pages/readme.md
+++ b/web-pages/readme.md
@@ -39,7 +39,7 @@
FunASR离线文件转写软件包，提供了一款功能强大的语音离线文件转写服务。拥有完整的语音识别链路，结合了语音端点检测、语音识别、标点等模型，可以将几十个小时的长音频与视频识别成带标点的文字，而且支持上百路请求同时进行转写。输出为带标点的文字，含有字级别时间戳，支持ITN与用户自定义热词等。服务端集成有ffmpeg，支持各种音视频格式输入。软件包提供有html、python、c++、java与c#等多种编程语言客户端，用户可以直接使用与进一步开发。
在线体验：
- https://101.37.77.25:1335/static/index.html
+ https://121.43.113.106:1335/static/index.html
安装：
@@ -63,7 +63,7 @@
FunASR实时语音听写软件包，集成了实时版本的语音端点检测模型、语音识别、语音识别、标点预测模型等。采用多模型协同，既可以实时的进行语音转文字，也可以在说话句尾用高精度转写文字修正输出，输出文字带有标点，支持多路请求。依据使用者场景不同，支持实时语音听写服务（online）、非实时一句话转写（offline）与实时与非实时一体化协同（2pass）3种服务模式。软件包提供有html、python、c++、java与c#等多种编程语言客户端，用户可以直接使用与进一步开发。
在线体验：
- https://101.37.77.25:1336/static/index.html
+ https://121.43.113.106:1336/static/index.html
安装：
@@ -94,4 +94,4 @@
# 开发模式
npm run dev
# 产品模式
-npm run example
\ No newline at end of file
+npm run example
diff --git a/web-pages/src/views/home/index.vue b/web-pages/src/views/home/index.vue
index 34e8483..ba105fb 100644
--- a/web-pages/src/views/home/index.vue
+++ b/web-pages/src/views/home/index.vue
@@ -43,24 +43,6 @@
</li>
</ul>
</div>
-
- <!-- <div class="line-box">
- <div></div>
- </div>
-
- <div class="ba-box">
- <ul>
- <li>
- Copyright @ 1998 - 2023 Tancant. All Rights Reserved.鍖楃杞欢鍏徃 鐗堟潈鎵�鏈�
- </li>
- <li>
- 鍏徃鍦板潃:姝︽眽甯傛椽灞卞尯閲庤姺婀栬タ璺�16鍙峰垱鎰忓ぉ鍦�10鍙烽珮灞�13妤�
- </li>
- <li>
- 鑱旂郴鐢佃瘽: 400 862 6126
- </li>
- </ul>
- </div> -->
</footer>
</div>
diff --git a/web-pages/src/views/home/lxwjzxfw.vue b/web-pages/src/views/home/lxwjzxfw.vue
index 4300e98..22567cc 100644
--- a/web-pages/src/views/home/lxwjzxfw.vue
+++ b/web-pages/src/views/home/lxwjzxfw.vue
@@ -130,7 +130,7 @@
{
icon: require('./assets/images/lxwj-zxty.png'),
title: '鍦ㄧ嚎浣撻獙',
- link: 'https://101.37.77.25:1335/static/index.html'
+ link: 'https://www.funasr.com:1335/static/index.html'
},
{
icon: require('./assets/images/lxwj-az.png'),
diff --git a/web-pages/src/views/home/sstx.vue b/web-pages/src/views/home/sstx.vue
index 01e4f53..3806032 100644
--- a/web-pages/src/views/home/sstx.vue
+++ b/web-pages/src/views/home/sstx.vue
@@ -148,7 +148,7 @@
{
icon: require('./assets/images/lxwj-zxty.png'),
title: '鍦ㄧ嚎浣撻獙',
- link: 'https://101.37.77.25:1336/static/index.html'
+ link: 'https://www.funasr.com:1336/static/index.html'
},
{
icon: require('./assets/images/lxwj-az.png'),
--
Gitblit v1.9.1