From e0fa63765bfb4a36bde7047c2a6066ca5a80e90f Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: 星期一, 21 八月 2023 10:37:42 +0800
Subject: [PATCH] Dev hw (#878)

---
 funasr/runtime/docs/websocket_protocol.md |   10 ++++++----
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/funasr/runtime/docs/websocket_protocol.md b/funasr/runtime/docs/websocket_protocol.md
index 328fd41..5521935 100644
--- a/funasr/runtime/docs/websocket_protocol.md
+++ b/funasr/runtime/docs/websocket_protocol.md
@@ -10,7 +10,7 @@
 #### Initial Communication
 The message (which needs to be serialized in JSON) is:
 ```text
-{"mode": "offline", "wav_name": "wav_name", "is_speaking": True,"wav_format":"pcm"}
+{"mode": "offline", "wav_name": "wav_name", "wav_format":"pcm", "is_speaking": True, "hotwords":"阿里巴巴 达摩院 阿里云"}
 ```
 Parameter explanation:
 ```text
@@ -19,6 +19,7 @@
 `wav_format`: the audio and video file extension, such as pcm, mp3, mp4, etc.
 `is_speaking`: False indicates the end of a sentence, such as a VAD segmentation point or the end of a WAV file
 `audio_fs`: when the input audio is in PCM format, the audio sampling rate parameter needs to be added
+`hotwords`: if the AM is a hotword model, the hotwords need to be sent to the server as a single string, with a space (" ") used as the separator between hotwords. For example: "阿里巴巴 达摩院 阿里云"
 ```
 
 #### Sending Audio Data
@@ -34,7 +35,7 @@
 #### Sending Recognition Results
 The message (serialized in JSON) is:
 ```text
-{"mode": "offline", "wav_name": "wav_name", "text": "asr ouputs", "is_final": True}
+{"mode": "offline", "wav_name": "wav_name", "text": "asr outputs", "is_final": True, "timestamp":"[[100,200], [200,500]]"}
 ```
 Parameter explanation:
 ```text
@@ -42,6 +43,7 @@
 `wav_name`: the name of the audio file to be transcribed
 `text`: the text output of speech recognition
 `is_final`: indicating the end of recognition
+`timestamp`: if the AM is a timestamp model, this field is returned; it contains the timestamps, in the format "[[100,200], [200,500]]"
 ```
 
 ## Real-time Speech Recognition
@@ -56,7 +58,7 @@
 #### Initial Communication
 The message (which needs to be serialized in JSON) is:
 ```text
-{"mode": "2pass", "wav_name": "wav_name", "is_speaking": True, "wav_format":"pcm", "chunk_size":[5,10,5]
+{"mode": "2pass", "wav_name": "wav_name", "is_speaking": True, "wav_format":"pcm", "chunk_size":[5,10,5]}
 ```
 Parameter explanation:
 ```text
@@ -87,4 +89,4 @@
 `wav_name`: the name of the audio file to be transcribed
 `text`: the text output of speech recognition
 `is_final`: indicating the end of recognition
-```
\ No newline at end of file
+```

--
Gitblit v1.9.1