From 0cf7339171ee136721059c3c185b5d92d160b17a Mon Sep 17 00:00:00 2001
From: zhaomingwork <61895407+zhaomingwork@users.noreply.github.com>
Date: 星期一, 21 八月 2023 14:43:07 +0800
Subject: [PATCH] add hotwords for h5 and java (#876)
---
funasr/runtime/html5/static/wsconnecter.js | 5 ++
funasr/runtime/html5/static/index.html | 4 ++
funasr/runtime/java/FunasrWsClient.java | 19 +++++++++
funasr/runtime/html5/static/main.js | 44 +++++++++++++++++++++-
4 files changed, 69 insertions(+), 3 deletions(-)
diff --git a/funasr/runtime/html5/static/index.html b/funasr/runtime/html5/static/index.html
index 2c76d82..27be908 100644
--- a/funasr/runtime/html5/static/index.html
+++ b/funasr/runtime/html5/static/index.html
@@ -52,7 +52,11 @@
</div>
<br>
+ 热词设置(空格隔开,如"阿里巴巴 达摩院 阿里云")：
+ <br>
+ <input type="text" id="varHot" style=" width: 100%;height:100%" />
+ <br>
语音识别结果显示：
<br>
diff --git a/funasr/runtime/html5/static/main.js b/funasr/runtime/html5/static/main.js
index 7e3573d..7e5a49a 100644
--- a/funasr/runtime/html5/static/main.js
+++ b/funasr/runtime/html5/static/main.js
@@ -172,6 +172,17 @@
}
}
+function getHotwords(){
+ var obj = document.getElementById("varHot");
+
+ if(typeof(obj) == 'undefined' || obj==null || obj.value.length<=0){
+ return "";
+ }
+ let val = obj.value.toString();
+ console.log("hotwords="+val);
+ return val;
+
+}
function getAsrMode(){
var item = null;
@@ -193,7 +204,34 @@
return item;
}
+function handleWithTimestamp(tmptext,tmptime)
+{
+ console.log( "tmptext: " + tmptext);
+ console.log( "tmptime: " + tmptime);
+ if(tmptime==null || tmptime=="undefined" || tmptext.length<=0)
+ {
+ return tmptext;
+ }
+ tmptext=tmptext.replace(/銆�/g, ","); // in case there are a lot of "銆�"
+ var words=tmptext.split(",");
+ var jsontime=JSON.parse(tmptime.replace(/\]\]\[\[/g, "],[")); // in case there are a lot segments by VAD
+ var char_index=0;
+ var text_withtime="";
+ for(var i=0;i<words.length;i++)
+ {
+ if(words[i]=="undefined" || words[i].length<=0)
+ {
+ continue;
+ }
+ console.log("words===",words[i]);
+ console.log( "words: " + words[i]+",time="+jsontime[char_index][0]/1000);
+ text_withtime=text_withtime+jsontime[char_index][0]/1000+":"+words[i]+"\n";
+ char_index=char_index+words[i].length;
+ }
+ return text_withtime;
+
+}
// 语音识别结果; 对jsonMsg数据解析,将识别结果附加到编辑框中
function getJsonMessage( jsonMsg ) {
//console.log(jsonMsg);
@@ -201,9 +239,11 @@
var rectxt=""+JSON.parse(jsonMsg.data)['text'];
var asrmodel=JSON.parse(jsonMsg.data)['mode'];
var is_final=JSON.parse(jsonMsg.data)['is_final'];
- if(asrmodel=="2pass-offline")
+ var timestamp=JSON.parse(jsonMsg.data)['timestamp'];
+ if(asrmodel=="2pass-offline" || asrmodel=="offline")
{
- offline_text=offline_text+rectxt; //.replace(/ +/g,"");
+
+ offline_text=offline_text+handleWithTimestamp(rectxt,timestamp); //rectxt; //.replace(/ +/g,"");
rec_text=offline_text;
}
else
diff --git a/funasr/runtime/html5/static/wsconnecter.js b/funasr/runtime/html5/static/wsconnecter.js
index 4beec83..cc6129f 100644
--- a/funasr/runtime/html5/static/wsconnecter.js
+++ b/funasr/runtime/html5/static/wsconnecter.js
@@ -78,6 +78,11 @@
{
request.wav_format=file_ext;
}
+ var hotwords=getHotwords();
+ if(hotwords.length>0)
+ {
+ request.hotwords=hotwords;
+ }
console.log(request);
speechSokt.send( JSON.stringify(request) );
console.log("杩炴帴鎴愬姛");
diff --git a/funasr/runtime/java/FunasrWsClient.java b/funasr/runtime/java/FunasrWsClient.java
index 43407f3..74a8695 100644
--- a/funasr/runtime/java/FunasrWsClient.java
+++ b/funasr/runtime/java/FunasrWsClient.java
@@ -79,6 +79,10 @@
obj.put("chunk_size", array);
obj.put("chunk_interval", new Integer(chunkInterval));
obj.put("wav_name", wavName);
+ if(FunasrWsClient.hotwords.trim().length()>0)
+ {
+ obj.put("hotwords", FunasrWsClient.hotwords.trim());
+ }
if(suffix.equals("wav")){
suffix="pcm";
}
@@ -185,6 +189,10 @@
try {
jsonObject = (JSONObject) jsonParser.parse(message);
logger.info("text: " + jsonObject.get("text"));
+ if(jsonObject.containsKey("timestamp"))
+ {
+ logger.info("timestamp: " + jsonObject.get("timestamp"));
+ }
} catch (org.json.simple.parser.ParseException e) {
e.printStackTrace();
}
@@ -222,6 +230,7 @@
static String strChunkSize = "5,10,5";
static int chunkInterval = 10;
static int sendChunkSize = 1920;
+ static String hotwords="";
String wavName = "javatest";
@@ -270,6 +279,12 @@
.setDefault("offline")
.type(String.class)
.required(false);
+ parser
+ .addArgument("--hotwords")
+ .help("hotwords, splited by space")
+ .setDefault("")
+ .type(String.class)
+ .required(false);
String srvIp = "";
String srvPort = "";
String wavPath = "";
@@ -277,7 +292,7 @@
String chunk_size = "";
int chunk_interval = 10;
String strmode = "offline";
-
+ String hot="";
try {
Namespace ns = parser.parseArgs(args);
srvIp = ns.get("host");
@@ -287,6 +302,7 @@
chunk_size = ns.get("chunk_size");
chunk_interval = ns.get("chunk_interval");
strmode = ns.get("mode");
+ hot=ns.get("hotwords");
System.out.println(srvPort);
} catch (ArgumentParserException ex) {
@@ -298,6 +314,7 @@
FunasrWsClient.chunkInterval = chunk_interval;
FunasrWsClient.wavPath = wavPath;
FunasrWsClient.mode = strmode;
+ FunasrWsClient.hotwords=hot;
System.out.println(
"serIp="
+ srvIp
--
Gitblit v1.9.1