zhaomingwork
2023-08-21 0cf7339171ee136721059c3c185b5d92d160b17a
add hotwords for h5 and java (#876)

* add hotwords for h5 and java

* add timestamp for h5 and java
4个文件已修改
72 ■■■■■ 已修改文件
funasr/runtime/html5/static/index.html 4 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/html5/static/main.js 44 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/html5/static/wsconnecter.js 5 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/java/FunasrWsClient.java 19 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/html5/static/index.html
@@ -52,7 +52,11 @@
                </div>
                <br>
        
                热词设置(空格隔开,如"阿里巴巴 达摩院 阿里云"):
                <br>
                
                <input  type="text"  id="varHot"  style=" width: 100%;height:100%" />
                <br>
             
                语音识别结果显示:
                <br>
funasr/runtime/html5/static/main.js
@@ -172,6 +172,17 @@
     
            }
}
/**
 * Read the hot-word text typed into the page element with id "varHot".
 *
 * @returns {string} the element's current value, or "" when the element is
 *     absent or its value is empty.
 */
function getHotwords(){
  const hotwordInput = document.getElementById("varHot");
  // No such element on the page: nothing to send.
  if(hotwordInput === undefined || hotwordInput === null){
    return "";
  }
  // Element exists but the user left it blank.
  if(hotwordInput.value.length <= 0){
    return "";
  }
  const hotwords = hotwordInput.value.toString();
  console.log("hotwords="+hotwords);
  return hotwords;
}
function getAsrMode(){
            var item = null;
@@ -193,7 +204,34 @@
           return item;
}
           
/**
 * Prefix every comma-separated segment of a recognition result with the time
 * of its first character, producing one "<time>:<segment>\n" line per segment.
 *
 * The timestamp list is indexed by character position: the entry used for a
 * segment is the one at the running count of characters in the preceding
 * segments (separators are not counted). The first element of that entry is
 * divided by 1000 before printing (presumably milliseconds to seconds - confirm
 * against the server protocol).
 *
 * @param {string} tmptext  recognised text; "。" is treated like ",".
 * @param {string|Array} tmptime  timestamp list, either as JSON text (several
 *     back-to-back lists such as "[[..]][[..]]" are merged) or already parsed.
 * @returns {string} the annotated text. The original text is returned
 *     unchanged when the text is empty, the timestamp is missing, or the
 *     timestamp cannot be parsed into an array. A segment for which no
 *     timestamp entry exists is emitted without a time prefix instead of
 *     throwing.
 */
function handleWithTimestamp(tmptext,tmptime)
{
    console.log( "tmptext: " + tmptext);
    console.log( "tmptime: " + tmptime);
    if(tmptime==null || tmptext==null || tmptext.length<=0)
    {
        return tmptext;
    }

    let jsontime=tmptime;
    if(!Array.isArray(jsontime))
    {
        try
        {
            // in case there are a lot of segments by VAD: "[[..]][[..]]" -> "[[..],[..]]"
            jsontime=JSON.parse(String(tmptime).replace(/\]\]\[\[/g, "],["));
        }
        catch (err)
        {
            // A bad timestamp must not cost the user the recognised text.
            console.log("handleWithTimestamp: unparsable timestamp, returning plain text: " + err);
            return tmptext;
        }
        if(!Array.isArray(jsontime))
        {
            return tmptext;
        }
    }

    // in case there are a lot of "。": treat them as segment separators too
    const words=tmptext.replace(/。/g, ",").split(",");
    let char_index=0;
    let text_withtime="";
    for(let i=0;i<words.length;i++)
    {
        const segment=words[i];
        if(segment.length<=0)
        {
            continue; // consecutive or trailing separators
        }
        const entry=jsontime[char_index];
        if(Array.isArray(entry))
        {
            console.log( "words: " + segment+",time="+entry[0]/1000);
            text_withtime=text_withtime+entry[0]/1000+":"+segment+"\n";
        }
        else
        {
            // Fewer timestamp entries than characters: keep the text, drop the time.
            console.log( "words: " + segment+",time=unavailable");
            text_withtime=text_withtime+segment+"\n";
        }
        char_index=char_index+segment.length;
    }
    return text_withtime;
}
// 语音识别结果; 对jsonMsg数据解析,将识别结果附加到编辑框中
function getJsonMessage( jsonMsg ) {
    //console.log(jsonMsg);
@@ -201,9 +239,11 @@
    var rectxt=""+JSON.parse(jsonMsg.data)['text'];
    var asrmodel=JSON.parse(jsonMsg.data)['mode'];
    var is_final=JSON.parse(jsonMsg.data)['is_final'];
    if(asrmodel=="2pass-offline")
    var timestamp=JSON.parse(jsonMsg.data)['timestamp'];
    if(asrmodel=="2pass-offline" || asrmodel=="offline")
    {
        offline_text=offline_text+rectxt; //.replace(/ +/g,"");
        offline_text=offline_text+handleWithTimestamp(rectxt,timestamp); //rectxt; //.replace(/ +/g,"");
        rec_text=offline_text;
    }
    else
funasr/runtime/html5/static/wsconnecter.js
@@ -78,6 +78,11 @@
        {
            request.wav_format=file_ext;
        }
        var hotwords=getHotwords();
        if(hotwords.length>0)
        {
            request.hotwords=hotwords;
        }
        console.log(request);
        speechSokt.send( JSON.stringify(request) );
        console.log("连接成功");
funasr/runtime/java/FunasrWsClient.java
@@ -79,6 +79,10 @@
      obj.put("chunk_size", array);
      obj.put("chunk_interval", new Integer(chunkInterval));
      obj.put("wav_name", wavName);
      if(FunasrWsClient.hotwords.trim().length()>0)
      {
          obj.put("hotwords", FunasrWsClient.hotwords.trim());
      }
      if(suffix.equals("wav")){
          suffix="pcm";
      }
@@ -185,6 +189,10 @@
    try {
      jsonObject = (JSONObject) jsonParser.parse(message);
      logger.info("text: " + jsonObject.get("text"));
      if(jsonObject.containsKey("timestamp"))
      {
          logger.info("timestamp: " + jsonObject.get("timestamp"));
      }
    } catch (org.json.simple.parser.ParseException e) {
      e.printStackTrace();
    }
@@ -222,6 +230,7 @@
  static String strChunkSize = "5,10,5";
  static int chunkInterval = 10;
  static int sendChunkSize = 1920;
  static String hotwords="";
  String wavName = "javatest";
@@ -270,6 +279,12 @@
        .setDefault("offline")
        .type(String.class)
        .required(false);
    parser
        .addArgument("--hotwords")
        .help("hotwords, splited by space")
        .setDefault("")
        .type(String.class)
        .required(false);
    String srvIp = "";
    String srvPort = "";
    String wavPath = "";
@@ -277,7 +292,7 @@
    String chunk_size = "";
    int chunk_interval = 10;
    String strmode = "offline";
    String hot="";
    try {
      Namespace ns = parser.parseArgs(args);
      srvIp = ns.get("host");
@@ -287,6 +302,7 @@
      chunk_size = ns.get("chunk_size");
      chunk_interval = ns.get("chunk_interval");
      strmode = ns.get("mode");
      hot=ns.get("hotwords");
      System.out.println(srvPort);
    } catch (ArgumentParserException ex) {
@@ -298,6 +314,7 @@
    FunasrWsClient.chunkInterval = chunk_interval;
    FunasrWsClient.wavPath = wavPath;
    FunasrWsClient.mode = strmode;
    FunasrWsClient.hotwords=hot;
    System.out.println(
        "serIp="
            + srvIp