zhaomingwork
2024-06-03 99ecaca8695fc482fd351c1619e3f9ebb274af93
Add python funasr api support for websocket srv (#1777)

* add python funasr_api support

* change little to README.md

* add core tools stream

* modified a little

* fix bug for timeout

* support for buffer decode

* add ffmpeg decode for buffer
8个文件已添加
624 ■■■■■ 已修改文件
runtime/funasr_api/README.md 72 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/funasr_api/asr_example.mp3 补丁 | 查看 | 原始文档 | blame | 历史
runtime/funasr_api/asr_example.wav 补丁 | 查看 | 原始文档 | blame | 历史
runtime/funasr_api/example.py 70 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/funasr_api/funasr_api.py 96 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/funasr_api/funasr_core.py 230 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/funasr_api/funasr_stream.py 72 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/funasr_api/funasr_tools.py 84 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/funasr_api/README.md
New file
@@ -0,0 +1,72 @@
# python funasr_api
This is the Python API for the FunASR engine. Only the 2pass server mode is supported.
## Installation
### Install websocket-client and ffmpeg
```shell
pip install websocket-client
apt install ffmpeg -y
```
#### Recognizer example
Any audio format that ffmpeg can decode is supported; for details see FunASR/runtime/funasr_api/example.py
```python
    # create a recognizer
    rcg = FunasrApi(
        uri="wss://www.funasr.com:10096/"
    )
    # recognizer by filepath
    text=rcg.rec_file("asr_example.mp3")
    print("recognizer by filepath result=",text)
    # recognizer by buffer
    # rec_buf(audio_buf,ffmpeg_decode=False),set ffmpeg_decode=True if audio is not PCM or WAV type
    with open("asr_example.wav", "rb") as f:
        audio_bytes = f.read()
    text=rcg.rec_buf(audio_bytes)
    print("recognizer by buffer result=",text)
```
#### Streaming recognizer example (use FunasrTools.audio2wav to convert the audio to WAV first if needed)
```python
    rcg = FunasrApi(
        uri="wss://www.funasr.com:10096/"
    )
    #define call_back function for msg
    def on_msg(msg):
       print("stream msg=",msg)
    stream=rcg.create_stream(msg_callback=on_msg)
    wav_path = "asr_example.wav"
    with open(wav_path, "rb") as f:
        audio_bytes = f.read()
    # use FunasrTools.audio2wav to convert other audio formats to WAV if needed
    #import os
    #from funasr_tools import FunasrTools
    #file_ext=os.path.splitext(wav_path)[-1].upper()
    #if not file_ext ==".PCM" and not file_ext ==".WAV":
    #       audio_bytes=FunasrTools.audio2wav(audio_bytes)
    stride = int(60 * 10 / 10 / 1000 * 16000 * 2)
    chunk_num = (len(audio_bytes) - 1) // stride + 1
    for i in range(chunk_num):
        beg = i * stride
        data = audio_bytes[beg : beg + stride]
        stream.feed_chunk(data)
    final_result=stream.wait_for_end()
    print("asr_example.wav stream_result=",final_result)
```
## Acknowledgements
1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
2. We acknowledge [zhaoming](https://github.com/zhaomingwork/FunASR/tree/fix_bug_for_python_websocket) for contributing the websocket service.
3. We acknowledge [cgisky1980](https://github.com/cgisky1980/FunASR) for contributing the websocket service of offline model.
runtime/funasr_api/asr_example.mp3
Binary files differ
runtime/funasr_api/asr_example.wav
Binary files differ
runtime/funasr_api/example.py
New file
@@ -0,0 +1,70 @@
"""
  Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  Reserved. MIT License  (https://opensource.org/licenses/MIT)
  2023-2024 by zhaomingwork@qq.com
"""
from funasr_api import FunasrApi
import wave
def recognizer_example():
    """Run the two one-shot demos: recognize a file by path, then a buffer."""
    recognizer = FunasrApi(uri="wss://www.funasr.com:10096/")

    # 1) Hand the recognizer a path and let it read the file itself.
    file_result = recognizer.rec_file("asr_example.mp3")
    print("recognizer by filepath result=", file_result)

    # 2) Hand the recognizer bytes that are already in memory.
    #    rec_buf() defaults to ffmpeg_decode=False; pass ffmpeg_decode=True
    #    when the buffer is not PCM or WAV data.
    with open("asr_example.wav", "rb") as wav_file:
        wav_bytes = wav_file.read()
    buffer_result = recognizer.rec_buf(wav_bytes)
    print("recognizer by buffer result=", buffer_result)
def recognizer_stream_example():
    """Feed asr_example.wav to a stream chunk by chunk and print the results."""

    def on_msg(msg):
        # Receives each message the stream passes to its callback.
        print("stream msg=", msg)

    recognizer = FunasrApi(uri="wss://www.funasr.com:10096/")
    stream = recognizer.create_stream(msg_callback=on_msg)

    wav_path = "asr_example.wav"
    with open(wav_path, "rb") as wav_file:
        audio_bytes = wav_file.read()
    # Audio that is not PCM/WAV would have to be converted before feeding,
    # e.g. audio_bytes = FunasrTools.audio2wav(audio_bytes) (from funasr_tools).

    # Evaluates to 1920 bytes per chunk -- presumably 60 ms of 16 kHz 16-bit
    # mono audio; confirm against the server's chunk settings.
    stride = int(60 * 10 / 10 / 1000 * 16000 * 2)
    for offset in range(0, len(audio_bytes), stride):
        stream.feed_chunk(audio_bytes[offset : offset + stride])

    final_result = stream.wait_for_end()
    print("asr_example.wav stream_result=", final_result)
if __name__ == "__main__":
    # Script entry point: run the streaming demo first, then the one-shot demo.
    print("example for Funasr_websocket_recognizer")
    for run_demo in (recognizer_stream_example, recognizer_example):
        run_demo()
runtime/funasr_api/funasr_api.py
New file
@@ -0,0 +1,96 @@
"""
  Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  Reserved. MIT License  (https://opensource.org/licenses/MIT)
  2023-2024 by zhaomingwork@qq.com
"""
# pip install websocket-client
# apt install ffmpeg
import threading
import traceback
import json
import time
import numpy as np
from funasr_stream import FunasrStream
from funasr_tools import FunasrTools
from funasr_core import FunasrCore
# class for recognizer in websocket
class FunasrApi:
    """
    Entry point of the python FunASR websocket client.

    Every recognition call and every stream runs on a freshly created
    FunasrCore connection. The most recent core is remembered in
    ``funasr_core`` so it can be closed before the next one is opened.
    """

    def __init__(
        self,
        uri="wss://www.funasr.com:10096/",
        timeout=1000,
        msg_callback=None,
    ):
        """
        uri: ws or wss server uri
        timeout: timeout for get result, handed to every FunasrCore created
        msg_callback: default callback for received messages, used when a
                      call does not supply its own
        """
        self.uri = uri
        self.timeout = timeout
        self.msg_callback = msg_callback
        self.funasr_core = None

    def create_stream(self, msg_callback=None):
        """
        Open a new connection and return a FunasrStream wrapping it.

        msg_callback: callback for this stream; falls back to the callback
                      given to the constructor when None.
        """
        # new_core() already closes the previous core, so no extra close here.
        return FunasrStream(self.new_core(msg_callback=msg_callback))

    def new_core(self, msg_callback=None):
        """
        Close the previous core (if any), then create and connect a new one.

        Returns the new FunasrCore. If the core cannot be created the error is
        printed and the process exits with status 0, as before.
        """
        if self.funasr_core is not None:
            try:
                self.funasr_core.close()
            except Exception as e:
                # A stale session that fails to close must not block a new one.
                print("close previous core", e)
        try:
            if msg_callback is None:
                msg_callback = self.msg_callback
            funasr_core = FunasrCore(
                self.uri, msg_callback=msg_callback, timeout=self.timeout
            )
            funasr_core.new_connection()
            self.funasr_core = funasr_core
            return funasr_core
        except Exception as e:
            print("init_core", e)
            # Same effect as exit(0) without relying on the `site` builtin.
            raise SystemExit(0)

    # rec buffer, set ffmpeg_decode=True if audio is not PCM or WAV type
    def rec_buf(self, audio_buf, ffmpeg_decode=False):
        """
        Recognize an in-memory audio buffer on a new connection.

        Returns the recognized text, or None if an exception was raised.
        """
        try:
            funasr_core = self.new_core()
            funasr_core.rec_buf(audio_buf, ffmpeg_decode=ffmpeg_decode)
            # FunasrCore.rec_buf() has already finished the session; this call
            # hands back the text accumulated on the core.
            return funasr_core.get_result()
        except Exception as e:
            print("rec_buf", e)
            return None

    # rec file
    def rec_file(self, file_path):
        """
        Recognize the audio file at file_path on a new connection.

        Returns the recognized text, or None if an exception was raised.
        """
        try:
            funasr_core = self.new_core()
            funasr_core.rec_file(file_path)
            # See rec_buf(): returns the text accumulated on the core.
            return funasr_core.get_result()
        except Exception as e:
            print("rec_file", e)
            return None
runtime/funasr_api/funasr_core.py
New file
@@ -0,0 +1,230 @@
"""
  Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  Reserved. MIT License  (https://opensource.org/licenses/MIT)
  2023-2024 by zhaomingwork@qq.com
"""
# pip install websocket-client
# apt install ffmpeg
import ssl
from websocket import ABNF
from websocket import create_connection
from queue import Queue
import threading
import traceback
import json
import time
import numpy as np
from funasr_tools import FunasrTools
# class for recognizer in websocket
class FunasrCore:
    """
    One websocket session with a FunASR server in 2pass mode.

    A session is opened with new_connection(), fed with feed_chunk(),
    rec_buf() or rec_file(), and finished with get_result(). A background
    thread reads the server messages and accumulates the text of every
    "2pass-offline" message in ``rec_text``.

    ``connect_state`` is 0 until the handshake message was sent, 1 while the
    session is running and 2 once close() was called.
    """

    def __init__(
        self,
        uri="wss://www.funasr.com:10096/",
        msg_callback=None,
        timeout=1000,
    ):
        """
        uri: ws or wss server uri
        msg_callback: for message received, called with each decoded message
        timeout: timeout for get result, counted in polling ticks of
                 wait_for_result() (one tick per 0.01 s sleep)

        Raises ValueError when uri starts with neither ws:// nor wss://.
        """
        # str.find() returns -1 (truthy) when absent and 0 (falsy) for a
        # prefix match, so it must not be used as a boolean here.
        if uri.startswith("wss://"):
            # NOTE(review): certificate and hostname verification are
            # switched off on purpose here; confirm this is acceptable for
            # the servers this client talks to.
            ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE
            ssl_opt = {"cert_reqs": ssl.CERT_NONE}
        elif uri.startswith("ws://"):
            ssl_context = None
            ssl_opt = None
        else:
            print("not support uri", uri)
            raise ValueError("not support uri: {}".format(uri))

        self.ssl_opt = ssl_opt
        self.ssl_context = ssl_context
        self.uri = uri
        print("connect to url", uri)
        self.msg_callback = msg_callback
        self.is_final = False
        self.rec_text = ""
        self.timeout = timeout
        self.rec_file_len = 0
        self.connect_state = 0
        # Set by new_connection(); defined here so close() and
        # wait_for_result() are safe before a connection exists.
        self.websocket = None
        self.thread_msg = None

    def new_connection(self):
        """
        Connect to the server, send the 2pass handshake and start the
        receiver thread. Errors are printed, not raised.
        """
        try:
            self.websocket = create_connection(
                self.uri, ssl=self.ssl_context, sslopt=self.ssl_opt
            )
            self.is_final = False
            self.rec_text = ""
            self.rec_file_len = 0
            self.connect_state = 0
            message = json.dumps(
                {
                    "mode": "2pass",
                    "chunk_size": [0, 10, 5],
                    "encoder_chunk_look_back": 4,
                    "decoder_chunk_look_back": 1,
                    "chunk_interval": 10,
                    "wav_name": "funasr_api",
                    "is_speaking": True,
                }
            )
            self.websocket.send(message)
            self.connect_state = 1
            # thread for receive message
            self.thread_msg = threading.Thread(target=self.thread_rec_msg)
            self.thread_msg.start()
            print("new_connection: ", message)
        except Exception as e:
            print("new_connection", e)

    # threads for rev msg
    def thread_rec_msg(self):
        """
        Receiver loop: read messages until close() is called or the socket
        fails, collect "2pass-offline" text and forward every message to
        msg_callback.
        """
        try:
            while True:
                if self.connect_state == 0:
                    time.sleep(0.1)
                    continue
                if self.connect_state == 2:
                    break
                msg = self.websocket.recv()
                if msg is None or len(msg) == 0:
                    continue
                msg = json.loads(msg)
                # Append the text BEFORE raising is_final: the waiting thread
                # reads rec_text as soon as it sees is_final.
                if msg.get("mode") == "2pass-offline":
                    self.rec_text = self.rec_text + msg.get("text", "")
                if msg.get("is_final") == True:  # noqa: E712 - same match as before
                    self.is_final = True
                if self.msg_callback is not None:
                    self.msg_callback(msg)
        except Exception:
            # Reached when the socket is closed (by close() or the server);
            # ending the thread quietly is the intended behaviour.
            return

    # feed data to asr engine in stream way
    def feed_chunk(self, chunk):
        """Send one binary audio chunk; a failure is printed, not raised."""
        try:
            self.websocket.send(chunk, ABNF.OPCODE_BINARY)
        except Exception:
            print("feed chunk error")

    def close(self):
        """Mark the session closed and close the websocket if there is one."""
        # Assignment, not comparison: tells the receiver loop to stop.
        self.connect_state = 2
        if self.websocket is not None:
            self.websocket.close()

    def rec_buf(self, audio_bytes, ffmpeg_decode=False):
        """
        Send a whole audio buffer and return the recognized text.

        audio_bytes: audio data; PCM/WAV unless ffmpeg_decode is True
        ffmpeg_decode: convert the buffer with FunasrTools.audio2wav first

        Returns the text from get_result(), or None if an exception occurred.
        """
        try:
            if ffmpeg_decode:
                audio_bytes = FunasrTools.audio2wav(audio_bytes)
            self.rec_file_len = len(audio_bytes)
            # Evaluates to 1920 bytes per chunk.
            stride = int(60 * 10 / 10 / 1000 * 16000 * 2)
            for beg in range(0, len(audio_bytes), stride):
                self.feed_chunk(audio_bytes[beg : beg + stride])
            return self.get_result()
        except Exception as e:
            print("rec_buf", e)
            return None

    # rec file
    def rec_file(self, file_path):
        """
        Read file_path and recognize it via rec_buf().

        Files whose extension is not .pcm or .wav are converted with
        FunasrTools.audio2wav first; if that conversion fails the process
        exits with status 0, as before.

        Returns the recognized text, or None if an exception occurred.
        """
        try:
            import os

            # splitext() keeps the leading dot, e.g. ".WAV".
            file_ext = os.path.splitext(file_path)[-1].upper()
            with open(file_path, "rb") as f:
                audio_bytes = f.read()
            if file_ext not in (".PCM", ".WAV"):
                audio_bytes = FunasrTools.audio2wav(audio_bytes)
            if audio_bytes is None:
                print("error, ffmpeg can not decode such file!")
                # Same effect as exit(0) without relying on the `site` builtin.
                raise SystemExit(0)
            return self.rec_buf(audio_bytes)
        except Exception as e:
            print("rec_file", e)
            return None

    def wait_for_result(self):
        """
        Block until the final message arrived, the timeout ran out, or the
        receiver thread has ended. Always returns None.

        The timeout is counted in 0.01 s ticks and is raised to the audio
        length (rec_file_len bytes, assuming 16 kHz 16-bit mono) when that is
        longer. With rec_file_len == 0 (stream usage) no timeout applies.
        """
        try:
            timeout = self.timeout
            file_dur = self.rec_file_len / 16000 / 2 * 100
            if file_dur > timeout:
                timeout = file_dur
                self.timeout = timeout
            # if file_dur==0 means in stream way and no timeout
            stream_mode = file_dur == 0
            while not self.is_final and (timeout > 0 or stream_mode):
                receiver = self.thread_msg
                if receiver is not None and not receiver.is_alive():
                    # Nobody is left to set is_final; waiting on would hang.
                    if not self.is_final:
                        print("connection closed before the final result")
                    return
                time.sleep(0.01)
                timeout = timeout - 1
            if not self.is_final and timeout <= 0 and not stream_mode:
                print("time out!", self.timeout)
        except Exception as e:
            print("wait_for_result", e)
            return

    def get_result(self):
        """
        Tell the server the audio is complete, wait for the final result,
        close the session and return the accumulated text.
        """
        try:
            message = json.dumps({"is_speaking": False})
            self.websocket.send(message)
            self.wait_for_result()
            self.close()
            # return the  msg
            return self.rec_text
        except Exception:
            # Also reached when called again on an already closed session;
            # the text collected so far is still the right answer, so this
            # stays silent on purpose.
            return self.rec_text
runtime/funasr_api/funasr_stream.py
New file
@@ -0,0 +1,72 @@
"""
  Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  Reserved. MIT License  (https://opensource.org/licenses/MIT)
  2023-2024 by zhaomingwork@qq.com
"""
# pip install websocket-client
# apt install ffmpeg
import threading
import traceback
import json
import time
# class for recognizer in websocket
class FunasrStream:
    """
    Streaming front end over a FunasrCore: feed audio chunks with
    feed_chunk(), then collect the text with wait_for_end().
    """

    def __init__(
        self,
        funasr_core
    ):
        """
        funasr_core: the core object whose connection this stream uses
        """
        self.funasr_core = funasr_core

    # feed data to asr engine in stream way
    def feed_chunk(self, chunk):
        """
        Pass one audio chunk to the core. Problems are printed, not raised.

        A missing core is reported and the chunk is dropped; the process is
        not terminated (the former exit(0) never took effect because the
        bare except swallowed it).
        """
        if self.funasr_core is None:
            print("error in stream, funasr_core is None")
            return
        try:
            self.funasr_core.feed_chunk(chunk)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must get through.
            print("feed chunk error")

    # return all result for this stream
    def wait_for_end(self):
        """
        Signal end of speech, wait for the result, close the core and return
        the text accumulated on it. Returns "" if anything fails.
        """
        try:
            message = json.dumps({"is_speaking": False})
            self.funasr_core.websocket.send(message)
            self.funasr_core.wait_for_result()
            self.funasr_core.close()
            # return the  msg
            return self.funasr_core.rec_text
        except Exception as e:
            print("error get_final_result ", e)
            return ""
runtime/funasr_api/funasr_tools.py
New file
@@ -0,0 +1,84 @@
"""
  Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  Reserved. MIT License  (https://opensource.org/licenses/MIT)
  2023-2024 by zhaomingwork@qq.com
"""
# pip install websocket-client
# apt install ffmpeg
import threading
import traceback
import time
# class for recognizer in websocket
class FunasrTools:
    """
    ffmpeg helpers for the FunASR client: check that ffmpeg can be started
    and convert arbitrary audio buffers to WAV.
    """

    # Printed verbatim whenever ffmpeg is found to be missing.
    _FFMPEG_MISSING_MSG = (
        "pls instal ffmpeg firest, in ubuntu, you can type apt install -y ffmpeg"
    )

    def __init__(
        self
    ):
        """
        Print a hint and exit the process (status 0) when ffmpeg is missing.
        """
        if not FunasrTools.check_ffmpeg():
            print(FunasrTools._FFMPEG_MISSING_MSG)
            # Same effect as exit(0) without relying on the `site` builtin.
            raise SystemExit(0)

    # check ffmpeg installed
    @staticmethod
    def check_ffmpeg():
        """
        Return True if `ffmpeg -version` can be started, False otherwise.

        Only the ability to launch the program is checked, not its exit code.
        """
        import subprocess

        try:
            subprocess.run(
                ["ffmpeg", "-version"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            return True
        except OSError:
            # Covers FileNotFoundError as well as e.g. a non-executable file.
            return False

    # use ffmpeg to convert audio to wav
    @staticmethod
    def audio2wav(audiobuf):
        """
        Convert an audio buffer to mono 16 kHz WAV by piping it through ffmpeg.

        audiobuf: bytes of the encoded audio, written to ffmpeg's stdin

        Returns the WAV bytes read from ffmpeg's stdout, or None when ffmpeg
        reports a failure or an exception occurs. Exits the process
        (status 0) when ffmpeg is not available.
        """
        import subprocess

        if not FunasrTools.check_ffmpeg():
            print(FunasrTools._FFMPEG_MISSING_MSG)
            raise SystemExit(0)
        try:
            ffmpeg_target_to_outwav = [
                "ffmpeg", "-i", "-", "-ac", "1", "-ar", "16000", "-f", "wav", "pipe:1",
            ]
            pipe_to = subprocess.Popen(
                ffmpeg_target_to_outwav,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            wavbuf, err = pipe_to.communicate(audiobuf)
            err_text = str(err)
            # The exit status is the reliable signal; the keyword scan is kept
            # so inputs rejected before are still rejected.
            failed = pipe_to.returncode != 0 or any(
                marker in err_text for marker in ("Error", "Unknown", "Invalid")
            )
            if failed:
                print("ffmpeg err", err)
                return None
            return wavbuf
        except Exception as e:
            print("audio2wav", e)
            return None