Yabin Li
2023-04-06 bced0c251b390067de435237446065cecbcf2760
Merge pull request #290 from veelion/main

Read audio_data to buf when speaking is False for non-stream inferring
2个文件已修改
88 ■■■■ 已修改文件
funasr/runtime/grpc/Readme.md 62 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/grpc/paraformer_server.cc 26 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/runtime/grpc/Readme.md
@@ -53,6 +53,68 @@
python grpc_main_client_mic.py  --host $server_ip --port 10108
```
The `grpc_main_client_mic.py` follows the [original design](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/runtime/python/grpc#workflow-in-desgin) by sending audio_data with chunks. If you want to send audio_data in one request, here is an example:
```
# go to ../python/grpc to find this package
import paraformer_pb2
class RecognizeStub:
    """Minimal client stub exposing the bidirectional-streaming Recognize RPC."""

    def __init__(self, channel):
        serializer = paraformer_pb2.Request.SerializeToString
        deserializer = paraformer_pb2.Response.FromString
        self.Recognize = channel.stream_stream(
            '/paraformer.ASR/Recognize',
            request_serializer=serializer,
            response_deserializer=deserializer,
        )
async def send(channel, data, speaking, isEnd):
    """Issue one Recognize request on `channel` and return the response iterator.

    Args:
        channel: a grpc channel connected to the paraformer server.
        data: raw audio bytes, or None/empty for a control-only request.
        speaking: value for Request.speaking (False => non-stream decode).
        isEnd: value for Request.isEnd (True tells the server the session ends).

    Returns:
        The streaming response iterator from stub.Recognize.
    """
    stub = RecognizeStub(channel)
    req = paraformer_pb2.Request()
    if data:
        req.audio_data = data
    req.user = 'zz'
    req.language = 'zh-CN'
    req.speaking = speaking
    req.isEnd = isEnd
    q = queue.SimpleQueue()
    q.put(req)
    # Enqueue the sentinel so iter(q.get, None) terminates after the single
    # request. Without it q.get() blocks forever, the request stream is never
    # half-closed, and gRPC leaks one blocked consumer thread per call.
    q.put(None)
    return stub.Recognize(iter(q.get, None))
# send the audio data once
async def grpc_rec(data, grpc_uri):
    """Recognize `data` (raw audio bytes) with one request against `grpc_uri`.

    Returns a dict with the decoded 'text' and the elapsed 'time' in seconds.
    """
    with grpc.insecure_channel(grpc_uri) as channel:
        started = time.time()
        text = ''
        replies = await send(channel, data, False, False)
        first = replies.next()
        if first.action == 'decoding':
            second = replies.next()
            if second.action == 'finish':
                text = json.loads(second.sentence)['text']
        # Tell the server the session is over (no audio payload).
        await send(channel, None, False, True)
        return {'text': text, 'time': time.time() - started}
async def test():
    """Smoke test: decode a local 10-second wav file via the gRPC server."""
    # fc = FunAsrGrpcClient('127.0.0.1', 9900)
    # t = await fc.rec(wav.tobytes())
    # print(t)
    samples, _ = sf.read('z-10s.wav', dtype='int16')
    server = '127.0.0.1:9900'
    result = await grpc_rec(samples.tobytes(), server)
    print(result)


if __name__ == '__main__':
    asyncio.run(test())
```
## Acknowledge
1. This project is maintained by [FunASR community](https://github.com/alibaba-damo-academy/FunASR).
2. We acknowledge [DeepScience](https://www.deepscience.cn) for contributing the grpc service.
funasr/runtime/grpc/paraformer_server.cc
@@ -88,7 +88,7 @@
            res.set_language(req.language());
            stream->Write(res);
        } else if (!req.speaking()) {
            if (client_buffers.count(req.user()) == 0) {
            if (client_buffers.count(req.user()) == 0 && req.audio_data().size() == 0) {
                Response res;
                res.set_sentence(
                    R"({"success": true, "detail": "waiting_for_voice"})"
@@ -99,14 +99,18 @@
                stream->Write(res);
            }else {
                auto begin_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
                if (req.audio_data().size() > 0) {
                  auto& buf = client_buffers[req.user()];
                  buf.insert(buf.end(), req.audio_data().begin(), req.audio_data().end());
                }
                std::string tmp_data = this->client_buffers[req.user()];
                this->clear_states(req.user());
                Response res;
                res.set_sentence(
                    R"({"success": true, "detail": "decoding data: " + std::to_string(tmp_data.length()) + " bytes"})"
                );
        int data_len_int = tmp_data.length();
                int data_len_int = tmp_data.length();
                std::string data_len = std::to_string(data_len_int);
                std::stringstream ss;
                ss << R"({"success": true, "detail": "decoding data: )" << data_len << R"( bytes")"  << R"("})";
@@ -129,18 +133,18 @@
                    res.set_user(req.user());
                    res.set_action("finish");
                    res.set_language(req.language());
                    stream->Write(res);
                }
                else {
                    RPASR_RESULT Result= RapidAsrRecogPCMBuffer(AsrHanlde, tmp_data.c_str(), data_len_int, RASR_NONE, NULL);
                    RPASR_RESULT Result= RapidAsrRecogPCMBuffer(AsrHanlde, tmp_data.c_str(), data_len_int, RASR_NONE, NULL);
                    std::string asr_result = ((RPASR_RECOG_RESULT*)Result)->msg;
                    auto end_time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
                    std::string delay_str = std::to_string(end_time - begin_time);
                    std::cout << "user: " << req.user() << " , delay(ms): " << delay_str << ", text: " << asr_result << std::endl;
                    Response res;
                    std::stringstream ss;
@@ -150,8 +154,8 @@
                    res.set_user(req.user());
                    res.set_action("finish");
                    res.set_language(req.language());
                    stream->Write(res);
                }
            }
@@ -165,7 +169,7 @@
            res.set_language(req.language());
            stream->Write(res);
        }
    }
    }
    return Status::OK;
}