| | |
| | | #print(type(data)) |
| | | segments_result = vad_pipline(audio_in=data) |
| | | #print(segments_result) |
| | | if len(segments_result) == 0: |
| | | speech_start = False |
| | | speech_end = False |
| | | if len(segments_result) == 0 or len(segments_result["text"] > 1): |
| | | return False |
| | | else: |
| | | return True |
| | | elif segments_result["text"][0][0] != -1: |
| | | speech_start = True |
| | | elif segments_result["text"][0][1] != -1: |
| | | speech_end = True |
| | | return speech_start, speech_end |
| | | |
def asr():  # inference
    """Stub for ASR (speech-to-text) inference.

    NOTE(review): the body only declares the global pipeline handle and
    performs no inference.  Commented-out call sites in main() spawn this
    as ``threading.Thread(target=asr, args=(audio_in,))``, i.e. they pass an
    ``audio_in`` argument that this signature does not accept — confirm the
    intended interface before wiring it up.
    """
    global inference_pipeline2
| | |
def main():  # inference loop
    """Consume raw audio chunks from the global ``voices`` queue, run VAD on
    each chunk, accumulate chunks while speech is in progress, and push each
    assembled utterance (joined bytes) onto the global ``speek`` queue for
    the ASR consumer.

    Fixes over the original:
    - ``data`` was used but never read from ``voices`` (NameError / busy
      loop) — the queue read and the rolling two-chunk pre-speech buffer
      were evidently lost; both are restored here (reconstruction — confirm
      against the upstream demo).
    - Removed a conflicting legacy branch that evaluated ``vad(data)`` in a
      boolean context: ``vad`` returns a ``(speech_start, speech_end)``
      tuple on its main path, and a ``(False, False)`` tuple is truthy, so
      that branch always fired and fought the tuple-based logic below.
    - Deleted commented-out dead code.
    """
    frames = []           # chunks of the utterance currently being recorded
    buffer = []           # rolling pre-speech cache (at most two chunks)
    speech_start = False  # True while VAD reports speech in progress
    RECORD_NUM = 0        # chunks recorded since speech started

    global voices
    global speek

    while True:
        while not voices.empty():
            data = voices.get()

            # Keep the last two chunks so the utterance onset is not clipped.
            buffer.append(data)
            if len(buffer) > 2:
                buffer.pop(0)

            # While speech is active, record every incoming chunk.
            if speech_start:
                frames.append(data)
                RECORD_NUM += 1

            speech_start_i, speech_end_i = vad(data)
            if speech_start_i:
                speech_start = True
                print("检测到人声...")
                frames = []
                frames.extend(buffer)  # seed with the two cached pre-speech chunks
            elif speech_end_i or RECORD_NUM > 300:  # 300 = max utterance length in chunks; tune as needed
                speech_start = False
                print("说话结束或者超过设置最长时间...")
                audio_in = b"".join(frames)
                speek.put(audio_in)  # hand the utterance off to the ASR consumer
                # Reset all per-utterance state.
                frames = []
                buffer = []
                RECORD_NUM = 0
            time.sleep(0.01)
        time.sleep(0.01)
| | | |