From 8c732716d62a2c2bdf2f302a7904f1811aa4301d Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: Thu, 11 May 2023 16:55:50 +0800
Subject: [PATCH] update timestamp for batch inference
---
funasr/bin/asr_inference_paraformer_vad_punc.py | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/funasr/bin/asr_inference_paraformer_vad_punc.py b/funasr/bin/asr_inference_paraformer_vad_punc.py
index 8555449..edaad37 100644
--- a/funasr/bin/asr_inference_paraformer_vad_punc.py
+++ b/funasr/bin/asr_inference_paraformer_vad_punc.py
@@ -3,6 +3,7 @@
import json
import argparse
import logging
+from re import L
import sys
import time
import os
@@ -635,6 +636,9 @@
result[0] += restored_data[j][0]
result[1] += restored_data[j][1]
result[2] += restored_data[j][2]
+ for t in restored_data[j][4]:
+ t[0] += vadsegments[j][0]
+ t[1] += vadsegments[j][0]
result[4] += restored_data[j][4]
# result = [result[k]+restored_data[j][k] for k in range(len(result[:-2]))]
@@ -642,16 +646,6 @@
# result = result_segments[0]
text, token, token_int = result[0], result[1], result[2]
time_stamp = None if len(result) < 5 else result[4]
- # post process timestamp
- offset_list = [0]
- offset = 0
- for i in range(1, len(time_stamp)):
- if time_stamp[i][0] < time_stamp[i-1][1]:
- offset += time_stamp[i-1][1]
- offset_list.append(offset)
- for i in range(1, len(time_stamp)):
- time_stamp[i][0] += offset_list[i]
- time_stamp[i][1] += offset_list[i]
if use_timestamp and time_stamp is not None:
postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
--
Gitblit v1.9.1