python/FunASR-XL.git

parent: bd8f163e | 补丁 | 提交 | ignore whitespace

lzr265946

2023-02-10 7aa2e885f41829e5148ed3be44d3ebb43e04ff40

support for turning off timestamps

8个文件已修改

	funasr/bin/asr_inference.py	2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	funasr/bin/asr_inference_paraformer.py	8 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	funasr/bin/asr_inference_paraformer_timestamp.py	2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	funasr/bin/asr_inference_paraformer_vad.py	19 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	funasr/bin/asr_inference_paraformer_vad_punc.py	21 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	funasr/bin/asr_inference_uniasr.py	2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	funasr/bin/asr_inference_uniasr_vad.py	2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
	funasr/utils/postprocess_utils.py	6 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史

 funasr/bin/asr_inference.py

@@ -453,7 +453,7 @@
                    ibest_writer["score"][key] = str(hyp.score)
                
                if text is not None:
-                   text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                   text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1

 funasr/bin/asr_inference_paraformer.py

@@ -428,7 +428,11 @@
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
    )

-   hotword_list_or_file = param_dict['hotword']
+   if param_dict is not None:
+       hotword_list_or_file = param_dict.get('hotword')
+   else:
+       hotword_list_or_file = None
+
    if ngpu >= 1 and torch.cuda.is_available():
        device = "cuda"
    else:
@@ -539,7 +543,7 @@
                        ibest_writer["rtf"][key] = rtf_cur

                    if text is not None:
-                       text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                       text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                        item = {'key': key, 'value': text_postprocessed}
                        asr_result_list.append(item)
                        finish_count += 1

 funasr/bin/asr_inference_paraformer_timestamp.py

@@ -436,7 +436,7 @@
                    ibest_writer["score"][key] = str(hyp.score)
    
                if text is not None:
-                   text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                   text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1

 funasr/bin/asr_inference_paraformer_vad.py

@@ -241,6 +241,11 @@
            allow_variable_data_keys=allow_variable_data_keys,
            inference=True,
        )

+       if param_dict is not None:
+           use_timestamp = param_dict.get('use_timestamp', True)
+       else:
+           use_timestamp = True
+
        finish_count = 0
        file_count = 1
@@ -284,8 +289,10 @@
                text, token, token_int = result[0], result[1], result[2]
                time_stamp = None if len(result) < 4 else result[3]
               
-
-               postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+               if use_timestamp and time_stamp is not None:
+                   postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+               else:
+                   postprocessed_result = postprocess_utils.sentence_postprocess(token)
                text_postprocessed = ""
                time_stamp_postprocessed = ""
                text_postprocessed_punc = postprocessed_result
@@ -293,9 +300,11 @@
                    text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                               postprocessed_result[1], \
                                                                               postprocessed_result[2]
-                   text_postprocessed_punc = text_postprocessed
-                   if len(word_lists) > 0 and text2punc is not None:
-                       text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+               else:
+                   text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+               text_postprocessed_punc = text_postprocessed
+               if len(word_lists) > 0 and text2punc is not None:
+                   text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)

                
                item = {'key': key, 'value': text_postprocessed_punc}

 funasr/bin/asr_inference_paraformer_vad_punc.py

@@ -570,6 +570,11 @@
            allow_variable_data_keys=allow_variable_data_keys,
            inference=True,
        )

+       if param_dict is not None:
+           use_timestamp = param_dict.get('use_timestamp', True)
+       else:
+           use_timestamp = True
+
        finish_count = 0
        file_count = 1
@@ -612,8 +617,11 @@
                result = result_segments[0]
                text, token, token_int = result[0], result[1], result[2]
                time_stamp = None if len(result) < 4 else result[3]
-
-               postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+
+               if use_timestamp and time_stamp is not None:
+                   postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
+               else:
+                   postprocessed_result = postprocess_utils.sentence_postprocess(token)
                text_postprocessed = ""
                time_stamp_postprocessed = ""
                text_postprocessed_punc = postprocessed_result
@@ -621,9 +629,12 @@
                    text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
                                                                               postprocessed_result[1], \
                                                                               postprocessed_result[2]
-                   text_postprocessed_punc = text_postprocessed
-                   if len(word_lists) > 0 and text2punc is not None:
-                       text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
+               else:
+                   text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
+
+               text_postprocessed_punc = text_postprocessed
+               if len(word_lists) > 0 and text2punc is not None:
+                   text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
    
                item = {'key': key, 'value': text_postprocessed_punc}
                if text_postprocessed != "":

 funasr/bin/asr_inference_uniasr.py

@@ -492,7 +492,7 @@
                    ibest_writer["score"][key] = str(hyp.score)
    
                if text is not None:
-                   text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                   text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1

 funasr/bin/asr_inference_uniasr_vad.py

@@ -492,7 +492,7 @@
                    ibest_writer["score"][key] = str(hyp.score)
    
                if text is not None:
-                   text_postprocessed = postprocess_utils.sentence_postprocess(token)
+                   text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
                    item = {'key': key, 'value': text_postprocessed}
                    asr_result_list.append(item)
                    finish_count += 1

 funasr/utils/postprocess_utils.py

@@ -232,5 +232,9 @@
        return sentence, ts_lists, real_word_lists
    else:
        word_lists = abbr_dispose(word_lists)
+       real_word_lists = []
+       for ch in word_lists:
+           if ch != ' ':
+               real_word_lists.append(ch)
        sentence = ''.join(word_lists).strip()
-       return sentence
+       return sentence, real_word_lists

			@@ -453,7 +453,7 @@
			ibest_writer["score"][key] = str(hyp.score)

			if text is not None:
			text_postprocessed = postprocess_utils.sentence_postprocess(token)
			text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
			item = {'key': key, 'value': text_postprocessed}
			asr_result_list.append(item)
			finish_count += 1

			@@ -428,7 +428,11 @@
			format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
			)

			hotword_list_or_file = param_dict['hotword']
			if param_dict is not None:
			hotword_list_or_file = param_dict.get('hotword')
			else:
			hotword_list_or_file = None

			if ngpu >= 1 and torch.cuda.is_available():
			device = "cuda"
			else:
			@@ -539,7 +543,7 @@
			ibest_writer["rtf"][key] = rtf_cur

			if text is not None:
			text_postprocessed = postprocess_utils.sentence_postprocess(token)
			text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
			item = {'key': key, 'value': text_postprocessed}
			asr_result_list.append(item)
			finish_count += 1

			@@ -436,7 +436,7 @@
			ibest_writer["score"][key] = str(hyp.score)

			if text is not None:
			text_postprocessed = postprocess_utils.sentence_postprocess(token)
			text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
			item = {'key': key, 'value': text_postprocessed}
			asr_result_list.append(item)
			finish_count += 1

			@@ -241,6 +241,11 @@
			allow_variable_data_keys=allow_variable_data_keys,
			inference=True,
			)

			if param_dict is not None:
			use_timestamp = param_dict.get('use_timestamp', True)
			else:
			use_timestamp = True

			finish_count = 0
			file_count = 1
			@@ -284,8 +289,10 @@
			text, token, token_int = result[0], result[1], result[2]
			time_stamp = None if len(result) < 4 else result[3]


			postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
			if use_timestamp and time_stamp is not None:
			postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
			else:
			postprocessed_result = postprocess_utils.sentence_postprocess(token)
			text_postprocessed = ""
			time_stamp_postprocessed = ""
			text_postprocessed_punc = postprocessed_result
			@@ -293,9 +300,11 @@
			text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
			postprocessed_result[1], \
			postprocessed_result[2]
			text_postprocessed_punc = text_postprocessed
			if len(word_lists) > 0 and text2punc is not None:
			text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
			else:
			text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
			text_postprocessed_punc = text_postprocessed
			if len(word_lists) > 0 and text2punc is not None:
			text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)


			item = {'key': key, 'value': text_postprocessed_punc}

			@@ -570,6 +570,11 @@
			allow_variable_data_keys=allow_variable_data_keys,
			inference=True,
			)

			if param_dict is not None:
			use_timestamp = param_dict.get('use_timestamp', True)
			else:
			use_timestamp = True

			finish_count = 0
			file_count = 1
			@@ -612,8 +617,11 @@
			result = result_segments[0]
			text, token, token_int = result[0], result[1], result[2]
			time_stamp = None if len(result) < 4 else result[3]

			postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)

			if use_timestamp and time_stamp is not None:
			postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
			else:
			postprocessed_result = postprocess_utils.sentence_postprocess(token)
			text_postprocessed = ""
			time_stamp_postprocessed = ""
			text_postprocessed_punc = postprocessed_result
			@@ -621,9 +629,12 @@
			text_postprocessed, time_stamp_postprocessed, word_lists = postprocessed_result[0], \
			postprocessed_result[1], \
			postprocessed_result[2]
			text_postprocessed_punc = text_postprocessed
			if len(word_lists) > 0 and text2punc is not None:
			text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)
			else:
			text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]

			text_postprocessed_punc = text_postprocessed
			if len(word_lists) > 0 and text2punc is not None:
			text_postprocessed_punc, punc_id_list = text2punc(word_lists, 20)

			item = {'key': key, 'value': text_postprocessed_punc}
			if text_postprocessed != "":

			@@ -492,7 +492,7 @@
			ibest_writer["score"][key] = str(hyp.score)

			if text is not None:
			text_postprocessed = postprocess_utils.sentence_postprocess(token)
			text_postprocessed, _ = postprocess_utils.sentence_postprocess(token)
			item = {'key': key, 'value': text_postprocessed}
			asr_result_list.append(item)
			finish_count += 1

			@@ -232,5 +232,9 @@
			return sentence, ts_lists, real_word_lists
			else:
			word_lists = abbr_dispose(word_lists)
			real_word_lists = []
			for ch in word_lists:
			if ch != ' ':
			real_word_lists.append(ch)
			sentence = ''.join(word_lists).strip()
			return sentence
			return sentence, real_word_lists