游雁
2023-07-05 4e2fe544ae37174a3e09dfcdbbdae5abfe711e53
funasr/bin/asr_inference_launch.py
@@ -255,8 +255,10 @@
    if param_dict is not None:
        hotword_list_or_file = param_dict.get('hotword')
        export_mode = param_dict.get("export_mode", False)
        clas_scale = param_dict.get('clas_scale', 1.0)
    else:
        hotword_list_or_file = None
        clas_scale = 1.0
    if kwargs.get("device", None) == "cpu":
        ngpu = 0
@@ -289,6 +291,7 @@
        penalty=penalty,
        nbest=nbest,
        hotword_list_or_file=hotword_list_or_file,
        clas_scale=clas_scale,
    )
    speech2text = Speech2TextParaformer(**speech2text_kwargs)
@@ -617,6 +620,22 @@
            sorted_data = sorted(data_with_index, key=lambda x: x[0][1] - x[0][0])
            results_sorted = []
            
            if not len(sorted_data):
                key = keys[0]
                # no active segments after VAD
                if writer is not None:
                    # Write empty results
                    ibest_writer["token"][key] = ""
                    ibest_writer["token_int"][key] = ""
                    ibest_writer["vad"][key] = ""
                    ibest_writer["text"][key] = ""
                    ibest_writer["text_with_punc"][key] = ""
                    if use_timestamp:
                        ibest_writer["time_stamp"][key] = ""
                logging.info("decoding, utt: {}, empty speech".format(key))
                continue
            batch_size_token_ms = batch_size_token*60
            if speech2text.device == "cpu":
                batch_size_token_ms = 0