zhifu gao
2024-06-11 997374b88fe6b2ae5cb4dcaf47d78cb3eff09fc2
funasr/auto/auto_model.py
@@ -42,8 +42,9 @@
    filelist = [".scp", ".txt", ".json", ".jsonl", ".text"]
    chars = string.ascii_letters + string.digits
    if isinstance(data_in, str) and data_in.startswith("http"):  # url
        data_in = download_from_url(data_in)
    if isinstance(data_in, str):
        if data_in.startswith("http://") or data_in.startswith("https://"):  # url
            data_in = download_from_url(data_in)
    if isinstance(data_in, str) and os.path.exists(
        data_in
@@ -232,6 +233,8 @@
        # fp16
        if kwargs.get("fp16", False):
            model.to(torch.float16)
        elif kwargs.get("bf16", False):
            model.to(torch.bfloat16)
        return model, kwargs
    def __call__(self, *args, **cfg):
@@ -284,7 +287,7 @@
            with torch.no_grad():
                res = model.inference(**batch, **kwargs)
                if isinstance(res, (list, tuple)):
                    results = res[0]
                    results = res[0] if len(res) > 0 else [{"text": ""}]
                    meta_data = res[1] if len(res) > 1 else {}
            time2 = time.perf_counter()
@@ -358,6 +361,7 @@
            results_sorted = []
            if not len(sorted_data):
                results_ret_list.append({"key": key, "text": "", "timestamp": []})
                logging.info("decoding, utt: {}, empty speech".format(key))
                continue
@@ -425,6 +429,10 @@
            #                      f"time_speech_total_per_sample: {time_speech_total_per_sample: 0.3f}, "
            #                      f"time_escape_total_per_sample: {time_escape_total_per_sample:0.3f}")
            if len(results_sorted) != n:
                results_ret_list.append({"key": key, "text": "", "timestamp": []})
                logging.info("decoding, utt: {}, empty result".format(key))
                continue
            restored_data = [0] * n
            for j in range(n):
                index = sorted_data[j][1]