From 561bdbdfc0f8fd6234c5130fdc8631bf8e294bd8 Mon Sep 17 00:00:00 2001
From: 王梦迪 <73778524+di-osc@users.noreply.github.com>
Date: Thu, 22 May 2025 11:27:01 +0800
Subject: [PATCH] 通过缓存seg_dict,加快seaco_paraformer推理 (#2520)
---
funasr/models/seaco_paraformer/model.py | 26 +++++++++++++++-----------
1 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/funasr/models/seaco_paraformer/model.py b/funasr/models/seaco_paraformer/model.py
index 3b6595c..b30c5cb 100644
--- a/funasr/models/seaco_paraformer/model.py
+++ b/funasr/models/seaco_paraformer/model.py
@@ -16,6 +16,7 @@
from typing import Dict, Tuple
from contextlib import contextmanager
from distutils.version import LooseVersion
+from functools import lru_cache
from funasr.register import tables
from funasr.utils import postprocess_utils
@@ -510,17 +511,6 @@
return results, meta_data
def generate_hotwords_list(self, hotword_list_or_file, tokenizer=None, frontend=None):
- def load_seg_dict(seg_dict_file):
- seg_dict = {}
- assert isinstance(seg_dict_file, str)
- with open(seg_dict_file, "r", encoding="utf8") as f:
- lines = f.readlines()
- for line in lines:
- s = line.strip().split()
- key = s[0]
- value = s[1:]
- seg_dict[key] = " ".join(value)
- return seg_dict
def seg_tokenize(txt, seg_dict):
pattern = re.compile(r"^[\u4E00-\u9FA50-9]+$")
@@ -626,3 +616,17 @@
models = export_rebuild_model(model=self, **kwargs)
return models
+
+
+@lru_cache(maxsize=1)
+def load_seg_dict(seg_dict_file):
+    """Load a segmentation dictionary from a whitespace-delimited text file.
+
+    Each non-blank line is split on whitespace: the first field is the key and
+    the remaining fields, re-joined with single spaces, are the value (an empty
+    string when the line holds only a key). If a key appears more than once,
+    the last occurrence wins.
+
+    The result is memoized by ``lru_cache(maxsize=1)``, so repeated calls with
+    the same path return the *same* dict object without re-reading the file.
+    Callers must treat it as read-only, and edits made to the file after the
+    first load are not picked up unless ``load_seg_dict.cache_clear()`` is
+    called.
+
+    Args:
+        seg_dict_file: Path of the UTF-8 encoded dictionary file; must be ``str``.
+
+    Returns:
+        dict mapping each key to its space-joined value string.
+
+    Raises:
+        AssertionError: If ``seg_dict_file`` is not a ``str`` (only while
+            assertions are enabled).
+        OSError: If the file cannot be opened.
+    """
+    assert isinstance(seg_dict_file, str)
+    seg_dict = {}
+    with open(seg_dict_file, "r", encoding="utf8") as f:
+        # Stream the file rather than materializing every line via readlines().
+        for line in f:
+            fields = line.split()
+            if not fields:
+                # Blank / whitespace-only line: there is no key to index, and
+                # fields[0] would raise IndexError, so skip it.
+                continue
+            seg_dict[fields[0]] = " ".join(fields[1:])
+    return seg_dict
\ No newline at end of file
--
Gitblit v1.9.1