From 1596f6f414f6f41da66506debb1dff19fffeb3ec Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期一, 24 六月 2024 11:55:17 +0800
Subject: [PATCH] fixbug hotwords
---
runtime/tools/fst/generate_lexicon.py | 16 ++++++++--------
1 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/runtime/tools/fst/generate_lexicon.py b/runtime/tools/fst/generate_lexicon.py
index a5a47ec..f8c7443 100755
--- a/runtime/tools/fst/generate_lexicon.py
+++ b/runtime/tools/fst/generate_lexicon.py
@@ -8,18 +8,19 @@
# sys.argv[3]: lexicon file for corpus.dict
lex_dict = {}
-with open(sys.argv[2], 'r', encoding='utf8') as fin:
+with open(sys.argv[2], "r", encoding="utf8") as fin:
for line in fin:
- words = line.strip().split('\t')
+ words = line.strip().split("\t")
if len(words) != 2:
continue
lex_dict[words[0]] = words[1]
-with open(sys.argv[1], 'r', encoding='utf8') as fin, \
- open(sys.argv[3], 'w', encoding='utf8') as fout:
+with open(sys.argv[1], "r", encoding="utf8") as fin, open(
+ sys.argv[3], "w", encoding="utf8"
+) as fout:
for line in fin:
word = line.strip()
- if word == '<s>' or word == '</s>':
+ if word == "<s>" or word == "</s>":
continue
word_lex = ""
if word in lex_dict:
@@ -29,7 +30,6 @@
if word[i] in lex_dict:
word_lex += " " + lex_dict[word[i]]
else:
- word_lex += " <unk>"
-
- fout.write('{}\t{}\n'.format(word, word_lex.strip()))
+ word_lex += " <unk>"
+ fout.write("{}\t{}\n".format(word, word_lex.strip()))
--
Gitblit v1.9.1