From 93a02feda1848531a48a1794a8de12a962c84a8e Mon Sep 17 00:00:00 2001
From: Xingchen Song(宋星辰) <xingchensong1996@163.com>
Date: Fri, 07 Jun 2024 23:10:13 +0800
Subject: [PATCH] [fix] fix empty asr result (#1794)
---
runtime/tools/fst/generate_lexicon.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/runtime/tools/fst/generate_lexicon.py b/runtime/tools/fst/generate_lexicon.py
index a5a47ec..f8c7443 100755
--- a/runtime/tools/fst/generate_lexicon.py
+++ b/runtime/tools/fst/generate_lexicon.py
@@ -8,18 +8,19 @@
# sys.argv[3]: lexicon file for corpus.dict
lex_dict = {}
-with open(sys.argv[2], 'r', encoding='utf8') as fin:
+with open(sys.argv[2], "r", encoding="utf8") as fin:
for line in fin:
- words = line.strip().split('\t')
+ words = line.strip().split("\t")
if len(words) != 2:
continue
lex_dict[words[0]] = words[1]
-with open(sys.argv[1], 'r', encoding='utf8') as fin, \
- open(sys.argv[3], 'w', encoding='utf8') as fout:
+with open(sys.argv[1], "r", encoding="utf8") as fin, open(
+ sys.argv[3], "w", encoding="utf8"
+) as fout:
for line in fin:
word = line.strip()
- if word == '<s>' or word == '</s>':
+ if word == "<s>" or word == "</s>":
continue
word_lex = ""
if word in lex_dict:
@@ -29,7 +30,6 @@
if word[i] in lex_dict:
word_lex += " " + lex_dict[word[i]]
else:
- word_lex += " <unk>"
-
- fout.write('{}\t{}\n'.format(word, word_lex.strip()))
+ word_lex += " <unk>"
+ fout.write("{}\t{}\n".format(word, word_lex.strip()))
--
Gitblit v1.9.1