From adcee8828ef5d78b575043954deb662a35e318f7 Mon Sep 17 00:00:00 2001
From: huangmingming <huangmingming@deepscience.cn>
Date: Mon, 30 Jan 2023 16:02:54 +0800
Subject: [PATCH] update the minimum size of audio

---
 funasr/modules/streaming_utils/utils.py |   46 +++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 45 insertions(+), 1 deletions(-)

diff --git a/funasr/modules/streaming_utils/utils.py b/funasr/modules/streaming_utils/utils.py
index dd76de9..4bb9d4f 100644
--- a/funasr/modules/streaming_utils/utils.py
+++ b/funasr/modules/streaming_utils/utils.py
@@ -1,6 +1,7 @@
+import os
 import torch
 from torch.nn import functional as F
-
+import yaml
 import numpy as np
 
 def sequence_mask(lengths, maxlen=None, dtype=torch.float32, device=None):
@@ -45,3 +46,46 @@
 		outputs += inputs
 	return outputs
 
+
+def proc_tf_vocab(vocab_path):
+	"""Return the lines of vocab_path, right-stripped, with '<unk>' appended when absent."""
+	with open(vocab_path, encoding="utf-8") as vocab_file:
+		tokens = [raw_line.rstrip() for raw_line in vocab_file]
+	has_unk = '<unk>' in tokens
+	return tokens if has_unk else tokens + ['<unk>']
+
+
+def gen_config_for_tfmodel(config_path, vocab_path, output_dir):
+	"""Write output_dir/config.yaml: the YAML mapping at config_path plus a 'token_list' key.
+
+	The token list is proc_tf_vocab(vocab_path); output_dir is created if it does not exist.
+	"""
+	token_list = proc_tf_vocab(vocab_path)
+	with open(config_path, encoding="utf-8") as f:
+		config = yaml.safe_load(f) or {}  # an empty YAML file loads as None
+	config['token_list'] = token_list
+	os.makedirs(output_dir, exist_ok=True)  # no exists() pre-check: avoids a create race
+	with open(os.path.join(output_dir, "config.yaml"), "w", encoding="utf-8") as f:
+		yaml_no_alias_safe_dump(config, f, indent=4, sort_keys=False)
+
+
+class NoAliasSafeDumper(yaml.SafeDumper):
+	# SafeDumper variant that disables YAML anchors/aliases, which look ugly in the output.
+	def ignore_aliases(self, data):
+		return True
+
+
+def yaml_no_alias_safe_dump(data, stream=None, **kwargs):
+	"""Dump data as YAML with NoAliasSafeDumper and allow_unicode=True; kwargs go to yaml.dump."""
+	dumper_cls = NoAliasSafeDumper
+	dumped = yaml.dump(data, stream, Dumper=dumper_cls, allow_unicode=True, **kwargs)
+	return dumped
+
+
+if __name__ == '__main__':
+	# Command-line entry point; arguments: <config_path> <vocab_path> <output_dir>.
+	import sys
+
+	if len(sys.argv) < 4:  # exit with a usage message instead of a bare IndexError
+		sys.exit("usage: %s <config_path> <vocab_path> <output_dir>" % sys.argv[0])
+	gen_config_for_tfmodel(*sys.argv[1:4])
\ No newline at end of file

--
Gitblit v1.9.1