From ee06cb9c6870d9e1579015aabfe1a84a61a5c087 Mon Sep 17 00:00:00 2001
From: 九耳 <mengzhe.cmz@alibaba-inc.com>
Date: Tue, 28 Feb 2023 18:11:12 +0800
Subject: [PATCH] punctuation: add training code, support large dataset
---
funasr/tasks/abs_task.py | 2 ++
1 file changed, 2 insertions(+), 0 deletions(-)
diff --git a/funasr/tasks/abs_task.py b/funasr/tasks/abs_task.py
index 5be9089..d2a00b2 100644
--- a/funasr/tasks/abs_task.py
+++ b/funasr/tasks/abs_task.py
@@ -1350,10 +1350,12 @@
train_iter_factory = ArkDataLoader(args.train_data_file, args.token_list, args.dataset_conf,
seg_dict_file=args.seg_dict_file if hasattr(args,
"seg_dict_file") else None,
+ punc_dict_file=args.punc_list if hasattr(args, "punc_list") else None,
mode="train")
valid_iter_factory = ArkDataLoader(args.valid_data_file, args.token_list, args.dataset_conf,
seg_dict_file=args.seg_dict_file if hasattr(args,
"seg_dict_file") else None,
+ punc_dict_file=args.punc_list if hasattr(args, "punc_list") else None,
mode="eval")
elif args.dataset_type == "small":
train_iter_factory = cls.build_iter_factory(
--
Gitblit v1.9.1