From ee06cb9c6870d9e1579015aabfe1a84a61a5c087 Mon Sep 17 00:00:00 2001
From: 九耳 <mengzhe.cmz@alibaba-inc.com>
Date: Tue, 28 Feb 2023 18:11:12 +0800
Subject: [PATCH] punctuation: add training code, support large dataset

---
 funasr/tasks/abs_task.py |    2 ++
 1 file changed, 2 insertions(+), 0 deletions(-)

diff --git a/funasr/tasks/abs_task.py b/funasr/tasks/abs_task.py
index 5be9089..d2a00b2 100644
--- a/funasr/tasks/abs_task.py
+++ b/funasr/tasks/abs_task.py
@@ -1350,10 +1350,12 @@
                 train_iter_factory = ArkDataLoader(args.train_data_file, args.token_list, args.dataset_conf,
                                                    seg_dict_file=args.seg_dict_file if hasattr(args,
                                                                                                "seg_dict_file") else None,
+                                                   punc_dict_file=args.punc_list if hasattr(args, "punc_list") else None,
                                                    mode="train")
                 valid_iter_factory = ArkDataLoader(args.valid_data_file, args.token_list, args.dataset_conf,
                                                    seg_dict_file=args.seg_dict_file if hasattr(args,
                                                                                                "seg_dict_file") else None,
+                                                   punc_dict_file=args.punc_list if hasattr(args, "punc_list") else None,
                                                    mode="eval")
             elif args.dataset_type == "small":
                 train_iter_factory = cls.build_iter_factory(

--
Gitblit v1.9.1