From 96e4ff1870656b6b9d10de5f1a994959b286b909 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 20 Feb 2024 18:38:51 +0800
Subject: [PATCH] train finetune

---
 examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml             |    2 --
 examples/aishell/e_branchformer/conf/e_branchformer_12e_6d_2048_256.yaml   |    1 -
 examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml         |    2 --
 examples/aishell/branchformer/conf/branchformer_12e_6d_2048_256.yaml       |    1 -
 funasr/download/download_from_hub.py                                       |   24 +++++++++++-------------
 examples/aishell/paraformer/conf/paraformer_conformer_12e_6d_2048_256.yaml |    2 --
 6 files changed, 11 insertions(+), 21 deletions(-)

NOTE(review): in funasr/download/download_from_hub.py the new elif branch checks
for the existence of "model.pt", but the unchanged code below it still sets
init_param to os.path.join(model_or_path, "model.pb") — the branch may select a
checkpoint file it never loads; confirm whether init_param should point at
"model.pt".
NOTE(review): the (pre-existing) context line
`check_latest=kwargs.get("kwargs", True)` looks like a typo for
`kwargs.get("check_latest", True)`; worth a follow-up patch.

diff --git a/examples/aishell/branchformer/conf/branchformer_12e_6d_2048_256.yaml b/examples/aishell/branchformer/conf/branchformer_12e_6d_2048_256.yaml
index d6caf5d..d86e628 100644
--- a/examples/aishell/branchformer/conf/branchformer_12e_6d_2048_256.yaml
+++ b/examples/aishell/branchformer/conf/branchformer_12e_6d_2048_256.yaml
@@ -108,7 +108,6 @@
 tokenizer: CharTokenizer
 tokenizer_conf:
   unk_symbol: <unk>
-  split_with_space: true
 
 
 ctc_conf:
diff --git a/examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml b/examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml
index 8909b15..4238d84 100644
--- a/examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml
+++ b/examples/aishell/conformer/conf/conformer_12e_6d_2048_256.yaml
@@ -108,8 +108,6 @@
 tokenizer: CharTokenizer
 tokenizer_conf:
   unk_symbol: <unk>
-  split_with_space: true
-
 
 ctc_conf:
     dropout_rate: 0.0
diff --git a/examples/aishell/e_branchformer/conf/e_branchformer_12e_6d_2048_256.yaml b/examples/aishell/e_branchformer/conf/e_branchformer_12e_6d_2048_256.yaml
index e6cc1e6..2773459 100644
--- a/examples/aishell/e_branchformer/conf/e_branchformer_12e_6d_2048_256.yaml
+++ b/examples/aishell/e_branchformer/conf/e_branchformer_12e_6d_2048_256.yaml
@@ -108,7 +108,6 @@
 tokenizer: CharTokenizer
 tokenizer_conf:
   unk_symbol: <unk>
-  split_with_space: true
 
 
 ctc_conf:
diff --git a/examples/aishell/paraformer/conf/paraformer_conformer_12e_6d_2048_256.yaml b/examples/aishell/paraformer/conf/paraformer_conformer_12e_6d_2048_256.yaml
index dc2da1c..c100f0d 100644
--- a/examples/aishell/paraformer/conf/paraformer_conformer_12e_6d_2048_256.yaml
+++ b/examples/aishell/paraformer/conf/paraformer_conformer_12e_6d_2048_256.yaml
@@ -107,8 +107,6 @@
 tokenizer: CharTokenizer
 tokenizer_conf:
   unk_symbol: <unk>
-  split_with_space: false
-
 
 ctc_conf:
     dropout_rate: 0.0
diff --git a/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml b/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml
index 24a6390..674fc94 100644
--- a/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml
+++ b/examples/aishell/transformer/conf/transformer_12e_6d_2048_256.yaml
@@ -102,8 +102,6 @@
 tokenizer: CharTokenizer
 tokenizer_conf:
   unk_symbol: <unk>
-  split_with_space: true
-
 
 ctc_conf:
     dropout_rate: 0.0
diff --git a/funasr/download/download_from_hub.py b/funasr/download/download_from_hub.py
index c102549..4a8e57a 100644
--- a/funasr/download/download_from_hub.py
+++ b/funasr/download/download_from_hub.py
@@ -21,10 +21,17 @@
         model_or_path = get_or_download_model_dir(model_or_path, model_revision, is_training=kwargs.get("is_training"), check_latest=kwargs.get("kwargs", True))
     kwargs["model_path"] = model_or_path
     
-    config = os.path.join(model_or_path, "config.yaml")
-    if os.path.exists(config) and os.path.exists(os.path.join(model_or_path, "model.pb")):
-        
-        config = OmegaConf.load(config)
+    if os.path.exists(os.path.join(model_or_path, "configuration.json")):
+        with open(os.path.join(model_or_path, "configuration.json"), 'r', encoding='utf-8') as f:
+            conf_json = json.load(f)
+            cfg = {}
+            add_file_root_path(model_or_path, conf_json["file_path_metas"], cfg)
+            cfg.update(kwargs)
+            config = OmegaConf.load(cfg["config"])
+            kwargs = OmegaConf.merge(config, cfg)
+        kwargs["model"] = config["model"]
+    elif os.path.exists(os.path.join(model_or_path, "config.yaml")) and os.path.exists(os.path.join(model_or_path, "model.pt")):
+        config = OmegaConf.load(os.path.join(model_or_path, "config.yaml"))
         kwargs = OmegaConf.merge(config, kwargs)
         init_param = os.path.join(model_or_path, "model.pb")
         kwargs["init_param"] = init_param
@@ -41,15 +48,6 @@
             kwargs["frontend_conf"]["cmvn_file"] = os.path.join(model_or_path, "am.mvn")
         if os.path.exists(os.path.join(model_or_path, "jieba_usr_dict")):
             kwargs["jieba_usr_dict"] = os.path.join(model_or_path, "jieba_usr_dict")
-    elif os.path.exists(os.path.join(model_or_path, "configuration.json")):
-        with open(os.path.join(model_or_path, "configuration.json"), 'r', encoding='utf-8') as f:
-            conf_json = json.load(f)
-            cfg = {}
-            add_file_root_path(model_or_path, conf_json["file_path_metas"], cfg)
-            cfg.update(kwargs)
-            config = OmegaConf.load(cfg["config"])
-            kwargs = OmegaConf.merge(config, cfg)
-        kwargs["model"] = config["model"]
     return OmegaConf.to_container(kwargs, resolve=True)
 
 def add_file_root_path(model_or_path: str, file_path_metas: dict, cfg = {}):

--
Gitblit v1.9.1