From 28a19dbc4e85d3b8a4ec2ef7483bba64d422b43f Mon Sep 17 00:00:00 2001
From: aky15 <ankeyu.aky@11.17.44.249>
Date: 星期三, 12 四月 2023 18:03:06 +0800
Subject: [PATCH] Merge remote-tracking branch 'origin/main' into dev_aky
---
funasr/tasks/diar.py | 87 +++++++++++++++++++++++++------------------
1 files changed, 51 insertions(+), 36 deletions(-)
diff --git a/funasr/tasks/diar.py b/funasr/tasks/diar.py
index ae7ee9b..096a5c8 100644
--- a/funasr/tasks/diar.py
+++ b/funasr/tasks/diar.py
@@ -507,7 +507,7 @@
config_file: Union[Path, str] = None,
model_file: Union[Path, str] = None,
cmvn_file: Union[Path, str] = None,
- device: str = "cpu",
+ device: Union[str, torch.device] = "cpu",
):
"""Build model from the files.
@@ -553,7 +553,7 @@
if ".bin" in model_name:
model_name_pth = os.path.join(model_dir, model_name.replace('.bin', '.pb'))
else:
- model_name_pth = os.path.join(model_dir, "{}.pth".format(model_name))
+ model_name_pth = os.path.join(model_dir, "{}.pb".format(model_name))
if os.path.exists(model_name_pth):
logging.info("model_file is load from pth: {}".format(model_name_pth))
model_dict = torch.load(model_name_pth, map_location=device)
@@ -562,12 +562,27 @@
model.load_state_dict(model_dict)
else:
model_dict = torch.load(model_file, map_location=device)
+ model_dict = cls.fileter_model_dict(model_dict, model.state_dict())
model.load_state_dict(model_dict)
if model_name_pth is not None and not os.path.exists(model_name_pth):
torch.save(model_dict, model_name_pth)
logging.info("model_file is saved to pth: {}".format(model_name_pth))
return model, args
+
+ @classmethod
+ def fileter_model_dict(cls, src_dict: dict, dest_dict: dict):  # Return the entries of src_dict whose keys also exist in dest_dict; `cls` is not used.
+ from collections import OrderedDict  # function-local import, resolved on each call
+ new_dict = OrderedDict()  # filled in src_dict iteration order
+ for key, value in src_dict.items():  # pass 1: copy the shared keys
+ if key in dest_dict:
+ new_dict[key] = value  # the value comes from src_dict; dest_dict is consulted for key membership only
+ else:
+ logging.info("{} is no longer needed in this model.".format(key))  # src-only key: dropped from the result, reported at INFO
+ for key, value in dest_dict.items():  # pass 2: report only, nothing is added here (`value` is unused)
+ if key not in new_dict:
+ logging.warning("{} is missed in checkpoint.".format(key))  # NOTE(review): dest-only keys stay absent from the result; a strict load_state_dict on it would presumably still fail - confirm
+ return new_dict  # holds exactly the keys present in both mappings, with src_dict's values
@classmethod
def convert_tf2torch(
@@ -750,47 +765,47 @@
cls, args: argparse.Namespace, train: bool
) -> Optional[Callable[[str, Dict[str, np.array]], Dict[str, np.ndarray]]]:
assert check_argument_types()
- if args.use_preprocessor:
- retval = CommonPreprocessor(
- train=train,
- token_type=args.token_type,
- token_list=args.token_list,
- bpemodel=None,
- non_linguistic_symbols=None,
- text_cleaner=None,
- g2p_type=None,
- split_with_space=args.split_with_space if hasattr(args, "split_with_space") else False,
- seg_dict_file=args.seg_dict_file if hasattr(args, "seg_dict_file") else None,
- # NOTE(kamo): Check attribute existence for backward compatibility
- rir_scp=args.rir_scp if hasattr(args, "rir_scp") else None,
- rir_apply_prob=args.rir_apply_prob
- if hasattr(args, "rir_apply_prob")
- else 1.0,
- noise_scp=args.noise_scp if hasattr(args, "noise_scp") else None,
- noise_apply_prob=args.noise_apply_prob
- if hasattr(args, "noise_apply_prob")
- else 1.0,
- noise_db_range=args.noise_db_range
- if hasattr(args, "noise_db_range")
- else "13_15",
- speech_volume_normalize=args.speech_volume_normalize
- if hasattr(args, "rir_scp")
- else None,
- )
- else:
- retval = None
- assert check_return_type(retval)
- return retval
+ # if args.use_preprocessor:
+ # retval = CommonPreprocessor(
+ # train=train,
+ # token_type=args.token_type,
+ # token_list=args.token_list,
+ # bpemodel=None,
+ # non_linguistic_symbols=None,
+ # text_cleaner=None,
+ # g2p_type=None,
+ # split_with_space=args.split_with_space if hasattr(args, "split_with_space") else False,
+ # seg_dict_file=args.seg_dict_file if hasattr(args, "seg_dict_file") else None,
+ # # NOTE(kamo): Check attribute existence for backward compatibility
+ # rir_scp=args.rir_scp if hasattr(args, "rir_scp") else None,
+ # rir_apply_prob=args.rir_apply_prob
+ # if hasattr(args, "rir_apply_prob")
+ # else 1.0,
+ # noise_scp=args.noise_scp if hasattr(args, "noise_scp") else None,
+ # noise_apply_prob=args.noise_apply_prob
+ # if hasattr(args, "noise_apply_prob")
+ # else 1.0,
+ # noise_db_range=args.noise_db_range
+ # if hasattr(args, "noise_db_range")
+ # else "13_15",
+ # speech_volume_normalize=args.speech_volume_normalize
+ # if hasattr(args, "rir_scp")
+ # else None,
+ # )
+ # else:
+ # retval = None
+ # assert check_return_type(retval)
+ return None
@classmethod
def required_data_names(  # Return a tuple of data names; after this change both branches give the same one-element tuple.
cls, train: bool = True, inference: bool = False  # `train` is not read anywhere in this body
) -> Tuple[str, ...]:
if not inference:  # non-inference path
- retval = ("speech", "profile", "binary_labels")
+ retval = ("speech", )  # replaces the removed three-name tuple: "profile" and "binary_labels" are no longer listed
else:
# Recognition mode
- retval = ("speech")
+ retval = ("speech", )  # the trailing comma makes this a 1-tuple; the removed ("speech") was a plain str, contradicting the Tuple[str, ...] annotation
return retval
@classmethod
@@ -823,7 +838,7 @@
# 2. Encoder
encoder_class = encoder_choices.get_class(args.encoder)
- encoder = encoder_class(input_size=input_size, **args.encoder_conf)
+ encoder = encoder_class(**args.encoder_conf)
# 3. EncoderDecoderAttractor
encoder_decoder_attractor_class = encoder_decoder_attractor_choices.get_class(args.encoder_decoder_attractor)
--
Gitblit v1.9.1