Funasr1.0 (#1275)
* funasr1.0 finetune
* funasr1.0 pbar
* update with main (#1260)
* Update websocket_protocol_zh.md
* update
---------
Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com>
Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com>
* update with main (#1264)
* Funasr1.0 (#1261)
* funasr1.0 finetune
* funasr1.0 pbar
* update with main (#1260)
* Update websocket_protocol_zh.md
* update
---------
Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com>
Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com>
---------
Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com>
Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com>
* bug fix
---------
Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com>
Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com>
* funasr1.0 sanm scama
* funasr1.0 infer_after_finetune
* funasr1.0 fsmn-vad bug fix
* funasr1.0 fsmn-vad bug fix
* funasr1.0 fsmn-vad bug fix
* funasr1.0 finetune
* funasr1.0 finetune
* funasr1.0 finetune
---------
Co-authored-by: Yabin Li <wucong.lyb@alibaba-inc.com>
Co-authored-by: shixian.shi <shixian.shi@alibaba-inc.com>
| | |
| | | +model_revision="v2.0.2" \ |
| | | +train_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \ |
| | | +valid_data_set_list="/Users/zhifu/funasr_github/test_local/aishell2_dev_ios/asr_task_debug_len_10.jsonl" \ |
| | | ++dataset_conf.batch_size=2 \ |
| | | ++dataset_conf.batch_size=64 \ |
| | | ++dataset_conf.batch_type="example" \ |
| | | ++train_conf.max_epoch=2 \ |
| | | ++dataset_conf.num_workers=4 \ |
| | | +output_dir="outputs/debug/ckpt/funasr2/exp2" \ |
| | | +device="cpu" \ |
| | | +debug="true" |
| | |
| | | self.punc_kwargs = punc_kwargs |
| | | self.spk_model = spk_model |
| | | self.spk_kwargs = spk_kwargs |
| | | self.model_path = kwargs["model_path"] |
| | | self.model_path = kwargs.get("model_path", "./") |
| | | |
| | | |
| | | def build_model(self, **kwargs): |
| | |
| | | |
| | | |
| | | def main(**kwargs): |
| | | |
| | | print(kwargs) |
| | | # set random seed |
| | | tables.print() |
| | | set_all_random_seed(kwargs.get("seed", 0)) |
| | |
| | | self.shuffle = shuffle and is_training |
| | | |
| | | def __len__(self): |
| | | return self.total_samples |
| | | return (self.total_samples-1) // self.batch_size + 1 |
| | | |
| | | def set_epoch(self, epoch): |
| | | np.random.seed(epoch) |
| | |
| | | self.waveform = None |
| | | self.last_drop_frames = 0 |
| | | |
| | | |
| | | @tables.register("model_classes", "FsmnVADStreaming") |
| | | class FsmnVADStreaming(nn.Module): |
| | | """ |
| | |
| | | # # reset class variables and clear the dict for the next query |
| | | # self.AllResetDetection() |
| | | return segments |
| | | |
| | | |
| | | def init_cache(self, cache: dict = {}, **kwargs): |
| | | |
| | |
| | | |
| | | self._train_epoch(epoch) |
| | | |
| | | |
| | | if self.use_ddp or self.use_fsdp: |
| | | dist.barrier() |
| | | |
| | | self._validate_epoch(epoch) |
| | | |
| | | if self.use_ddp or self.use_fsdp: |
| | | dist.barrier() |
| | | |
| | | |
| | | if self.rank == 0: |
| | | self._save_checkpoint(epoch) |
| | |
| | | |
| | | if self.use_ddp or self.use_fsdp: |
| | | dist.barrier() |
| | | |
| | | if self.writer: |
| | | self.writer.close() |
| | | |
| | | |
| | |
| | | continue |
| | | |
| | | # Execute an optimization step (update model parameters) |
| | | if self.use_ddp or self.use_fsdp: |
| | | dist.barrier() |
| | | self.optim.step() |
| | | self.scheduler.step() |
| | | # Clear gradients for the next accumulation stage |
| | |
| | | pbar.update(1) |
| | | if self.local_rank == 0: |
| | | description = ( |
| | | f"Epoch: {epoch}/{self.max_epoch}, " |
| | | f"Train epoch: {epoch}/{self.max_epoch}, " |
| | | f"step {batch_idx}/{len(self.dataloader_train)}, " |
| | | f"{speed_stats}, " |
| | | f"(loss: {loss.detach().cpu().item():.3f}), " |
| | |
| | | pbar.update(1) |
| | | if self.local_rank == 0: |
| | | description = ( |
| | | f"validation: \nEpoch: {epoch}/{self.max_epoch}, " |
| | | f"validation epoch: {epoch}/{self.max_epoch}, " |
| | | f"step {batch_idx}/{len(self.dataloader_train)}, " |
| | | f"{speed_stats}, " |
| | | f"(loss: {loss.detach().cpu().item():.3f}), " |