| | |
| | | num_time_mask: 2 |
| | | |
| | | dataset_conf: |
| | | data_names: speech,text |
| | | data_types: sound,text |
| | | shuffle: True |
| | | shuffle_conf: |
| | | shuffle_size: 2048 |
| | |
| | | num_workers: 8 |
| | | |
| | | log_interval: 50 |
| | | normalize: None |
| | |
| | | utils/text2token.py -s 1 -n 1 --space "" ${feats_dir}/data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \ |
| | | | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0}' >> ${token_list} |
| | | echo "<unk>" >> ${token_list} |
| | | mkdir -p ${feats_dir}/asr_stats_fbank_zh_char/${train_set} |
| | | mkdir -p ${feats_dir}/asr_stats_fbank_zh_char/${valid_set} |
| | | fi |
| | | |
| | | # LM Training Stage |
| | |
| | | utils/text2token.py -s 1 -n 1 --space "" ${feats_dir}/data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \ |
| | | | sort | uniq | grep -a -v -e '^\s*$' | awk '{print $0}' >> ${token_list} |
| | | echo "<unk>" >> ${token_list} |
| | | mkdir -p ${feats_dir}/asr_stats_fbank_zh_char/${train_set} |
| | | mkdir -p ${feats_dir}/asr_stats_fbank_zh_char/${valid_set} |
| | | fi |
| | | |
| | | # Training Stage |
| | |
| | | pooling_type: statistic |
| | | num_nodes_resnet1: 256 |
| | | num_nodes_last_layer: 256 |
| | | batchnorm_momentum: 0.5 |
| | | |
| | | # decoder related |
| | | decoder: sa_decoder |