Merge pull request #325 from alibaba-damo-academy/dev_lzr
fix compute cer problems
| | |
| | | if out_item['wrong'] > 0: |
| | | rst['wrong_sentences'] += 1 |
| | | cer_detail_writer.write(hyp_key + print_cer_detail(out_item) + '\n') |
| | | cer_detail_writer.write("ref:" + '\t' + "".join(ref_dict[hyp_key]) + '\n') |
| | | cer_detail_writer.write("hyp:" + '\t' + "".join(hyp_dict[hyp_key]) + '\n') |
| | | cer_detail_writer.write("ref:" + '\t' + " ".join(list(map(lambda x: x.lower(), ref_dict[hyp_key]))) + '\n') |
| | | cer_detail_writer.write("hyp:" + '\t' + " ".join(list(map(lambda x: x.lower(), hyp_dict[hyp_key]))) + '\n') |
| | | |
| | | if rst['Wrd'] > 0: |
| | | rst['Err'] = round(rst['wrong_words'] * 100 / rst['Wrd'], 2) |
| | |
| | | # If text exists, compute CER |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(best_recog_path, "token") |
| | | text_proc_file = os.path.join(best_recog_path, "text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer")) |
| | | |
| | | |
| | |
| | | # compute CER if GT text is set |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/token") |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) |
| | | |
| | | |
| | |
| | | # If text exists, compute CER |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(best_recog_path, "token") |
| | | text_proc_file = os.path.join(best_recog_path, "text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer")) |
| | | |
| | | |
| | |
| | | # compute CER if GT text is set |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/token") |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) |
| | | |
| | | |
| | |
| | | |
| | | if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then |
| | | echo "Computing WER ..." |
| | | python utils/proce_text.py ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc |
| | | python utils/proce_text.py ${data_dir}/text ${output_dir}/1best_recog/text.ref |
| | | cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${output_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer |
| | | tail -n 3 ${output_dir}/1best_recog/text.cer |
| | | fi |
| | |
| | | # compute CER if GT text is set |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/token") |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) |
| | | |
| | | |
| | |
| | | |
| | | if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then |
| | | echo "Computing WER ..." |
| | | python utils/proce_text.py ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc |
| | | python utils/proce_text.py ${data_dir}/text ${output_dir}/1best_recog/text.ref |
| | | cp ${output_dir}/1best_recog/text ${output_dir}/1best_recog/text.proc |
| | | cp ${data_dir}/text ${output_dir}/1best_recog/text.ref |
| | | python utils/compute_wer.py ${output_dir}/1best_recog/text.ref ${output_dir}/1best_recog/text.proc ${output_dir}/1best_recog/text.cer |
| | | tail -n 3 ${output_dir}/1best_recog/text.cer |
| | | fi |
| | |
| | | # compute CER if GT text is set |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/token") |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) |
| | | |
| | | |
| | |
| | | # If text exists, compute CER |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(best_recog_path, "token") |
| | | text_proc_file = os.path.join(best_recog_path, "text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer")) |
| | | |
| | | |
| | |
| | | # compute CER if GT text is set |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/token") |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) |
| | | |
| | | |
| | |
| | | # If text exists, compute CER |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(best_recog_path, "token") |
| | | text_proc_file = os.path.join(best_recog_path, "text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(best_recog_path, "text.cer")) |
| | | |
| | | |
| | |
| | | # compute CER if GT text is set |
| | | text_in = os.path.join(params["data_dir"], "text") |
| | | if os.path.exists(text_in): |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/token") |
| | | text_proc_file = os.path.join(decoding_path, "1best_recog/text") |
| | | compute_wer(text_in, text_proc_file, os.path.join(decoding_path, "text.cer")) |
| | | |
| | | |
| | |
| | | finish_count += 1 |
| | | # asr_utils.print_progress(finish_count / file_count) |
| | | if writer is not None: |
| | | ibest_writer["text"][key] = text_postprocessed |
| | | ibest_writer["text"][key] = " ".join(word_lists) |
| | | |
| | | logging.info("decoding, utt: {}, predictions: {}".format(key, text)) |
| | | rtf_avg = "decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}".format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor)) |
| | |
| | | ibest_writer["token"][key] = " ".join(token) |
| | | ibest_writer["token_int"][key] = " ".join(map(str, token_int)) |
| | | ibest_writer["vad"][key] = "{}".format(vadsegments) |
| | | ibest_writer["text"][key] = text_postprocessed |
| | | ibest_writer["text"][key] = " ".join(word_lists) |
| | | ibest_writer["text_with_punc"][key] = text_postprocessed_punc |
| | | if time_stamp_postprocessed is not None: |
| | | ibest_writer["time_stamp"][key] = "{}".format(time_stamp_postprocessed) |
| | |
| | | ibest_writer["token"][key] = " ".join(token) |
| | | ibest_writer["token_int"][key] = " ".join(map(str, token_int)) |
| | | ibest_writer["vad"][key] = "{}".format(vadsegments) |
| | | ibest_writer["text"][key] = text_postprocessed |
| | | ibest_writer["text"][key] = " ".join(word_lists) |
| | | ibest_writer["text_with_punc"][key] = text_postprocessed_punc |
| | | if time_stamp_postprocessed is not None: |
| | | ibest_writer["time_stamp"][key] = "{}".format(time_stamp_postprocessed) |
| | |
| | | ibest_writer["rtf"][key] = rtf_cur |
| | | |
| | | if text is not None: |
| | | text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token) |
| | | item = {'key': key, 'value': text_postprocessed} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | | # asr_utils.print_progress(finish_count / file_count) |
| | | if writer is not None: |
| | | ibest_writer["text"][key] = text_postprocessed |
| | | ibest_writer["text"][key] = " ".join(word_lists) |
| | | |
| | | logging.info("decoding, utt: {}, predictions: {}".format(key, text)) |
| | | rtf_avg = "decoding, feature length total: {}, forward_time total: {:.4f}, rtf avg: {:.4f}".format(length_total, forward_time_total, 100 * forward_time_total / (length_total * lfr_factor)) |
| | |
| | | ibest_writer["score"][key] = str(hyp.score) |
| | | |
| | | if text is not None: |
| | | text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token) |
| | | item = {'key': key, 'value': text_postprocessed} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | | asr_utils.print_progress(finish_count / file_count) |
| | | if writer is not None: |
| | | ibest_writer["text"][key] = text_postprocessed |
| | | ibest_writer["text"][key] = " ".join(word_lists) |
| | | return asr_result_list |
| | | |
| | | return _forward |
| | |
| | | ibest_writer["score"][key] = str(hyp.score) |
| | | |
| | | if text is not None: |
| | | text_postprocessed, _ = postprocess_utils.sentence_postprocess(token) |
| | | text_postprocessed, word_lists = postprocess_utils.sentence_postprocess(token) |
| | | item = {'key': key, 'value': text_postprocessed} |
| | | asr_result_list.append(item) |
| | | finish_count += 1 |
| | | asr_utils.print_progress(finish_count / file_count) |
| | | if writer is not None: |
| | | ibest_writer["text"][key] = text_postprocessed |
| | | ibest_writer["text"][key] = " ".join(word_lists) |
| | | return asr_result_list |
| | | |
| | | return _forward |
| | |
| | | if out_item['wrong'] > 0: |
| | | rst['wrong_sentences'] += 1 |
| | | cer_detail_writer.write(hyp_key + print_cer_detail(out_item) + '\n') |
| | | cer_detail_writer.write("ref:" + '\t' + "".join(ref_dict[hyp_key]) + '\n') |
| | | cer_detail_writer.write("hyp:" + '\t' + "".join(hyp_dict[hyp_key]) + '\n') |
| | | cer_detail_writer.write("ref:" + '\t' + " ".join(list(map(lambda x: x.lower(), ref_dict[hyp_key]))) + '\n') |
| | | cer_detail_writer.write("hyp:" + '\t' + " ".join(list(map(lambda x: x.lower(), hyp_dict[hyp_key]))) + '\n') |
| | | |
| | | if rst['Wrd'] > 0: |
| | | rst['Err'] = round(rst['wrong_words'] * 100 / rst['Wrd'], 2) |