From 1b6659a600eaf7fa0d2b800865cc6b75525d14e7 Mon Sep 17 00:00:00 2001
From: Richard Wong
Date: Wed, 25 Dec 2024 20:35:28 +0900
Subject: [PATCH] Feat: reworked overall evaluation to evaluate at each stage of the pipeline

---
Review note: the added file below was revised during review (boolean casts,
row-count check, zero-safe rates); the blob id on its index line still refers
to the originally submitted revision.

 ...ned_mapping_and_classification_analysis.py | 34 -------
 overall/pipeline_evaluation.py                | 93 +++++++++++++++++++
 2 files changed, 93 insertions(+), 34 deletions(-)
 delete mode 100644 overall/combined_mapping_and_classification_analysis.py
 create mode 100644 overall/pipeline_evaluation.py

diff --git a/overall/combined_mapping_and_classification_analysis.py b/overall/combined_mapping_and_classification_analysis.py
deleted file mode 100644
index 6df1ea8..0000000
--- a/overall/combined_mapping_and_classification_analysis.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# %%
-import pandas as pd
-
-# following code computes final mapping + classification accuracy
-# %%
-def run(fold):
-    data_path = f'../relevant_class/binary_classifier_desc_unit/classification_prediction/exports/result_group_{fold}.csv'
-    df = pd.read_csv(data_path, skipinitialspace=True)
-    p_mdm = df['p_mdm']
-
-    # data_path = f'../train/mapping_t5_complete_desc_unit_name/mapping_prediction/exports/result_group_{fold}.csv'
-    data_path = f'../train/modified_t5_decoder_4_layers/mapping_prediction/exports/result_group_{fold}.csv'
-    df = pd.read_csv(data_path, skipinitialspace=True)
-    actual_mdm = df['MDM']
-
-    thing_correctness = df['thing'] == df['p_thing']
-    property_correctness = df['property'] == df['p_property']
-    answer = thing_correctness & property_correctness
-
-    # if is non-MDM -> then should be unmapped
-    # if is MDM -> then should be mapped correctly
-
-    # out of correctly predicted relevant data, how many are mapped correctly?
-    correct_positive_mdm_and_map = sum(p_mdm & actual_mdm & answer)
-
-    # number of correctly predicted non-relevant data
-    correct_negative_mdm = sum(~(p_mdm) & ~(actual_mdm))
-
-    overall_correct = (correct_positive_mdm_and_map + correct_negative_mdm)/len(actual_mdm)
-    print(overall_correct)
-# %%
-for fold in [1,2,3,4,5]:
-    run(fold)
-
diff --git a/overall/pipeline_evaluation.py b/overall/pipeline_evaluation.py
new file mode 100644
index 0000000..81a8db3
--- /dev/null
+++ b/overall/pipeline_evaluation.py
@@ -0,0 +1,93 @@
+# %%
+import pandas as pd
+import numpy as np
+
+# The following code evaluates every stage of the pipeline: relevant-class
+# prediction (stage 1), mapping of the correctly detected relevant entries,
+# and the combined classification + mapping accuracy.
+# %%
+def _rate(numerator, denominator):
+    """Return numerator / denominator, or NaN when the denominator is zero."""
+    return numerator / denominator if denominator else np.nan
+
+
+def _report(title, numerator, denominator):
+    """Print a rate under its title, followed by the counts behind it."""
+    print(title)
+    print(_rate(numerator, denominator))
+    print('size', numerator, '/', denominator)
+
+
+def run(fold):
+    """Print per-stage and overall accuracy figures for one fold.
+
+    Reads the relevance classifier export (column ``p_mdm``) and the mapping
+    export (columns ``MDM``, ``thing``, ``p_thing``, ``property``,
+    ``p_property``) of the given fold and compares them row by row.
+
+    Args:
+        fold: value interpolated into the ``result_group_{fold}.csv`` names.
+
+    Raises:
+        ValueError: if the two exports do not have the same number of rows.
+    """
+    data_path = f'../relevant_class/binary_classifier_desc_unit/classification_prediction/exports/result_group_{fold}.csv'
+    df = pd.read_csv(data_path, skipinitialspace=True)
+    # cast to bool so that `~` below is a logical NOT even if the flag is
+    # stored as 0/1 (on integers `~` is a bitwise NOT and gives -1/-2)
+    p_mdm = df['p_mdm'].astype(bool)
+
+    # data_path = f'../train/mapping_t5_complete_desc_unit_name/mapping_prediction/exports/result_group_{fold}.csv'
+    data_path = f'../train/modified_t5_decoder_4_layers/mapping_prediction/exports/result_group_{fold}.csv'
+    df = pd.read_csv(data_path, skipinitialspace=True)
+    actual_mdm = df['MDM'].astype(bool)
+
+    # both exports are combined row by row, so they must cover the same rows
+    if len(p_mdm) != len(actual_mdm):
+        raise ValueError(
+            f'fold {fold}: classifier export has {len(p_mdm)} rows, '
+            f'mapping export has {len(actual_mdm)}')
+    total = len(actual_mdm)
+
+    thing_correctness = df['thing'] == df['p_thing']
+    property_correctness = df['property'] == df['p_property']
+    answer = thing_correctness & property_correctness
+
+    true_positive = p_mdm & actual_mdm
+    true_negative = ~p_mdm & ~actual_mdm
+    n_relevant = int(actual_mdm.sum())
+    n_true_positive = int(true_positive.sum())
+    n_true_negative = int(true_negative.sum())
+
+    ##############
+    # evaluate relevant-class prediction performance
+
+    # relevant entries that were also predicted relevant
+    _report('correct relevant rate:', n_true_positive, n_relevant)
+
+    # non-relevant entries that were also predicted non-relevant
+    _report('correct non-relevant rate:', n_true_negative, total - n_relevant)
+
+    # stage 1 is correct wherever prediction and ground truth agree
+    _report('stage1 rate:', n_true_positive + n_true_negative, total)
+
+    ##############
+    # evaluate mapping on the entries correctly predicted as relevant
+    correct_positive_mdm_and_map = int((true_positive & answer).sum())
+    _report('mapping rate', correct_positive_mdm_and_map, n_true_positive)
+
+    ##############
+    # evaluate overall pipeline result
+
+    # if is non-MDM -> then should be unmapped
+    # if is MDM -> then should be mapped correctly
+    overall_correct = correct_positive_mdm_and_map + n_true_negative
+    print('overall rate')
+    print(_rate(overall_correct, total))
+    print('breakdown:', correct_positive_mdm_and_map, ', ', n_true_negative)
+    print('size:', overall_correct, '/', total)
+# %%
+for fold in [1,2,3,4,5]:
+    print('*' * 40)
+    run(fold)
+