# %%
import pandas as pd
import numpy as np

# The following code computes the final mapping + classification accuracy.


# %%
def _load_flags(path, column):
    """Read `column` from the CSV at `path` and return it as a bool Series."""
    frame = pd.read_csv(path, skipinitialspace=True)
    # Coerce to bool so that `~` is a logical NOT further down. On a 0/1
    # integer column `~` is a bitwise invert (0 -> -1, 1 -> -2) and the
    # resulting counts would be wrong (even negative).
    return frame[column].astype(bool)


def _count(mask):
    """Return the number of True entries in a boolean Series as a Python int."""
    return int(mask.sum())


def _rate(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    if denominator == 0:
        # An empty class in one fold should not abort the remaining folds.
        return float('nan')
    return numerator / denominator


def run(fold):
    """Print relevance, mapping and overall pipeline accuracy for one fold.

    Three per-fold CSV exports are read (paths are relative to the current
    working directory):

    * the relevant-class classifier export, column ``p_mdm`` (predicted flag)
    * the mapping prediction export, column ``MDM`` (actual flag)
    * the delta-analysis export, column ``grounded_pred`` (used here as the
      per-row "mapping is correct" flag)

    The three columns are combined row by row, so the exports are assumed to
    describe the same rows in the same order -- TODO confirm against the
    export scripts.

    Args:
        fold: Fold identifier substituted into ``result_group_{fold}.csv``.

    Returns:
        None. All results are written to stdout. A rate whose denominator is
        zero is printed as ``nan``.

    Raises:
        FileNotFoundError: If one of the export files does not exist.
        KeyError: If an export lacks the expected column.
    """
    p_mdm = _load_flags(
        f'../relevant_class/binary_classifier_desc_unit/classification_prediction/exports/result_group_{fold}.csv',
        'p_mdm',
    )
    actual_mdm = _load_flags(
        f'../train/mapping_t5_complete_desc_unit/mapping_prediction/exports/result_group_{fold}.csv',
        'MDM',
    )
    # Grounded labels. The original (ungrounded) labels would instead be
    # (thing == p_thing) & (property == p_property) on the mapping export.
    answer = _load_flags(
        f'../analysis/delta_analysis/exports/result_group_{fold}.csv',
        'grounded_pred',
    )

    n_total = len(actual_mdm)
    n_relevant = _count(actual_mdm)
    n_non_relevant = _count(~actual_mdm)

    # Predicted relevant and actually relevant (both flags set).
    true_positive = p_mdm & actual_mdm
    n_true_positive = _count(true_positive)
    # Predicted non-relevant and actually non-relevant (both flags clear).
    n_true_negative = _count(~p_mdm & ~actual_mdm)
    # Correctly predicted relevant rows whose mapping is also correct.
    n_mapped = _count(true_positive & answer)

    ##############
    # evaluate relevant-class prediction performance

    # correct relevant prediction
    print('correct relevant rate:')
    print(_rate(n_true_positive, n_relevant))
    print('size', n_true_positive, '/', n_relevant)

    # correct non-relevant prediction
    print('correct non-relevant rate:')
    print(_rate(n_true_negative, n_non_relevant))
    print('size', n_true_negative, '/', n_non_relevant)

    # correct stage 1 prediction: for boolean flags, the rows where the
    # prediction agrees with the label are exactly TP + TN.
    n_stage1 = n_true_positive + n_true_negative
    print('stage1 rate:')
    print(_rate(n_stage1, n_total))
    print('size', n_stage1, '/', n_total)

    ##############
    # evaluate mapping on predicted relevant entries
    print('mapping rate')
    print(_rate(n_mapped, n_true_positive))
    print('size', n_mapped, '/', n_true_positive)

    # evaluate relevant mappings (relative to all actually relevant rows)
    print('relevant data mapping rate')
    print(_rate(n_mapped, n_relevant))
    print('size', n_mapped, '/', n_relevant)

    ##############
    # evaluate overall pipeline result
    # if is non-MDM -> then should be unmapped
    # if is MDM -> then should be mapped correctly
    n_overall = n_mapped + n_true_negative
    print('overall rate')
    print(_rate(n_overall, n_total))
    print('breakdown:', n_mapped, ', ', n_true_negative)
    print('size:', n_overall, '/', n_total)


# %%
# Guarded so that importing this module does not trigger file I/O; running
# the file as a script or as an interactive cell still evaluates all folds.
if __name__ == "__main__":
    for fold in [1, 2, 3, 4, 5]:
        print('*' * 40)
        run(fold)

# %%