# %%
import pandas as pd

# The following code computes the final mapping + classification accuracy.

# %%
# Default export directories; each holds one `result_group_{fold}.csv` per fold.
CLASSIFICATION_EXPORT_DIR = (
    '../relevant_class/binary_classifier_desc_unit/'
    'classification_prediction/exports'
)
# Alternative mapping run that was used previously:
# '../train/mapping_t5_complete_desc_unit_name/mapping_prediction/exports'
MAPPING_EXPORT_DIR = (
    '../train/modified_t5_decoder_4_layers/mapping_prediction/exports'
)


def _as_bool_mask(flags, name):
    """Return the flag column *flags* as a boolean Series.

    Boolean columns are passed through; NaN-free numeric columns are treated
    as "non-zero means True". Anything else raises ``ValueError``, because
    applying ``~`` / ``&`` to such data would silently produce wrong counts.
    """
    if pd.api.types.is_bool_dtype(flags):
        return flags.astype(bool)
    if pd.api.types.is_numeric_dtype(flags) and not flags.isna().any():
        # On integers `~` is bitwise (~0 == -1, ~1 == -2), so convert to a
        # real boolean mask before any logical operation.
        return flags != 0
    raise ValueError(
        f"column {name!r} must hold boolean or NaN-free numeric flags, "
        f"got dtype {flags.dtype}"
    )


def run(fold, classification_dir=CLASSIFICATION_EXPORT_DIR,
        mapping_dir=MAPPING_EXPORT_DIR):
    """Print and return the combined classification + mapping accuracy.

    A row counts as correct when either
    * it is predicted MDM, actually MDM, and both ``thing`` and ``property``
      match their predictions (``p_thing`` / ``p_property``), or
    * it is predicted non-MDM and actually non-MDM (it should stay unmapped).

    Args:
        fold: Fold identifier interpolated into ``result_group_{fold}.csv``.
        classification_dir: Directory with the classifier export; its CSV
            must provide the ``p_mdm`` column.
        mapping_dir: Directory with the mapping export; its CSV must provide
            the ``MDM``, ``thing``, ``p_thing``, ``property`` and
            ``p_property`` columns.

    Returns:
        The fraction of rows that are correct, as a float.

    Raises:
        ValueError: If the two CSVs have different row counts, contain no
            rows, or a flag column is neither boolean nor NaN-free numeric.
    """
    classification_path = f'{classification_dir}/result_group_{fold}.csv'
    classification_df = pd.read_csv(classification_path, skipinitialspace=True)
    predicted_mdm = _as_bool_mask(classification_df['p_mdm'], 'p_mdm')

    mapping_path = f'{mapping_dir}/result_group_{fold}.csv'
    mapping_df = pd.read_csv(mapping_path, skipinitialspace=True)
    actual_mdm = _as_bool_mask(mapping_df['MDM'], 'MDM')

    # The two exports are combined row by row.
    # NOTE(review): assumes both files list the same samples in the same
    # order -- confirm against the export scripts.
    if len(predicted_mdm) != len(actual_mdm):
        raise ValueError(
            f"fold {fold}: classification export has {len(predicted_mdm)} "
            f"rows but mapping export has {len(actual_mdm)}"
        )
    total = len(actual_mdm)
    if total == 0:
        raise ValueError(f"fold {fold}: exports contain no rows")

    thing_correctness = mapping_df['thing'] == mapping_df['p_thing']
    property_correctness = mapping_df['property'] == mapping_df['p_property']
    answer = thing_correctness & property_correctness

    # If a row is non-MDM it should be left unmapped; if it is MDM it should
    # be mapped correctly.
    # Out of the correctly predicted relevant rows, how many are mapped
    # correctly?
    correct_positive_mdm_and_map = int(
        (predicted_mdm & actual_mdm & answer).sum()
    )
    # Number of correctly predicted non-relevant rows.
    correct_negative_mdm = int((~predicted_mdm & ~actual_mdm).sum())

    overall_correct = (correct_positive_mdm_and_map + correct_negative_mdm) / total
    print(overall_correct)
    return overall_correct


# %%
# Guarded so that importing this module does not trigger the file reads;
# running it as a script or as an interactive cell still evaluates all folds.
if __name__ == "__main__":
    for fold in [1, 2, 3, 4, 5]:
        run(fold)