35 lines
1.3 KiB
Python
35 lines
1.3 KiB
Python
|
# %%
|
||
|
import pandas as pd
|
||
|
|
||
|
# The following code computes the final combined mapping + classification accuracy.
|
||
|
# %%
|
||
|
# Default export directories (relative to the working directory). Each one is
# expected to hold one ``result_group_<fold>.csv`` file per fold.
_CLASSIFICATION_EXPORTS = '../relevant_class/binary_classifier_desc_unit/classification_prediction/exports'
# Alternative mapping run previously used here:
# '../train/mapping_t5_complete_desc_unit_name/mapping_prediction/exports'
_MAPPING_EXPORTS = '../train/modified_t5_decoder_4_layers/mapping_prediction/exports'


def run(fold, classification_dir=_CLASSIFICATION_EXPORTS, mapping_dir=_MAPPING_EXPORTS):
    """Print and return the combined classification + mapping accuracy of a fold.

    A row counts as correct when either

    * it is predicted MDM, actually is MDM, and both ``thing`` and
      ``property`` are mapped correctly, or
    * it is predicted non-MDM and actually is non-MDM (so it should stay
      unmapped).

    Args:
        fold: Identifier inserted into the ``result_group_<fold>.csv`` name.
        classification_dir: Directory holding the classifier export, which
            must provide a ``p_mdm`` column.
        mapping_dir: Directory holding the mapping export, which must provide
            the ``MDM``, ``thing``, ``property``, ``p_thing`` and
            ``p_property`` columns.

    Returns:
        The fraction of rows that are correct (also printed to stdout).

    Raises:
        ValueError: If the two exports do not contain the same number of rows.
        ZeroDivisionError: If the exports contain no rows.
    """
    classification_df = pd.read_csv(
        f'{classification_dir}/result_group_{fold}.csv', skipinitialspace=True)
    mapping_df = pd.read_csv(
        f'{mapping_dir}/result_group_{fold}.csv', skipinitialspace=True)

    # The two files are combined row by row, so a length mismatch would make
    # pandas align on the index and silently produce a meaningless score.
    if len(classification_df) != len(mapping_df):
        raise ValueError(
            f'fold {fold}: classification export has {len(classification_df)} rows '
            f'but mapping export has {len(mapping_df)}')

    # Coerce the flags to bool: on 0/1 integer columns ``~`` is a bitwise NOT
    # (0 -> -1, 1 -> -2) and would corrupt the negative count below.
    # NOTE(review): assumes the flag columns have no missing values - confirm.
    p_mdm = classification_df['p_mdm'].astype(bool)
    actual_mdm = mapping_df['MDM'].astype(bool)

    thing_correctness = mapping_df['thing'] == mapping_df['p_thing']
    property_correctness = mapping_df['property'] == mapping_df['p_property']
    answer = thing_correctness & property_correctness

    # if is non-MDM -> then should be unmapped
    # if is MDM -> then should be mapped correctly

    # Out of the correctly predicted relevant rows, how many are mapped correctly?
    correct_positive_mdm_and_map = int((p_mdm & actual_mdm & answer).sum())

    # Number of correctly predicted non-relevant rows.
    correct_negative_mdm = int((~p_mdm & ~actual_mdm).sum())

    overall_correct = (correct_positive_mdm_and_map + correct_negative_mdm) / len(actual_mdm)
    print(overall_correct)
    return overall_correct
|
||
|
# %%
|
||
|
# Report the accuracy of each of the five result groups, in order.
for fold in range(1, 6):
    run(fold)
|
||
|
|