Feat: reworked the overall evaluation to report results at each stage of the
pipeline
This commit is contained in:
parent
086b867d91
commit
1b6659a600
|
@ -1,34 +0,0 @@
|
||||||
# %%
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
# following code computes final mapping + classification accuracy
|
|
||||||
# %%
|
|
||||||
def _as_flags(column):
    """Return *column* as a boolean Series.

    Boolean columns are passed through. Integer columns are treated as 0/1
    flags and converted, because ``~`` on integers is a bitwise complement
    (``~1 == -2``) rather than a logical negation.

    Raises:
        TypeError: if the column is neither boolean nor integer typed.
    """
    if pd.api.types.is_bool_dtype(column):
        return column.astype(bool)
    if pd.api.types.is_integer_dtype(column):
        return column != 0
    raise TypeError(
        f'expected a boolean or 0/1 integer column, got dtype {column.dtype}'
    )


def run(fold):
    """Print and return the overall pipeline accuracy for one result group.

    Reads the classifier predictions (column ``p_mdm``) and the mapping
    predictions (columns ``MDM``, ``thing``, ``p_thing``, ``property``,
    ``p_property``) from the ``result_group_{fold}.csv`` exports and counts a
    row as correct when either:

    * it is predicted and actually MDM, and both thing and property match, or
    * it is predicted and actually non-MDM.

    Args:
        fold: value substituted into the ``result_group_{fold}.csv`` names.

    Returns:
        The fraction of rows counted as correct (NaN if there are no rows).

    Raises:
        ValueError: if the two CSV files do not have the same number of rows.
        TypeError: if ``p_mdm`` or ``MDM`` is not boolean/integer typed.
    """
    data_path = f'../relevant_class/binary_classifier_desc_unit/classification_prediction/exports/result_group_{fold}.csv'
    df = pd.read_csv(data_path, skipinitialspace=True)
    p_mdm = _as_flags(df['p_mdm'])

    # alternative mapping model:
    # data_path = f'../train/mapping_t5_complete_desc_unit_name/mapping_prediction/exports/result_group_{fold}.csv'
    data_path = f'../train/modified_t5_decoder_4_layers/mapping_prediction/exports/result_group_{fold}.csv'
    df = pd.read_csv(data_path, skipinitialspace=True)
    actual_mdm = _as_flags(df['MDM'])

    # The two files are combined row by row, so they must describe the same rows.
    if len(p_mdm) != len(actual_mdm):
        raise ValueError(
            f'row count mismatch for fold {fold}: '
            f'{len(p_mdm)} classification rows vs {len(actual_mdm)} mapping rows'
        )

    answer = (df['thing'] == df['p_thing']) & (df['property'] == df['p_property'])

    # if is non-MDM -> then should be unmapped
    # if is MDM -> then should be mapped correctly

    # out of correctly predicted relevant data, how many are mapped correctly?
    correct_positive_mdm_and_map = int((p_mdm & actual_mdm & answer).sum())

    # number of correctly predicted non-relevant data
    correct_negative_mdm = int((~p_mdm & ~actual_mdm).sum())

    total = len(actual_mdm)
    correct = correct_positive_mdm_and_map + correct_negative_mdm
    overall_correct = correct / total if total else float('nan')
    print(overall_correct)
    return overall_correct
|
|
||||||
# %%
|
|
||||||
# Evaluate result groups 1 through 5, one after another.
for fold in range(1, 6):
    run(fold)
|
|
||||||
|
|
|
@ -0,0 +1,78 @@
|
||||||
|
# %%
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# The following code evaluates each stage of the pipeline (relevance
# classification, then mapping) and the final combined accuracy.
|
||||||
|
# %%
|
||||||
|
def _as_flags(column):
    """Return *column* as a boolean Series.

    Boolean columns are passed through. Integer columns are treated as 0/1
    flags and converted, because ``~`` on integers is a bitwise complement
    (``~1 == -2``) rather than a logical negation.

    Raises:
        TypeError: if the column is neither boolean nor integer typed.
    """
    if pd.api.types.is_bool_dtype(column):
        return column.astype(bool)
    if pd.api.types.is_integer_dtype(column):
        return column != 0
    raise TypeError(
        f'expected a boolean or 0/1 integer column, got dtype {column.dtype}'
    )


def _rate(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


def run(fold):
    """Print and return per-stage and overall metrics for one result group.

    Reads the classifier predictions (column ``p_mdm``) and the mapping
    predictions (columns ``MDM``, ``thing``, ``p_thing``, ``property``,
    ``p_property``) from the ``result_group_{fold}.csv`` exports, then reports:

    * how many actually-relevant rows were predicted relevant,
    * how many actually-non-relevant rows were predicted non-relevant,
    * how many rows the relevance prediction got right overall (stage 1),
    * how many correctly-predicted relevant rows have matching thing and
      property (mapping),
    * how many rows are right end to end (mapped correctly, or correctly
      left as non-relevant).

    Args:
        fold: value substituted into the ``result_group_{fold}.csv`` names.

    Returns:
        dict with the keys ``correct_relevant_rate``,
        ``correct_non_relevant_rate``, ``stage1_rate``, ``mapping_rate`` and
        ``overall_rate``. A rate is NaN when its denominator is zero.

    Raises:
        ValueError: if the two CSV files do not have the same number of rows.
        TypeError: if ``p_mdm`` or ``MDM`` is not boolean/integer typed.
    """
    data_path = f'../relevant_class/binary_classifier_desc_unit/classification_prediction/exports/result_group_{fold}.csv'
    df = pd.read_csv(data_path, skipinitialspace=True)
    p_mdm = _as_flags(df['p_mdm'])

    # alternative mapping model:
    # data_path = f'../train/mapping_t5_complete_desc_unit_name/mapping_prediction/exports/result_group_{fold}.csv'
    data_path = f'../train/modified_t5_decoder_4_layers/mapping_prediction/exports/result_group_{fold}.csv'
    df = pd.read_csv(data_path, skipinitialspace=True)
    actual_mdm = _as_flags(df['MDM'])

    # The two files are combined row by row, so they must describe the same rows.
    if len(p_mdm) != len(actual_mdm):
        raise ValueError(
            f'row count mismatch for fold {fold}: '
            f'{len(p_mdm)} classification rows vs {len(actual_mdm)} mapping rows'
        )
    total = len(actual_mdm)

    # a row is mapped correctly only when both thing and property match
    answer = (df['thing'] == df['p_thing']) & (df['property'] == df['p_property'])

    ##############
    # evaluate relevant-class prediction performance

    # correct relevant prediction: predicted and actual flags are both set
    correct_relevant_prediction = int((p_mdm & actual_mdm).sum())
    relevant_total = int(actual_mdm.sum())
    correct_relevant_rate = _rate(correct_relevant_prediction, relevant_total)
    print('correct relevant rate:')
    print(correct_relevant_rate)
    print('size', correct_relevant_prediction, '/', relevant_total)

    # correct non-relevant prediction: neither flag is set
    correct_non_relevant_prediction = int((~p_mdm & ~actual_mdm).sum())
    non_relevant_total = int((~actual_mdm).sum())
    correct_non_relevant_rate = _rate(correct_non_relevant_prediction, non_relevant_total)
    print('correct non-relevant rate:')
    print(correct_non_relevant_rate)
    print('size', correct_non_relevant_prediction, '/', non_relevant_total)

    # correct stage 1 prediction: predicted flag equals actual flag
    correct_stage1_prediction = int((p_mdm == actual_mdm).sum())
    stage1_rate = _rate(correct_stage1_prediction, total)

    print('stage1 rate:')
    print(stage1_rate)
    print('size', correct_stage1_prediction, '/', total)

    ##############
    # evaluate mapping on predicted relevant entries
    correct_positive_mdm_and_map = int((p_mdm & actual_mdm & answer).sum())
    mapping_rate = _rate(correct_positive_mdm_and_map, correct_relevant_prediction)
    print('mapping rate')
    print(mapping_rate)
    print('size', correct_positive_mdm_and_map, '/', correct_relevant_prediction)

    ##############
    # evaluate overall pipeline result

    # if is non-MDM -> then should be unmapped
    # if is MDM -> then should be mapped correctly
    # Both counts were already computed above; reuse them here.
    correct_negative_mdm = correct_non_relevant_prediction
    overall_count = correct_positive_mdm_and_map + correct_negative_mdm
    overall_correct = _rate(overall_count, total)
    print('overall rate')
    print(overall_correct)
    print('breakdown:', correct_positive_mdm_and_map, ', ', correct_negative_mdm)
    print('size:', overall_count, '/', total)

    return {
        'correct_relevant_rate': correct_relevant_rate,
        'correct_non_relevant_rate': correct_non_relevant_rate,
        'stage1_rate': stage1_rate,
        'mapping_rate': mapping_rate,
        'overall_rate': overall_correct,
    }
|
||||||
|
# %%
|
||||||
|
# Evaluate result groups 1 through 5, printing a 40-asterisk separator
# line ahead of each group's report.
for fold in range(1, 6):
    print('*' * 40)
    run(fold)
|
||||||
|
|
Loading…
Reference in New Issue