hipom_data_mapping/overall/combined_mapping_and_classi...

# %%
import pandas as pd

# The following code computes the combined (relevance classification + mapping) accuracy per fold.
# %%
def run(fold):
    """Compute, print and return the combined accuracy for one fold.

    A row counts as correct when either

    * it is predicted relevant (``p_mdm``), is actually relevant (``MDM``)
      and both ``thing`` and ``property`` equal their predictions, or
    * it is predicted non-relevant and is actually non-relevant.

    Args:
        fold: Fold identifier substituted into the ``result_group_{fold}.csv``
            file name of both the classification and the mapping export.

    Returns:
        The fraction of rows that are correct, as a float. The same value is
        printed to stdout.

    Raises:
        ValueError: If the two exports do not contain the same number of rows.
        ZeroDivisionError: If the mapping export contains no rows.
    """
    classification_path = f'../relevant_class/binary_classifier_desc_unit/classification_prediction/exports/result_group_{fold}.csv'
    classification_df = pd.read_csv(classification_path, skipinitialspace=True)
    p_mdm = classification_df['p_mdm']

    # Alternative mapping export:
    # f'../train/mapping_t5_complete_desc_unit_name/mapping_prediction/exports/result_group_{fold}.csv'
    mapping_path = f'../train/modified_t5_decoder_4_layers/mapping_prediction/exports/result_group_{fold}.csv'
    mapping_df = pd.read_csv(mapping_path, skipinitialspace=True)
    actual_mdm = mapping_df['MDM']

    # The two files are combined row by row; a length mismatch would make
    # pandas align on the index and silently produce a meaningless score.
    if len(p_mdm) != len(actual_mdm):
        raise ValueError(
            f'fold {fold}: classification export has {len(p_mdm)} rows but '
            f'mapping export has {len(actual_mdm)} rows'
        )

    thing_correctness = mapping_df['thing'] == mapping_df['p_thing']
    property_correctness = mapping_df['property'] == mapping_df['p_property']
    answer = thing_correctness & property_correctness

    # if is non-MDM -> then should be unmapped
    # if is MDM -> then should be mapped correctly

    # Out of the correctly predicted relevant data, how many are mapped correctly?
    correct_positive_mdm_and_map = int((p_mdm & actual_mdm & answer).sum())

    # Number of correctly predicted non-relevant data.
    correct_negative_mdm = int((~p_mdm & ~actual_mdm).sum())

    overall_correct = (correct_positive_mdm_and_map + correct_negative_mdm) / len(actual_mdm)
    print(overall_correct)
    return overall_correct
# %%
# Evaluate folds 1 through 5 in order.
for fold in range(1, 6):
    run(fold)
Feat: added overall section to evaluate combined accuracy - added relevant-class section 2024-12-24 21:57:48 +09:00
			`# %%`
			`import pandas as pd`

			`# following code computes final mapping + classification accuracy`
			`# %%`
			`def run(fold):`
			`data_path = f'../relevant_class/binary_classifier_desc_unit/classification_prediction/exports/result_group_{fold}.csv'`
			`df = pd.read_csv(data_path, skipinitialspace=True)`
			`p_mdm = df['p_mdm']`

			`# data_path = f'../train/mapping_t5_complete_desc_unit_name/mapping_prediction/exports/result_group_{fold}.csv'`
			`data_path = f'../train/modified_t5_decoder_4_layers/mapping_prediction/exports/result_group_{fold}.csv'`
			`df = pd.read_csv(data_path, skipinitialspace=True)`
			`actual_mdm = df['MDM']`

			`thing_correctness = df['thing'] == df['p_thing']`
			`property_correctness = df['property'] == df['p_property']`
			`answer = thing_correctness & property_correctness`

			`# if is non-MDM -> then should be unmapped`
			`# if is MDM -> then should be mapped correctly`

			`# out of correctly predicted relevant data, how many are mapped correctly?`
			`correct_positive_mdm_and_map = sum(p_mdm & actual_mdm & answer)`

			`# number of correctly predicted non-relevant data`
			`correct_negative_mdm = sum(~(p_mdm) & ~(actual_mdm))`

			`overall_correct = (correct_positive_mdm_and_map + correct_negative_mdm)/len(actual_mdm)`
			`print(overall_correct)`
			`# %%`
			`for fold in [1,2,3,4,5]:`
			`run(fold)`