73 lines
3.0 KiB
Python
73 lines
3.0 KiB
Python
# %%
|
|
import pandas as pd
|
|
import numpy as np
|
|
|
|
# %%
|
|
# Mapping export that supplies the set of known `thing` / `property` pairs.
data_path = '../../data_import/exports/data_mapping_mdm.csv'
full_df = pd.read_csv(data_path, skipinitialspace=True)

# Sorted list of every distinct `thing + property` concatenation in the
# export; predictions are tested for membership in this list further down.
# A row missing either column concatenates to NaN, and a float NaN cannot be
# ordered against the string labels (sorted() would raise TypeError), so
# those rows are dropped before sorting.
mdm_list = sorted(set((full_df['thing'] + full_df['property']).dropna()))
|
|
|
|
# %%
|
|
def run_mdm(fold):
    """Score the T5 mapping on the MDM rows of one fold, with a BERT fallback.

    Reads the BERT classification export and the T5 mapping export for
    ``fold`` and keeps only the rows selected by the boolean ``MDM`` column
    of each file. A row counts as correct when the predicted label equals the
    ground-truth ``thing + property`` concatenation. Wherever the T5 label is
    not a member of the module-level ``mdm_list``, the T5 correctness flag is
    replaced by the BERT correctness flag of the row at the same position.

    The per-row flags after replacement are written to
    ``exports/result_group_{fold}.csv``, and the accuracy before and after
    the replacement is printed. With no MDM rows the accuracies are NaN.

    Args:
        fold: Fold identifier interpolated into the input and output paths.

    Raises:
        ValueError: If the two filtered exports differ in row count; rows are
            paired by position, so a mismatch would compare unrelated rows.
    """
    bert_path = f'../../train/classification_bert_complete_desc_unit/classification_prediction/exports/result_group_{fold}.csv'
    df_bert = pd.read_csv(bert_path)
    df_bert = df_bert[df_bert['MDM']].reset_index(drop=True)

    t5_path = f'../../train/mapping_t5_complete_desc_unit/mapping_prediction/exports/result_group_{fold}.csv'
    # Alternative T5 export kept for reference:
    # t5_path = f'../../train/mapping_t5-base_desc_unit/mapping_prediction/exports/result_group_{fold}.csv'
    df_t5 = pd.read_csv(t5_path)
    df_t5 = df_t5[df_t5['MDM']].reset_index(drop=True)

    # Both frames now carry a fresh 0..n-1 index, so equal length means the
    # positional pairing below is well defined.
    if len(df_bert) != len(df_t5):
        raise ValueError(
            f'fold {fold}: BERT export has {len(df_bert)} MDM rows but '
            f'T5 export has {len(df_t5)}; rows cannot be paired'
        )

    t5_prediction = df_t5['p_thing'] + df_t5['p_property']
    in_vocab = t5_prediction.isin(mdm_list)

    bert_correct = (df_bert['thing'] + df_bert['property']) == df_bert['class_prediction']
    t5_correct = (df_t5['thing'] + df_t5['property']) == t5_prediction

    # Mean of a boolean Series is the fraction of True values.
    t5_original_accuracy = t5_correct.mean()

    # Replace the T5 flag with the BERT flag where T5 is out of vocabulary.
    t5_correct_modified = t5_correct.copy()
    out_of_vocab = ~in_vocab
    t5_correct_modified[out_of_vocab] = bert_correct[out_of_vocab].to_numpy()

    # NOTE(review): run_full writes to this same path, so when both run for a
    # fold the later call overwrites this file -- confirm that is intended.
    t5_correct_modified.to_csv(f'exports/result_group_{fold}.csv')

    t5_new_accuracy = t5_correct_modified.mean()

    print('original accuracy', t5_original_accuracy)
    print('new accuracy', t5_new_accuracy)
|
|
|
|
|
|
# %%
|
|
# this does replacement for the full prediction
|
|
def run_full(fold):
    """Export per-row correctness for a whole fold, with a BERT fallback.

    Reads the BERT classification export and the T5 mapping export for
    ``fold`` without any row filtering. A row counts as correct when the
    predicted label equals the ground-truth ``thing + property``
    concatenation. Wherever the T5 label is not a member of the module-level
    ``mdm_list``, the T5 correctness flag is replaced by the BERT correctness
    flag of the row at the same position.

    The resulting flags are written to ``exports/result_group_{fold}.csv``
    as a column named ``grounded_pred``.

    Args:
        fold: Fold identifier interpolated into the input and output paths.

    Raises:
        ValueError: If the two exports differ in row count; rows are paired
            by position, so a mismatch would compare unrelated rows.
    """
    bert_path = f'../../train/classification_bert_complete_desc_unit/classification_prediction/exports/result_group_{fold}.csv'
    df_bert = pd.read_csv(bert_path)

    t5_path = f'../../train/mapping_t5_complete_desc_unit/mapping_prediction/exports/result_group_{fold}.csv'
    # Alternative T5 export kept for reference:
    # t5_path = f'../../train/mapping_t5-base_desc_unit/mapping_prediction/exports/result_group_{fold}.csv'
    df_t5 = pd.read_csv(t5_path)

    # Both frames use the default 0..n-1 index from read_csv, so equal length
    # means the positional pairing below is well defined.
    if len(df_bert) != len(df_t5):
        raise ValueError(
            f'fold {fold}: BERT export has {len(df_bert)} rows but '
            f'T5 export has {len(df_t5)}; rows cannot be paired'
        )

    t5_prediction = df_t5['p_thing'] + df_t5['p_property']
    in_vocab = t5_prediction.isin(mdm_list)

    bert_correct = (df_bert['thing'] + df_bert['property']) == df_bert['class_prediction']
    t5_correct = (df_t5['thing'] + df_t5['property']) == t5_prediction

    # Replace the T5 flag with the BERT flag where T5 is out of vocabulary.
    t5_correct_modified = t5_correct.copy()
    out_of_vocab = ~in_vocab
    t5_correct_modified[out_of_vocab] = bert_correct[out_of_vocab].to_numpy()

    pd.Series(t5_correct_modified, name='grounded_pred').to_csv(f'exports/result_group_{fold}.csv')
|
|
|
|
|
|
# %%
|
|
# Run both evaluations for folds 1 through 5, MDM-only first and then the
# full prediction set.
for fold in range(1, 6):
    run_mdm(fold)
    run_full(fold)
|
|
# %%
|