63 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			63 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
	
# %%
import pandas as pd
import numpy as np

# %%
# Build the label vocabulary: every distinct `thing + property` concatenation
# present in the MDM mapping export, in sorted order.
data_path = '../../data_import/exports/data_mapping_mdm.csv'
full_df = pd.read_csv(data_path, skipinitialspace=True)
mdm_list = sorted(set(full_df['thing'] + full_df['property']))

# %%
# Load the classification (BERT) prediction export for the selected fold.
fold = 5
file_path = f'../../train/classification_bert_complete_desc_unit/classification_prediction/exports/result_group_{fold}.csv'
df_bert = pd.read_csv(file_path)

# %%
# Load the mapping (T5) prediction export for the same fold and keep only the
# rows whose 'MDM' column is truthy; the index is reset so rows are numbered
# 0..n-1 after filtering.
file_path = f'../../train/mapping_t5_complete_desc_unit/mapping_prediction/exports/result_group_{fold}.csv'
# file_path = f'../../train/mapping_t5-base_desc_unit/mapping_prediction/exports/result_group_{fold}.csv'
df_t5 = pd.read_csv(file_path)
df_t5 = df_t5[df_t5['MDM']].reset_index(drop=True)
# The predicted label is the concatenation of the two predicted parts, which
# makes it directly comparable with the entries of `mdm_list`.
df_t5['class_prediction'] = df_t5['p_thing'] + df_t5['p_property']
df_t5['in_vocab'] = df_t5['class_prediction'].isin(mdm_list)

# %%
# Attach each model's prediction to the other model's frame for side-by-side
# inspection.
# NOTE(review): column assignment aligns on the index, so this assumes df_bert
# rows correspond one-to-one, in the same order, to the MDM-filtered df_t5
# rows -- confirm against the export that produces df_bert.
df_t5['bert_prediction'] = df_bert['class_prediction']
df_bert['t5_prediction'] = df_t5['class_prediction']

# %%
# Per-row correctness of the BERT prediction against the reference label.
bert_correct = (df_bert['thing'] + df_bert['property']) == df_bert['class_prediction']

# %%
# Per-row correctness of the T5 prediction; `class_prediction` already holds
# `p_thing + p_property`, so it is reused rather than recomputed.
t5_correct = (df_t5['thing'] + df_t5['property']) == df_t5['class_prediction']

# %%
# Fraction of rows where T5 is correct (bare expression: shown as cell output).
sum(t5_correct)/len(t5_correct)

# %%
# replace t5 not in vocab with bert values
t5_correct_modified = t5_correct.copy()
condition = ~df_t5['in_vocab']
# Converting to a plain array makes the assignment positional within the
# masked rows instead of index-aligned.
t5_correct_modified[condition] = np.array(bert_correct[condition])

# %%
# new replacement correctness
sum(t5_correct_modified)/len(t5_correct_modified)

# %%
# when bert is correct and t5 is wrong
cond_mask = bert_correct & (~t5_correct)
print(sum(cond_mask))
print(df_t5[cond_mask].to_string())

# %%
# when bert is wrong and t5 is correct
cond_mask = (~bert_correct) & (t5_correct)
print(sum(cond_mask))
print(df_bert[cond_mask].to_string())

# %%
# when both are wrong
cond_mask = (~bert_correct) & (~t5_correct)
print(sum(cond_mask))

# %%