import pandas as pd import os import glob from inference import Inference checkpoint_directory = '../' BATCH_SIZE = 512 def infer_and_select(fold): print(f"Inference for fold {fold}") # import test data data_path = f"../../../data_preprocess/exports/dataset/group_{fold}/test_all.csv" df = pd.read_csv(data_path, skipinitialspace=True) df = df[df['MDM']].reset_index(drop=True) # get target data data_path = f"../../../data_preprocess/exports/dataset/group_{fold}/train_all.csv" train_df = pd.read_csv(data_path, skipinitialspace=True) # processing to help with selection later train_df['thing_property'] = train_df['thing'] + " " + train_df['property'] ########################################## # run inference # checkpoint # Use glob to find matching paths directory = os.path.join(checkpoint_directory, f'checkpoint_fold_{fold}b') # Use glob to find matching paths # path is usually checkpoint_fold_1/checkpoint- # we are guaranteed to save only 1 checkpoint from training pattern = 'checkpoint-*' checkpoint_path = glob.glob(os.path.join(directory, pattern))[0] infer = Inference(checkpoint_path) infer.prepare_dataloader(df, batch_size=BATCH_SIZE, max_length=128) thing_prediction_list, property_prediction_list = infer.generate() # add labels too # thing_actual_list, property_actual_list = decode_preds(pred_labels) # Convert the list to a Pandas DataFrame df_out = pd.DataFrame({ 'p_thing': thing_prediction_list, 'p_property': property_prediction_list }) # df_out['p_thing_correct'] = df_out['p_thing'] == df_out['thing'] # df_out['p_property_correct'] = df_out['p_property'] == df_out['property'] df = pd.concat([df, df_out], axis=1) # we can save the t5 generation output here df.to_csv(f"exports/result_group_{fold}.csv", index=False) # here we want to evaluate mapping accuracy within the valid in mdm data only in_mdm = df['MDM'] condition_correct_thing = df['p_thing'] == df['thing'] condition_correct_property = df['p_property'] == df['property'] prediction_mdm_correct = sum(condition_correct_thing & condition_correct_property & in_mdm) pred_correct_proportion = prediction_mdm_correct/sum(in_mdm) # write output to file output.txt with open("output.txt", "a") as f: print(f'Accuracy for fold {fold}: {pred_correct_proportion}', file=f) ########################################### # Execute for all folds # reset file before writing to it with open("output.txt", "w") as f: print('', file=f) for fold in [1]: infer_and_select(fold)