# Compare the labels between the train data and the test data.
#
# Notebook-style script: every `# %%` marker starts a new interactive cell.
# A bare expression at the end of a cell is there so the interactive
# front-end displays its value; it has no effect when run as a plain script.

# %%
import pandas as pd

# %%
# Load the raw export and keep only rows whose 'MDM' column is truthy.
# NOTE(review): boolean-mask indexing assumes 'MDM' is parsed as a bool
# column without missing values - confirm against the export.
file_path = '../../data_import/exports/raw_data.csv'  # Adjust this path to your actual file location
df = pd.read_csv(file_path)
df = df[df['MDM']]

# %%
# List the distinct units. Non-string entries (e.g. NaN from empty CSV cells)
# are mapped to '' so the values can be sorted together.
unit_list = df['unit']
unit_list = [elem if isinstance(elem, str) else '' for elem in unit_list]
print(sorted(set(unit_list)))

# %%
# Inspect every row that uses one specific unit.
test = '℃'
# df[df['unit'] == test]['property_pattern'].to_list()
df[df['unit'] == test]

#############
# 1 import test data

# %%
fold = 1
data_path = f'../../train/mapping_pattern/mapping_prediction/exports/result_group_{fold}.csv'
# From here on `df` holds the result file for this fold, not the raw export.
df = pd.read_csv(data_path, skipinitialspace=True)

# %%
# subset to mdm
df = df[df['MDM']]

# Rows where the 'p_thing' column agrees with 'thing_pattern'
# (presumably prediction vs. ground truth - confirm column semantics).
thing_condition = df['p_thing'] == df['thing_pattern']
error_thing_df = df.loc[
    ~thing_condition,
    ['tag_description', 'thing_pattern', 'p_thing'],
]

# Same comparison for the property columns.
property_condition = df['p_property'] == df['property_pattern']
error_property_df = df.loc[
    ~property_condition,
    ['tag_description', 'property_pattern', 'p_property'],
]

# Rows where both the thing and the property columns match.
correct_df = df.loc[
    thing_condition & property_condition,
    ['tag_description', 'property_pattern', 'p_property'],
]

test_df = df

# %%
# Combined label "<thing_pattern> <property_pattern>" for every test row.
test_pattern = df['thing_pattern'] + ' ' + df['property_pattern']

##########################
# 2 import train data

# %%
data_path = f"../../data_preprocess/exports/dataset/group_{fold}/train_all.csv"
train_df = pd.read_csv(data_path)
train_pattern = train_df['pattern']

# %%
test_pattern_set = set(test_pattern)
train_pattern_set = set(train_pattern)

# %%
# use this to get labels in test not found in training data
test_pattern_set - train_pattern_set
# verdict: we see that FOMassFlowTotal is not found in the training set
# hence it is not possible for this to be classified correctly

###################################
# experiment 2

# %%
# we want to check load and loadpercent
test_df[test_df['property_pattern'] == 'Load']

# %%
test_df[test_df['property_pattern'] == 'LoadPercent']
# set(df['unit'])

# %%