# Exploratory, cell-based script: we want to see if there are clear rules for
# filling in the numbers of the patterns that contain a '#' placeholder.
# format

# %%

# %%
import pandas as pd
# from utils import Retriever, cosine_similarity_chunked
import os
import glob
import numpy as np

# %%
# Load the per-fold result export and the matching training split.
# NOTE(review): neither frame is used again in the visible cells; they are
# kept so that interactive inspection still works.
fold = 5

data_path = f'../../train/mapping_pattern/mapping_prediction/exports/result_group_{fold}.csv'
test_df = pd.read_csv(data_path, skipinitialspace=True)

data_path = f"../../data_preprocess/exports/dataset/group_{fold}/train_all.csv"
train_df = pd.read_csv(data_path, skipinitialspace=True)

# %%
# Load the full mapping table and collect its distinct patterns in sorted order.
data_path = '../../data_import/exports/data_mapping_mdm.csv'
# data_path = '../../data_preprocess/exports/preprocessed_data.csv'
df = pd.read_csv(data_path, skipinitialspace=True)
mdm_list = sorted(set(df['pattern']))

# %%
# Keep only the patterns that contain the '#' symbol.
symbol_pattern_list = [pattern for pattern in mdm_list if '#' in pattern]

# %%
symbol_pattern_list

# %%
len(symbol_pattern_list)

# %%
# Pick one '#' pattern by position and list the ships whose rows use it.
idx = 22
print(symbol_pattern_list[idx])

condition1 = df['pattern'] == symbol_pattern_list[idx]
subset_df = df.loc[condition1]

ship = list(set(subset_df['ships_idx']))
print(ship)

# %%
# Dump the descriptive columns of the selected rows for manual inspection.
subset_df[
    ['thing', 'property', 'tag_name', 'tag_description', 'ships_idx']
].to_csv('output.csv')

# %%
# Narrow the selection further to a single ship.
ship_idx = 10
condition2 = df['ships_idx'] == ship_idx
subset_df = df.loc[condition1 & condition2]
subset_df

# %%