56 lines
1.3 KiB
Python
56 lines
1.3 KiB
Python
|
# We want to see whether there are clear rules for filling in the numbers in a pattern.
|
||
|
# format
|
||
|
|
||
|
# %%
|
||
|
# %%
|
||
|
import pandas as pd
|
||
|
# from utils import Retriever, cosine_similarity_chunked
|
||
|
import os
|
||
|
import glob
|
||
|
import numpy as np
|
||
|
|
||
|
# %%
|
||
|
# Group number interpolated into both CSV paths below.
fold = 5

# Result export for this group. skipinitialspace=True skips the spaces
# that follow each delimiter while parsing.
data_path = f'../../train/mapping_pattern/mapping_prediction/exports/result_group_{fold}.csv'
test_df = pd.read_csv(filepath_or_buffer=data_path, skipinitialspace=True)

# data_path is rebound here: it now points at the same group's training CSV.
data_path = f"../../data_preprocess/exports/dataset/group_{fold}/train_all.csv"
train_df = pd.read_csv(filepath_or_buffer=data_path, skipinitialspace=True)
|
||
|
|
||
|
|
||
|
|
||
|
# %%
|
||
|
# Load the MDM mapping export. The commented-out assignment is an
# alternative input path that is currently disabled.
data_path = '../../data_import/exports/data_mapping_mdm.csv'
# data_path = '../../data_preprocess/exports/preprocessed_data.csv'
df = pd.read_csv(data_path, skipinitialspace=True)

# Distinct values of the 'pattern' column, in sorted order. sorted()
# accepts any iterable, so the set needs no intermediate list().
mdm_list = sorted(set(df['pattern']))
|
||
|
|
||
|
|
||
|
|
||
|
# %%
|
||
|
# Keep only the patterns that contain a '#' character.
symbol_pattern_list = [pattern for pattern in mdm_list if '#' in pattern]

# %%
# Bare expression: shown as the cell output when run interactively.
symbol_pattern_list

# %%
# Number of patterns that contain '#'.
len(symbol_pattern_list)
|
||
|
|
||
|
# %%
|
||
|
# Position in symbol_pattern_list of the pattern to inspect.
idx = 22
selected_pattern = symbol_pattern_list[idx]
print(selected_pattern)

# Boolean mask over the rows of df whose 'pattern' equals the selected one.
# It stays bound as condition1 because a later cell combines it with a
# ship filter.
condition1 = df['pattern'] == selected_pattern
# Reuse the mask instead of evaluating the same comparison a second time.
subset_df = df[condition1]

# Distinct ships_idx values among the matching rows; a set does not
# guarantee any particular order.
ship = list(set(subset_df['ships_idx']))
print(ship)
|
||
|
|
||
|
# %%
|
||
|
# Write the selected columns of the current subset to output.csv
# (relative path; the DataFrame index is written too, as to_csv does by default).
export_columns = ['thing', 'property', 'tag_name', 'tag_description', 'ships_idx']
subset_df[export_columns].to_csv('output.csv')
|
||
|
# %%
|
||
|
# Narrow the pattern match down to a single ship.
ship_idx = 10
condition2 = df['ships_idx'].eq(ship_idx)

# Rows that satisfy both the pattern mask (condition1) and the ship mask.
subset_df = df[condition1 & condition2]
# Bare expression: displayed when the cell is run interactively.
subset_df
|
||
|
|
||
|
# %%
|