hipom_data_mapping/data_import/make_csv.py

39 lines
1.7 KiB
Python
Raw Normal View History

2024-08-26 19:51:11 +09:00
import pandas as pd
import re
# Location of the data_mapping export consumed by this script.
# Point this at your own copy if the export lives somewhere else.
data_mapping_file_path = "exports/data_mapping.csv"
2024-08-26 19:51:11 +09:00
# Read both exports with every column as text. Without dtype=str pandas
# infers numeric types, which would turn digit-only or zero-padded names into
# numbers and break the string concatenation / pattern comparison done later.
data_mapping = pd.read_csv(data_mapping_file_path, dtype=str)
# The master export is read the same way so that its 'thing' and 'property'
# columns are compared text-to-text against the mapping's columns.
df_master = pd.read_csv('exports/data_model_master_export.csv', dtype=str)
2024-08-26 19:51:11 +09:00
# Generate patterns: every digit in the mapping's thing/property names is
# replaced by '#', then the two parts are joined with a single space.
data_mapping['thing_pattern'] = data_mapping['thing'].str.replace(r'\d', '#', regex=True)
data_mapping['property_pattern'] = data_mapping['property'].str.replace(r'\d', '#', regex=True)
data_mapping['pattern'] = data_mapping['thing_pattern'] + " " + data_mapping['property_pattern']
# Master names are joined as-is, with no digit substitution.
# NOTE(review): presumably the master export already uses '#' placeholders
# in its names -- confirm against the export.
df_master['master_pattern'] = df_master['thing'] + " " + df_master['property']
# Create a set of unique patterns from master for fast lookup. Rows with a
# missing thing or property produce NaN here; they are dropped so that a
# missing value can never count as a match.
master_patterns = set(df_master['master_pattern'].dropna())
# Flag each mapping row whose pattern exists in the master. isin() is the
# vectorised membership test and always yields a boolean column; mapping rows
# with a missing thing/property (NaN pattern) are flagged False.
data_mapping['MDM'] = data_mapping['pattern'].isin(master_patterns)
# Remove specified fields (raises KeyError if any of them is absent, which
# surfaces unexpected changes in the export's columns).
fields_to_remove = ['equip_type_code', 'tx_period', 'tx_type', 'on_change_yn', 'scaling_const', 'description', 'updated_time', 'status_code', 'is_timeout']
merged_data = data_mapping.drop(columns=fields_to_remove)
# Save the updated DataFrame to a new CSV file
output_file_path = 'exports/raw_data.csv'
2024-08-26 19:51:11 +09:00
# Write the full table without the index column, encoded as utf-8-sig
# (UTF-8 with a leading byte-order mark).
merged_data.to_csv(path_or_buf=output_file_path, encoding='utf-8-sig', index=False)
print("Updated data saved to {}".format(output_file_path))
# Keep only the rows whose MDM flag is set.
data_mapping_mdm_true = merged_data.loc[merged_data['MDM']]
# Destination for the MDM-only subset.
mdm_true_output_file_path = 'exports/data_mapping_mdm.csv'
2024-08-26 19:51:11 +09:00
# Persist the MDM-only rows (no index column, utf-8-sig encoding) and report
# where they were written.
data_mapping_mdm_true.to_csv(
    path_or_buf=mdm_true_output_file_path,
    encoding='utf-8-sig',
    index=False,
)
print("MDM TRUE data saved to {}".format(mdm_true_output_file_path))