# hipom_data_mapping/analysis/data_properties/ship_counts.py

# 72 lines
# 1.4 KiB
# Python

# %%
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
# NOTE: the relative paths used in this script assume it is executed from the
# directory that contains this file. Print the working directory so a wrong
# launch location can be spotted.
working_dir = os.getcwd()
print("Current Working Directory:", working_dir)
# %%
# Optional global font size for the figure (disabled):
# plt.rcParams.update({'font.size': 18})

# Load the exported table; the path is relative to the working directory.
df = pd.read_csv('../../data_import/exports/raw_data.csv')

# Number of rows per ship over the whole table, ordered by ship index.
ship_column = df['ships_idx']
total_counts = ship_column.value_counts().sort_index()

# Same count, restricted to the rows selected by the 'MDM' column.
mdm_rows = df[df['MDM']]
mdm_true_counts = mdm_rows['ships_idx'].value_counts().sort_index()

# Align both count series on the ship index; a ship with no MDM-selected rows
# has no entry in the second series, so its missing value is replaced by 0.
summary_df = pd.DataFrame({'SD': total_counts, 'PD': mdm_true_counts})
summary_df = summary_df.fillna(0)

# Grand totals over all ships.
total_SD = summary_df['SD'].sum()
total_PD = summary_df['PD'].sum()
print(f"Total SD: {total_SD}")
print(f"Total PD: {total_PD}")
# %%
# Overlay the per-ship row counts of both columns of summary_df in one bar
# chart, save it as a PNG in the working directory, and display it.
#
# plt.subplots() creates the figure and its axes in one call, so no separate
# plt.figure() call is made; that would only leave an extra, empty figure open.
fig, ax = plt.subplots(figsize=(8, 6))

# Both series are drawn on the same axes with the same bar width; 'PD' is
# plotted second, over the semi-transparent 'SD' bars.
summary_df['SD'].plot(
    kind='bar',
    ax=ax,
    color='orange',
    alpha=0.5,
    label='Ship Domain',
    width=0.8,
)
summary_df['PD'].plot(
    kind='bar',
    ax=ax,
    color='blue',
    alpha=0.7,
    label='Platform Domain',
    width=0.8,
)

# Keep only every 10th tick position along the x axis.
x_labels = ax.get_xticks()
sparse_ticks = np.arange(min(x_labels), max(x_labels) + 1, 10)
ax.set_xticks(sparse_ticks)
# NOTE(review): the labels are the tick positions themselves; they equal the
# ship indices only if ships_idx is 0-based and contiguous -- confirm.
ax.set_xticklabels([int(tick) for tick in sparse_ticks], rotation=0)
ax.grid(True)

ax.legend()
ax.set_ylabel('Counts')
ax.set_xlabel('Ships')

# Save before show(): some backends discard the figure once it is displayed.
fig.savefig('count_statistics_of_each_ship.png')
plt.show()
# %%