# hipom_data_mapping/analysis/unit_label_differences/unit.py
#
# 27 lines
# 551 B
# Python
# Raw Normal View History

# %%
import pandas as pd
# %%
# Relative path to the preprocessed export; it is resolved against the
# current working directory, so run this from the script's own folder.
data_path = '../../data_preprocess/exports/preprocessed_data.csv'
# skipinitialspace=True tells the parser to ignore spaces that directly
# follow a delimiter.
full_df = pd.read_csv(filepath_or_buffer=data_path, skipinitialspace=True)
# %%
# Rows selected by the `MDM` column, renumbered from 0.
# NOTE(review): this assumes `MDM` is parsed as a boolean column - confirm.
df_in = full_df[full_df['MDM']]
df_in = df_in.reset_index(drop=True)
# %%
# The complementary rows (mask negated), also renumbered from 0.
df_out = full_df[~full_df['MDM']]
df_out = df_out.reset_index(drop=True)
# %%
# Frequency of every `unit` label among the MDM rows (df_in).
label_counts_in = df_in['unit'].value_counts()
print(label_counts_in.to_string())
# %%
# Frequency of every `unit` label among the non-MDM rows (df_out).
label_counts_out = df_out['unit'].value_counts()
print(label_counts_out.to_string())
# %%
# Share of non-MDM rows whose unit label is 'NOVALUE'.
# .get(..., 0) yields a share of 0 instead of a KeyError when the label
# does not occur in this subset.
label_counts_out.get('NOVALUE', 0)/len(df_out)
# %%
# Share of MDM rows whose unit label is 'NOVALUE'.
# The denominator must be the size of the same subset the count came from
# (df_in); dividing by len(df_out) mixed the two populations.
label_counts_in.get('NOVALUE', 0)/len(df_in)
# %%