domain_mapping/analysis/class_imbalance.py

18 lines
279 B
Python
Raw Normal View History

# %%
import pandas as pd
import matplotlib.pyplot as plt
# %%
# Load the training set; the path is relative to the working directory
# the cells are run from.
data_path = '../data_import/train.csv'
train_df = pd.read_csv(
    filepath_or_buffer=data_path,
    skipinitialspace=True,  # ignore blanks that follow each delimiter
)
# %%
# Tally how many rows each distinct entity_id has.
entity_ids = train_df['entity_id']
id_counts = entity_ids.value_counts()
# %%
# Histogram of the per-entity row counts, split into 50 bins.
plt.hist(x=id_counts, bins=50)
# %%