|
# %%
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
|
|
# %%
|
|
# Load the training set from CSV into a DataFrame.
# skipinitialspace=True tells the parser to ignore spaces that directly
# follow a delimiter.
data_path = '../data_import/train.csv'
train_df = pd.read_csv(
    filepath_or_buffer=data_path,
    skipinitialspace=True,
)
|
|
|
|
|
|
# %%
|
|
# Tally how many rows carry each distinct entity_id. The defaults are spelled
# out so the ordering is explicit: most frequent id first.
id_counts = train_df['entity_id'].value_counts(sort=True, ascending=False)
|
|
|
|
# %%
|
|
|
|
# Distribution of the per-id row counts: the x-axis is the number of rows an
# entity_id has, and bar height is how many ids fall in that bin (50 bins).
plt.hist(x=id_counts, bins=50)
|
|
# %%
|