hipom_data_mapping/analysis/result_report_statistics/result_statistics.py

200 lines
4.5 KiB
Python

# %%
import pandas as pd
import numpy as np
####################################################################################
# stage 1
# %%
# stage 1a: binary classification results, loaded from stage1a.csv
df_stage1a = pd.read_csv('stage1a.csv')
# %%
# desc only: keep rows whose 'type' is 'desc', then show the mean and std
# rows of describe()
mask = df_stage1a['type'].eq('desc')
df_stage1a.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# desc and unit: same summary for rows whose 'type' is 'desc_unit'
mask = df_stage1a['type'].eq('desc_unit')
df_stage1a.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# stage 1b: similarity-based classification results, loaded from stage1b.csv
df_stage1b = pd.read_csv('stage1b.csv')
# %%
# desc only: keep rows whose 'type' is 'desc', then show the mean and std
# rows of describe()
mask = df_stage1b['type'].eq('desc')
df_stage1b.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# desc and unit: same summary for rows whose 'type' is 'desc_unit'
mask = df_stage1b['type'].eq('desc_unit')
df_stage1b.loc[mask].describe().loc[['mean', 'std'], :]
# %%
#################################################################################
# stage 2: mapping model
# %%
# stage 2a: mapping by classification results, loaded from stage2a.csv
df_stage2a = pd.read_csv('stage2a.csv')
# %%
# desc only: keep rows whose 'type' is 'desc', then show the mean and std
# rows of describe()
mask = df_stage2a['type'].eq('desc')
df_stage2a.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# desc and unit: same summary for rows whose 'type' is 'desc_unit'
mask = df_stage2a['type'].eq('desc_unit')
df_stage2a.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# stage 2b: mapping by seq2seq results, loaded from stage2b.csv
df_stage2b = pd.read_csv('stage2b.csv')
# %%
# desc only: keep rows whose 'type' is 'desc', then show the mean and std
# rows of describe()
mask = df_stage2b['type'].eq('desc')
df_stage2b.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# desc and unit: same summary for rows whose 'type' is 'desc_unit'
mask = df_stage2b['type'].eq('desc_unit')
df_stage2b.loc[mask].describe().loc[['mean', 'std'], :]
############################
# frozen encoder
# %%
# results table loaded from frozen_encoder.csv
df = pd.read_csv('frozen_encoder.csv')
# %%
# normal: keep rows whose 'type' is 'normal', then show the mean and std
# rows of describe()
mask = df['type'].eq('normal')
df.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# frozen: same summary for rows whose 'type' is 'frozen'
mask = df['type'].eq('frozen')
df.loc[mask].describe().loc[['mean', 'std'], :]
# %%
############################
# decoder scaling
# %%
# results table loaded from decoder_scaling.csv; each cell below filters on
# one value of the 'type' column and shows the mean and std rows of describe()
df = pd.read_csv('decoder_scaling.csv')
# %%
# 1 layer: rows whose 'type' is '1layer'
mask = df['type'].eq('1layer')
df.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# 2 layer: rows whose 'type' is '2layer'
mask = df['type'].eq('2layer')
df.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# 4 layer: rows whose 'type' is '4layer'
mask = df['type'].eq('4layer')
df.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# 6 layer: rows whose 'type' is '6layer'
mask = df['type'].eq('6layer')
df.loc[mask].describe().loc[['mean', 'std'], :]
# %%
# 8 layer: rows whose 'type' is '8layer'
mask = df['type'].eq('8layer')
df.loc[mask].describe().loc[['mean', 'std'], :]
# %%
#########################
# compute overall result
# $\frac{1808}{2113} = 0.856$ & $\frac{10692}{10961} = 0.975$ & $\frac{12500}{13074} = 0.956$ \\
# $\frac{1932}{2140} = 0.903$ & $\frac{8304}{8582} = 0.968$ & $\frac{10236}{10722} = 0.955$ \\
# $\frac{1789}{1992} = 0.898$ & $\frac{7613}{7863} = 0.968$ & $\frac{9402}{9855} = 0.954$ \\
# $\frac{1967}{2102} = 0.936$ & $\frac{12929}{13349} = 0.969$ & $\frac{14896}{15451} = 0.964$ \\
# $\frac{1915}{2183} = 0.877$ & $\frac{10381}{10786} = 0.962$ & $\frac{12296}{12969} = 0.948$ \\
# %%
# Per-run counts backing the table above, one row per run (presumably one
# cross-validation fold each -- confirm against the experiment setup).
# Columns:
#   0 / 1: numerator / denominator of the relevant-class ratio
#   2 / 3: numerator / denominator of the non-relevant-class ratio
#   4:     denominator of the overall ratio (expected: column 1 + column 3)
matrix = np.array([
    [1808, 2113, 10692, 10961, 13074],
    [1932, 2140, 8304, 8582, 10722],
    [1789, 1992, 7613, 7863, 9855],
    [1967, 2102, 12929, 13349, 15451],
    [1915, 2183, 10381, 10786, 12969],
])
# %%
# NOTE: ddof=1 (sample standard deviation) is used throughout so these numbers
# are computed the same way as the `std` row of pandas' describe() that the
# CSV-based sections of this file report; np.std defaults to ddof=0.
relevant_class = matrix[:, 0] / matrix[:, 1]
relevant_class_mean = np.mean(relevant_class)
relevant_class_std = np.std(relevant_class, ddof=1)
print(relevant_class)
print(relevant_class_mean)
print(relevant_class_std)
# %%
non_relevant_class = matrix[:, 2] / matrix[:, 3]
non_relevant_class_mean = np.mean(non_relevant_class)
non_relevant_class_std = np.std(non_relevant_class, ddof=1)
print(non_relevant_class)
print(non_relevant_class_mean)
print(non_relevant_class_std)
# %%
numerator = matrix[:, 0] + matrix[:, 2]
denominator = matrix[:, 1] + matrix[:, 3]
print(numerator)
print(denominator)  # same as last column
# The table is typed in by hand, so enforce the "same as last column"
# expectation instead of relying on eyeballing the printout.
if not np.array_equal(denominator, matrix[:, 4]):
    raise ValueError('class totals do not add up to the last column of matrix')
overall = numerator / denominator
overall_mean = np.mean(overall)
overall_std = np.std(overall, ddof=1)
print(overall)
print(overall_mean)
print(overall_std)
######################
# compute mapping result
# %%
# $\frac{1761}{1808} = 0.974$ \\
# $\frac{1802}{1932} = 0.933$ \\
# $\frac{1760}{1789} = 0.984$ \\
# $\frac{1945}{1967} = 0.989$ \\
# $\frac{1837}{1915} = 0.959$ \\
# Per-run mapping counts, one row per run: column 0 is the numerator and
# column 1 the denominator of the ratio listed in the comment table above.
matrix = np.array([
    [1761, 1808],
    [1802, 1932],
    [1760, 1789],
    [1945, 1967],
    [1837, 1915],
])
# %%
result = matrix[:, 0] / matrix[:, 1]
result_mean = np.mean(result)
# ddof=1 (sample standard deviation) so this matches the `std` row of pandas'
# describe() used for the CSV-based tables in this file; np.std defaults to
# ddof=0, which would make the reported spreads inconsistent.
result_std = np.std(result, ddof=1)
print(result)
print(result_mean)
print(result_std)
# %%
####################################
# compute overall result: (mapping numerator + non-relevant numerator) / total
# & 1761 & 10692 & $\frac{1761 + 10692}{13074} = 0.953$ \\
# & 1802 & 8304 & $\frac{1802 + 8304}{10722} = 0.943$ \\
# & 1760 & 7613 & $\frac{1760 + 7613}{9855} = 0.951$ \\
# & 1945 & 12929 & $\frac{1945 + 12929}{15451} = 0.963$ \\
# & 1837 & 10381 & $\frac{1837 + 10381}{12969} = 0.942$ \\
# Per-run counts for the overall ratio, one row per run: columns 0 and 1 are
# the two numerator terms and column 2 is the denominator, as laid out in the
# comment table above.
matrix = np.array([
    [1761, 10692, 13074],
    [1802, 8304, 10722],
    [1760, 7613, 9855],
    [1945, 12929, 15451],
    [1837, 10381, 12969],
])
# %%
overall = (matrix[:, 0] + matrix[:, 1]) / matrix[:, 2]
overall_mean = np.mean(overall)
# ddof=1 (sample standard deviation) so this matches the `std` row of pandas'
# describe() used for the CSV-based tables in this file; np.std defaults to
# ddof=0, which would make the reported spreads inconsistent.
overall_std = np.std(overall, ddof=1)
print(overall)
print(overall_mean)
print(overall_std)
# %%