200 lines
4.5 KiB
Python
200 lines
4.5 KiB
Python
|
# %%
|
||
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
|
||
|
####################################################################################
# stage 1
# %%
# stage 1a: binary classification
# Load the stage-1a results; the cells below split the rows on the 'type' column.
df_stage1a = pd.read_csv('stage1a.csv')
# %%
# desc only: rows whose 'type' is 'desc'
mask = df_stage1a['type'] == 'desc'
# describe() summarises the numeric columns; keep only its mean and std rows.
# Bare expression: shown as the cell output when run as a `# %%` cell,
# discarded when the file is run as a plain script.
df_stage1a[mask].describe().loc[['mean', 'std']]

# %%
# desc and unit: rows whose 'type' is 'desc_unit'
mask = df_stage1a['type'] == 'desc_unit'
df_stage1a[mask].describe().loc[['mean', 'std']]

# %%
# stage 1b: similarity-based classification
# Load the stage-1b results; the cells below split the rows on the 'type' column.
df_stage1b = pd.read_csv('stage1b.csv')
# %%
# desc only: rows whose 'type' is 'desc'
mask = df_stage1b['type'] == 'desc'
# describe() summarises the numeric columns; keep only its mean and std rows.
# Bare expression: shown as the cell output when run as a `# %%` cell,
# discarded when the file is run as a plain script.
df_stage1b[mask].describe().loc[['mean', 'std']]

# %%
# desc and unit: rows whose 'type' is 'desc_unit'
mask = df_stage1b['type'] == 'desc_unit'
df_stage1b[mask].describe().loc[['mean', 'std']]


# %%
#################################################################################
# stage 2: mapping model

# %%
# stage 2a: mapping by classification
# Load the stage-2a results; the cells below split the rows on the 'type' column.
df_stage2a = pd.read_csv('stage2a.csv')
# %%
# desc only: rows whose 'type' is 'desc'
mask = df_stage2a['type'] == 'desc'
# describe() summarises the numeric columns; keep only its mean and std rows.
# Bare expression: shown as the cell output when run as a `# %%` cell,
# discarded when the file is run as a plain script.
df_stage2a[mask].describe().loc[['mean', 'std']]

# %%
# desc and unit: rows whose 'type' is 'desc_unit'
mask = df_stage2a['type'] == 'desc_unit'
df_stage2a[mask].describe().loc[['mean', 'std']]


# %%
# stage 2b: mapping by seq2seq
# Load the stage-2b results; the cells below split the rows on the 'type' column.
df_stage2b = pd.read_csv('stage2b.csv')
# %%
# desc only: rows whose 'type' is 'desc'
mask = df_stage2b['type'] == 'desc'
# describe() summarises the numeric columns; keep only its mean and std rows.
# Bare expression: shown as the cell output when run as a `# %%` cell,
# discarded when the file is run as a plain script.
df_stage2b[mask].describe().loc[['mean', 'std']]

# %%
# desc and unit: rows whose 'type' is 'desc_unit'
mask = df_stage2b['type'] == 'desc_unit'
df_stage2b[mask].describe().loc[['mean', 'std']]



############################
# frozen encoder
# %%
# Load the frozen-encoder comparison; rows are split on the 'type' column below.
# NOTE: `df` is rebound by a later cell that loads a different CSV, so re-run
# this cell before re-running the two summaries below.
df = pd.read_csv('frozen_encoder.csv')
# %%
# normal: rows whose 'type' is 'normal'
mask = df['type'] == 'normal'
# describe() summarises the numeric columns; keep only its mean and std rows.
# Bare expression: shown as the cell output when run as a `# %%` cell,
# discarded when the file is run as a plain script.
df[mask].describe().loc[['mean', 'std']]

# %%
# frozen: rows whose 'type' is 'frozen'
mask = df['type'] == 'frozen'
df[mask].describe().loc[['mean', 'std']]


# %%
############################
# decoder scaling
# %%
# Load the decoder-scaling results; each cell below summarises one 'type'
# value ('1layer' ... '8layer' -- presumably the number of decoder layers,
# TODO confirm against the experiment that wrote the CSV).
# NOTE: this rebinds `df`, which an earlier cell used for frozen_encoder.csv.
df = pd.read_csv('decoder_scaling.csv')
# %%
# 1 layer
mask = df['type'] == '1layer'
# describe() summarises the numeric columns; keep only its mean and std rows.
# Bare expression: shown as the cell output when run as a `# %%` cell,
# discarded when the file is run as a plain script.
df[mask].describe().loc[['mean', 'std']]


# %%
# 2 layer
mask = df['type'] == '2layer'
df[mask].describe().loc[['mean', 'std']]

# %%
# 4 layer
mask = df['type'] == '4layer'
df[mask].describe().loc[['mean', 'std']]

# %%
# 6 layer
mask = df['type'] == '6layer'
df[mask].describe().loc[['mean', 'std']]

# %%
# 8 layer
mask = df['type'] == '8layer'
df[mask].describe().loc[['mean', 'std']]



# %%
#########################
# compute overall result

# LaTeX table rows the numbers below were taken from (kept for reference):
# $\frac{1808}{2113} = 0.856$ & $\frac{10692}{10961} = 0.975$ & $\frac{12500}{13074} = 0.956$ \\
# $\frac{1932}{2140} = 0.903$ & $\frac{8304}{8582} = 0.968$ & $\frac{10236}{10722} = 0.955$ \\
# $\frac{1789}{1992} = 0.898$ & $\frac{7613}{7863} = 0.968$ & $\frac{9402}{9855} = 0.954$ \\
# $\frac{1967}{2102} = 0.936$ & $\frac{12929}{13349} = 0.969$ & $\frac{14896}{15451} = 0.964$ \\
# $\frac{1915}{2183} = 0.877$ & $\frac{10381}{10786} = 0.962$ & $\frac{12296}{12969} = 0.948$ \\

# %%
# One row per table row above. Columns:
#   0: relevant-class numerator      1: relevant-class denominator
#   2: non-relevant-class numerator  3: non-relevant-class denominator
#   4: overall denominator (must equal column 1 + column 3)
# (numerators are presumably correct predictions, denominators class sizes
#  -- TODO confirm)
matrix = np.array([
    [1808, 2113, 10692, 10961, 13074],
    [1932, 2140, 8304, 8582, 10722],
    [1789, 1992, 7613, 7863, 9855],
    [1967, 2102, 12929, 13349, 15451],
    [1915, 2183, 10381, 10786, 12969],
])
# %%
# Per-row ratio for the relevant class and its spread across rows.
# NOTE(review): np.std defaults to ddof=0 (population std), whereas the
# DataFrame.describe() cells elsewhere in this file report the sample std
# (ddof=1) -- confirm which one the write-up should use.
relevant_class = matrix[:, 0] / matrix[:, 1]
print(relevant_class)
print(np.std(relevant_class))

# %%
# Per-row ratio for the non-relevant class and its spread across rows.
non_relevant_class = matrix[:, 2] / matrix[:, 3]
print(non_relevant_class)
print(np.std(non_relevant_class))

# %%
# Overall ratio: pool both classes per row.
numerator = matrix[:, 0] + matrix[:, 2]
denominator = matrix[:, 1] + matrix[:, 3]
print(numerator)
print(denominator)  # same as last column
# Enforce the claim above so a typo in the hand-entered counts is caught
# instead of silently skewing the overall ratio.
if not np.array_equal(denominator, matrix[:, 4]):
    raise ValueError(
        'per-class denominators do not add up to the overall total column'
    )
overall = numerator / denominator
print(overall)
print(np.std(overall))


######################
# compute mapping result
# %%

# LaTeX table rows the numbers below were taken from (kept for reference):
# $\frac{1761}{1808} = 0.974$ \\
# $\frac{1802}{1932} = 0.933$ \\
# $\frac{1760}{1789} = 0.984$ \\
# $\frac{1945}{1967} = 0.989$ \\
# $\frac{1837}{1915} = 0.959$ \\

# One row per table row above: [numerator, denominator].
# NOTE: this rebinds `matrix`; cells written for an earlier `matrix` must not
# be re-run after this one without re-running their own definition first.
matrix = np.array([
    [1761, 1808],
    [1802, 1932],
    [1760, 1789],
    [1945, 1967],
    [1837, 1915],
])

# %%
# Per-row ratio, then its mean and spread across rows.
# NOTE(review): np.std defaults to ddof=0 (population std), whereas the
# DataFrame.describe() cells elsewhere in this file report the sample std
# (ddof=1) -- confirm which one the write-up should use.
result = matrix[:, 0] / matrix[:, 1]
print(result)
print(np.mean(result))
print(np.std(result))

# %%
####################################
# compute overall result (two numerator columns over a shared total)
# LaTeX table rows the numbers below were taken from (kept for reference):
# & 1761 & 10692 & $\frac{1761 + 10692}{13074} = 0.953$ \\
# & 1802 & 8304 & $\frac{1802 + 8304}{10722} = 0.943$ \\
# & 1760 & 7613 & $\frac{1760 + 7613}{9855} = 0.951$ \\
# & 1945 & 12929 & $\frac{1945 + 12929}{15451} = 0.963$ \\
# & 1837 & 10381 & $\frac{1837 + 10381}{12969} = 0.942$ \\

# One row per table row above: [first count, second count, total].
# NOTE: this rebinds `matrix`; cells written for an earlier `matrix` must not
# be re-run after this one without re-running their own definition first.
matrix = np.array([
    [1761, 10692, 13074],
    [1802, 8304, 10722],
    [1760, 7613, 9855],
    [1945, 12929, 15451],
    [1837, 10381, 12969],
])

# %%
# Per-row pooled ratio, then its mean and spread across rows.
# NOTE(review): np.std defaults to ddof=0 (population std), whereas the
# DataFrame.describe() cells elsewhere in this file report the sample std
# (ddof=1) -- confirm which one the write-up should use.
overall = (matrix[:, 0] + matrix[:, 1]) / matrix[:, 2]
print(overall)
print(np.mean(overall))
print(np.std(overall))
# %%