# %%
import pandas as pd
import numpy as np


# %%
def summarize_by_type(df, type_value):
    """Return the ``mean`` and ``std`` rows of ``describe()`` for one run type.

    Args:
        df: DataFrame that has a ``type`` column.
        type_value: Value of the ``type`` column to select (e.g. ``'desc'``).

    Returns:
        DataFrame indexed by ``['mean', 'std']`` computed over the rows of
        *df* whose ``type`` equals *type_value*.
    """
    subset = df[df['type'] == type_value]
    return subset.describe().loc[['mean', 'std']]


# NOTE(review): the ``std`` row produced by ``DataFrame.describe()`` is the
# sample standard deviation (ddof=1), whereas the ``np.std`` calls further
# down use NumPy's default population standard deviation (ddof=0). The two
# kinds of numbers are therefore not directly comparable -- confirm which one
# is meant to be reported.

####################################################################################
# stage 1

# %%
# stage 1a: binary classification
df_stage1a = pd.read_csv('stage1a.csv')

# %%
# desc only
# (each summary is left as the last expression of its cell so that an
# interactive session displays it)
summarize_by_type(df_stage1a, 'desc')

# %%
# desc and unit
summarize_by_type(df_stage1a, 'desc_unit')

# %%
# stage 1b: similarity-based classification
df_stage1b = pd.read_csv('stage1b.csv')

# %%
# desc only
summarize_by_type(df_stage1b, 'desc')

# %%
# desc and unit
summarize_by_type(df_stage1b, 'desc_unit')

# %%
#################################################################################
# stage 2: mapping model

# %%
# stage 2a: mapping by classification
df_stage2a = pd.read_csv('stage2a.csv')

# %%
# desc only
summarize_by_type(df_stage2a, 'desc')

# %%
# desc and unit
summarize_by_type(df_stage2a, 'desc_unit')

# %%
# stage 2b: mapping by seq2seq
df_stage2b = pd.read_csv('stage2b.csv')

# %%
# desc only
summarize_by_type(df_stage2b, 'desc')

# %%
# desc and unit
summarize_by_type(df_stage2b, 'desc_unit')

############################
# frozen encoder

# %%
df = pd.read_csv('frozen_encoder.csv')

# %%
# normal
summarize_by_type(df, 'normal')

# %%
# frozen
summarize_by_type(df, 'frozen')

# %%
############################
# decoder scaling

# %%
df = pd.read_csv('decoder_scaling.csv')

# %%
# 1 layer
summarize_by_type(df, '1layer')

# %%
# 2 layer
summarize_by_type(df, '2layer')

# %%
# 4 layer
summarize_by_type(df, '4layer')

# %%
# 6 layer
summarize_by_type(df, '6layer')

# %%
# 8 layer
summarize_by_type(df, '8layer')

# %%
#########################
# compute overall result (stage 1: relevant / non-relevant / combined)
# $\frac{1808}{2113} = 0.856$ & $\frac{10692}{10961} = 0.975$ & $\frac{12500}{13074} = 0.956$ \\
# $\frac{1932}{2140} = 0.903$ & $\frac{8304}{8582} = 0.968$ & $\frac{10236}{10722} = 0.955$ \\
# $\frac{1789}{1992} = 0.898$ & $\frac{7613}{7863} = 0.968$ & $\frac{9402}{9855} = 0.954$ \\
# $\frac{1967}{2102} = 0.936$ & $\frac{12929}{13349} = 0.969$ & $\frac{14896}{15451} = 0.964$ \\
# $\frac{1915}{2183} = 0.877$ & $\frac{10381}{10786} = 0.962$ & $\frac{12296}{12969} = 0.948$ \\

# %%
# One row per table line above. Columns:
#   0: relevant-class numerator      1: relevant-class denominator
#   2: non-relevant-class numerator  3: non-relevant-class denominator
#   4: overall denominator (equals column 1 + column 3)
matrix = np.array([
    [1808, 2113, 10692, 10961, 13074],
    [1932, 2140, 8304, 8582, 10722],
    [1789, 1992, 7613, 7863, 9855],
    [1967, 2102, 12929, 13349, 15451],
    [1915, 2183, 10381, 10786, 12969],
])

# %%
relevant_class = matrix[:, 0] / matrix[:, 1]
print(relevant_class)
print(np.std(relevant_class))

# %%
non_relevant_class = matrix[:, 2] / matrix[:, 3]
print(non_relevant_class)
print(np.std(non_relevant_class))

# %%
numerator = matrix[:, 0] + matrix[:, 2]
denominator = matrix[:, 1] + matrix[:, 3]
print(numerator)
print(denominator)  # same as last column
overall = numerator / denominator
print(overall)
print(np.std(overall))

######################
# compute mapping result

# %%
# $\frac{1761}{1808} = 0.974$ \\
# $\frac{1802}{1932} = 0.933$ \\
# $\frac{1760}{1789} = 0.984$ \\
# $\frac{1945}{1967} = 0.989$ \\
# $\frac{1837}{1915} = 0.959$ \\
# Columns: numerator, denominator of each fraction above.
matrix = np.array([
    [1761, 1808],
    [1802, 1932],
    [1760, 1789],
    [1945, 1967],
    [1837, 1915],
])

# %%
result = matrix[:, 0] / matrix[:, 1]
print(result)
print(np.mean(result))
print(np.std(result))

# %%
####################################
# compute overall result (mapping numerator + non-relevant numerator, over
# the overall denominator)
# & 1761 & 10692 & $\frac{1761 + 10692}{13074} = 0.953$ \\
# & 1802 & 8304 & $\frac{1802 + 8304}{10722} = 0.943$ \\
# & 1760 & 7613 & $\frac{1760 + 7613}{9855} = 0.951$ \\
# & 1945 & 12929 & $\frac{1945 + 12929}{15451} = 0.963$ \\
# & 1837 & 10381 & $\frac{1837 + 10381}{12969} = 0.942$ \\
# Columns: the two summands of each numerator above, then the denominator.
matrix = np.array([
    [1761, 10692, 13074],
    [1802, 8304, 10722],
    [1760, 7613, 9855],
    [1945, 12929, 15451],
    [1837, 10381, 12969],
])

# %%
overall = (matrix[:, 0] + matrix[:, 1]) / matrix[:, 2]
print(overall)
print(np.mean(overall))
print(np.std(overall))

# %%