parent
a1d000d9c8
commit
e90bc69ea9
|
@ -0,0 +1 @@
|
||||||
|
*.zip
|
|
@ -1 +1,3 @@
|
||||||
__pycache__
|
__pycache__
|
||||||
|
*.csv
|
||||||
|
*.md
|
|
@ -1,6 +1,7 @@
|
||||||
# %%
|
# %%
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# import training file
|
# import training file
|
||||||
|
@ -13,5 +14,45 @@ id_counts = train_df['entity_id'].value_counts()
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
|
||||||
plt.hist(id_counts, bins=50)
|
# %%
|
||||||
|
id_counts[:50]
|
||||||
|
|
||||||
|
# %%
|
||||||
|
|
||||||
|
plt.hist(id_counts, bins=50)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
def compute_normalized_class_weights(class_counts, max_resamples=10):
    """
    Compute per-class integer resample counts inversely proportional to class size.

    Each class first gets an inverse-frequency weight (total samples divided
    by the class count). The weights are divided by the largest weight, so the
    rarest class ends up with weight 1, then multiplied by `max_resamples` and
    rounded to the nearest integer. Note that the result does NOT sum to 1,
    and that very frequent classes can round down to 0.

    Args:
        class_counts (array-like): Number of samples for each class. Every
            count must be strictly positive.
        max_resamples (int): Resample count assigned to the rarest class
            (the class with the highest weight). Defaults to 10.

    Returns:
        numpy.ndarray: Integer array of resample counts, one per class, in
        the same order as `class_counts`.

    Raises:
        ValueError: If `class_counts` is empty or contains a count <= 0.
    """
    class_counts = np.array(class_counts)

    # Guard against inputs that would otherwise produce inf/NaN weights
    # (division by zero) and meaningless integer casts further down.
    if class_counts.size == 0:
        raise ValueError("class_counts must not be empty")
    if np.any(class_counts <= 0):
        raise ValueError("class_counts must contain only positive counts")

    total_samples = np.sum(class_counts)
    class_weights = total_samples / class_counts

    # Normalize so that the highest weight (rarest class) is exactly 1
    normalized_weights = class_weights / np.max(class_weights)

    # Scale weights such that the highest weight corresponds to `max_resamples`
    resample_counts = normalized_weights * max_resamples

    # Round to the nearest integer (np.round rounds exact halves to even)
    resample_counts = np.round(resample_counts).astype(int)
    return resample_counts
|
||||||
|
|
||||||
|
# %%
|
||||||
|
id_weights = compute_normalized_class_weights(id_counts, max_resamples=10)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
id_weights
|
||||||
|
# %%
|
||||||
|
id_mask = train_df['entity_id'] == 536
|
||||||
|
train_df[id_mask]
|
||||||
|
|
||||||
|
# %%
|
||||||
|
id_counts.index.to_list()
|
||||||
# %%
|
# %%
|
||||||
|
|
|
@ -18,12 +18,11 @@ id2label = {}
|
||||||
for _, row in entity_df.iterrows():
|
for _, row in entity_df.iterrows():
|
||||||
id2label[row['id']] = row['name']
|
id2label[row['id']] = row['name']
|
||||||
|
|
||||||
|
# %%
|
||||||
|
train_df.sort_values(by=['entity_id']).to_markdown('out.md')
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
data_path = '../train/class_bert_process/classification_prediction/exports/result.csv'
|
data_path = '../train/class_bert_process/prediction/exports/result.csv'
|
||||||
prediction_df = pd.read_csv(data_path)
|
prediction_df = pd.read_csv(data_path)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
@ -39,26 +38,31 @@ new_df = pd.concat((test_df, prediction_df ), axis=1)
|
||||||
mismatch_mask = new_df['entity_id'] != new_df['class_prediction']
|
mismatch_mask = new_df['entity_id'] != new_df['class_prediction']
|
||||||
mismatch_df = new_df[mismatch_mask]
|
mismatch_df = new_df[mismatch_mask]
|
||||||
|
|
||||||
|
# %%
|
||||||
|
len(mismatch_df)
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# print the top 10 offending classes
|
# print the top 10 offending classes
|
||||||
print(mismatch_df['entity_id'].value_counts()[:10])
|
print(mismatch_df['entity_id'].value_counts()[:10])
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# Convert the whole dataframe as a string and display
|
# Convert the whole dataframe as a string and display
|
||||||
# print the mismatch_df
|
# print the mismatch_df
|
||||||
print(mismatch_df.to_markdown())
|
print(mismatch_df.sort_values(by=['entity_id']).to_markdown())
|
||||||
|
|
||||||
|
# %%
|
||||||
|
mismatch_df.to_csv('error.csv')
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# let us see the test mentions
|
# let us see the test mentions
|
||||||
select_value = 434
|
select_value = 268
|
||||||
select_mask = mismatch_df['entity_id'] == select_value
|
select_mask = mismatch_df['entity_id'] == select_value
|
||||||
mismatch_df[select_mask]
|
mismatch_df[select_mask]
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
# let us see the train mentions
|
# let us see the train mentions
|
||||||
select_value = 434
|
select_value = 452
|
||||||
select_mask = train_df['entity_id'] == select_value
|
select_mask = train_df['entity_id'] == select_value
|
||||||
train_df[select_mask]
|
train_df[select_mask]
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
*
|
||||||
|
!.gitignore
|
||||||
|
!*.txt
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
||||||
|
*
|
||||||
|
!.gitignore
|
||||||
|
!*.txt
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
||||||
|
*
|
||||||
|
!.gitignore
|
||||||
|
!*.txt
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
||||||
|
*
|
||||||
|
!.gitignore
|
||||||
|
!*.txt
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1 +0,0 @@
|
||||||
*.csv
|
|
|
@ -0,0 +1,699 @@
|
||||||
|
id,name,type_id,type_name
|
||||||
|
1,(E)JES,2,App
|
||||||
|
2,A-Auto Job Scheduling Software,2,App
|
||||||
|
3,Activiti,2,App
|
||||||
|
4,Adobe Acrobat Reader,2,App
|
||||||
|
5,Ansible,2,App
|
||||||
|
6,Apache ActiveMQ,2,App
|
||||||
|
7,Apache Hbase,2,App
|
||||||
|
8,Apache Hive,2,App
|
||||||
|
9,Apache Kafka,2,App
|
||||||
|
10,Apache ServiceMix,2,App
|
||||||
|
11,Apache Solr,2,App
|
||||||
|
12,Apache Subversion,2,App
|
||||||
|
13,Application Development Facility (ADF),2,App
|
||||||
|
14,Asterisk,2,App
|
||||||
|
15,Automic Job Scheduler,2,App
|
||||||
|
16,Autosys,2,App
|
||||||
|
17,Bluebeam|Bluebeam Q,2,App
|
||||||
|
18,BMC Control-M,2,App
|
||||||
|
19,BMC Identity Management,2,App
|
||||||
|
20,Borland Database Engine (BDE),2,App
|
||||||
|
21,Business Intelligence and Reporting Tools (BIRT),2,App
|
||||||
|
22,CA Gen,2,App
|
||||||
|
23,CA Introscope,2,App
|
||||||
|
24,CA-Panvalet,2,App
|
||||||
|
25,CA-TELON,2,App
|
||||||
|
26,Casegen,2,App
|
||||||
|
27,Chef Automate,2,App
|
||||||
|
28,Cisco AMP for Endpoints,2,App
|
||||||
|
29,CiscoWorks LAN Management Solution (LMS),2,App
|
||||||
|
30,Citrix Virtual Apps and Desktops,2,App
|
||||||
|
31,Citrix ADC CPX,2,App
|
||||||
|
32,Citrix Provisioning,2,App
|
||||||
|
33,Clarify,2,App
|
||||||
|
34,Clarity LIMS,2,App
|
||||||
|
35,LabWare LIMS,2,App
|
||||||
|
36,Cognos,2,App
|
||||||
|
37,Coldfusion,2,App
|
||||||
|
38,ConceptWave,2,App
|
||||||
|
39,CONNAPI,2,App
|
||||||
|
40,Connect Direct,2,App
|
||||||
|
41,Cornerstone software,2,App
|
||||||
|
42,Crystal Reports,2,App
|
||||||
|
43,DB2,2,App
|
||||||
|
44,Documentum Content Server,2,App
|
||||||
|
45,Drupal,2,App
|
||||||
|
46,Eclipse,2,App
|
||||||
|
47,Elastic (ELK) Stack,2,App
|
||||||
|
48,ETAP License Manager (LM),2,App
|
||||||
|
49,ExamDiff,2,App
|
||||||
|
50,F5 Secure Web Gateway Services,2,App
|
||||||
|
51,FileMaker Pro,2,App
|
||||||
|
52,FlexNet Manager Suite,2,App
|
||||||
|
53,FTP Voyager,2,App
|
||||||
|
54,Genymotion,2,App
|
||||||
|
55,Google Chrome,2,App
|
||||||
|
56,Greenplum DB,2,App
|
||||||
|
57,Hadoop,2,App
|
||||||
|
58,HP aC++ compiler,2,App
|
||||||
|
59,HP C/ANSI C compiler,2,App
|
||||||
|
60,HP Operations Orchestration (HPOO),2,App
|
||||||
|
61,HP Server Automation (HPSA),2,App
|
||||||
|
62,IBM BigFix Platform,2,App
|
||||||
|
63,IBM Business Monitor,2,App
|
||||||
|
64,IBM Business Process Manager,2,App
|
||||||
|
65,IBM Content Manager OnDemand (CMOD),2,App
|
||||||
|
66,IBM FileNet P8 Platform,2,App
|
||||||
|
67,IBM InfoSphere DataStage,2,App
|
||||||
|
68,IBM Integration Bus,2,App
|
||||||
|
69,IBM License Metric Tool,2,App
|
||||||
|
70,IBM Maximo,2,App
|
||||||
|
71,IBM Migration Utility,2,App
|
||||||
|
72,IBM Mobile Foundation,2,App
|
||||||
|
73,IBM Operational Decision Manager (ODM),2,App
|
||||||
|
74,IBM Spectrum Scale,2,App
|
||||||
|
75,IBM Tivoli Asset Management,2,App
|
||||||
|
76,IBM Tivoli Composite Application Manager,2,App
|
||||||
|
77,IBM Tivoli Monitoring,2,App
|
||||||
|
78,IBM Tivoli Storage Manager,2,App
|
||||||
|
79,IBM Tivoli Workload Scheduler (TWS),2,App
|
||||||
|
80,IBM WebSphere Business Integration Adaptor,2,App
|
||||||
|
81,IBM Websphere MQ,2,App
|
||||||
|
82,IBM WebSphere MQ Telemetry,2,App
|
||||||
|
83,IBM WebSphere Transformation Extender (WTX),2,App
|
||||||
|
84,IMS DB,2,App
|
||||||
|
85,Info-ZIP,2,App
|
||||||
|
86,Infobright Community Edition (ICE),2,App
|
||||||
|
87,Informatica PowerCenter,2,App
|
||||||
|
88,Ingres,2,App
|
||||||
|
89,JBoss|JBoss Enterprise Service Bus,2,App
|
||||||
|
90,Jenkins,2,App
|
||||||
|
91,joinIT,2,App
|
||||||
|
92,LifeFlow,2,App
|
||||||
|
93,Lotus Notes,2,App
|
||||||
|
94,MaaS360,2,App
|
||||||
|
95,Malwarebytes Anti-Malware,2,App
|
||||||
|
96,ManageEngine ADSelfService Plus,2,App
|
||||||
|
97,MarkLogic DB,2,App
|
||||||
|
98,Memcached,2,App
|
||||||
|
99,Microsoft Access,2,App
|
||||||
|
100,Microsoft BizTalk Adapters for Host Systems,2,App
|
||||||
|
101,Microsoft Dynamics AX,2,App
|
||||||
|
102,Microsoft Endpoint Configuration Manager (SCCM),2,App
|
||||||
|
103,Microsoft Excel,2,App
|
||||||
|
104,Microsoft Exchange Server,2,App
|
||||||
|
105,Microsoft Forefront Identity Manager (FIM),2,App
|
||||||
|
106,Microsoft InfoPath,2,App
|
||||||
|
107,Microsoft Internet Explorer,2,App
|
||||||
|
108,Microsoft ISA Server,2,App
|
||||||
|
109,Microsoft MQ,2,App
|
||||||
|
110,Microsoft System Center Endpoint Protection,2,App
|
||||||
|
111,Microsoft Visual Studio,2,App
|
||||||
|
112,Microsoft Web Deploy,2,App
|
||||||
|
113,Microsoft Web Farm Framework (WFF),2,App
|
||||||
|
114,Microsoft Web Platform Installer,2,App
|
||||||
|
115,Model Driven Workflow (MDW),2,App
|
||||||
|
116,MongoDB,2,App
|
||||||
|
117,Mozilla Firefox,2,App
|
||||||
|
118,MQ Client,2,App
|
||||||
|
119,MS Office 365,2,App
|
||||||
|
120,MS SQL Server,2,App
|
||||||
|
121,MS SQL Server Compact,2,App
|
||||||
|
122,MySQL,2,App
|
||||||
|
123,Neo4j,2,App
|
||||||
|
124,Nexus Repository OSS,2,App
|
||||||
|
125,Nix package manager,2,App
|
||||||
|
126,OpenLDAP,2,App
|
||||||
|
127,OpenText Exstream,2,App
|
||||||
|
128,OpenVPN,2,App
|
||||||
|
129,Oracle Access Management,2,App
|
||||||
|
130,Oracle ADF,2,App
|
||||||
|
131,Oracle APEX,2,App
|
||||||
|
132,Oracle BI Publisher,2,App
|
||||||
|
133,Oracle Business Intelligence,2,App
|
||||||
|
134,Oracle Database,2,App
|
||||||
|
135,Oracle Designer,2,App
|
||||||
|
136,Oracle Enterprise Manager,2,App
|
||||||
|
137,Oracle Forms,2,App
|
||||||
|
138,Oracle Hyperion|Hyperion Interactive Reporting,2,App
|
||||||
|
139,Oracle Hyperion|Hyperion Planning,2,App
|
||||||
|
140,Oracle Net Services,2,App
|
||||||
|
141,Oracle Real Application Clusters (RAC),2,App
|
||||||
|
142,Oracle Retail Point-of-Service,2,App
|
||||||
|
143,Oracle Service Bus,2,App
|
||||||
|
144,Oracle Smart View,2,App
|
||||||
|
145,Oracle SOA Suite,2,App
|
||||||
|
146,Oracle SQL Developer,2,App
|
||||||
|
147,Oracle TimesTen In-Memory Database,2,App
|
||||||
|
148,Oracle Warehouse Builder (OWB),2,App
|
||||||
|
149,Orbix,2,App
|
||||||
|
150,Pentaho,2,App
|
||||||
|
151,PeopleSoft,2,App
|
||||||
|
152,Perkin Elmer Informatics (PKI),2,App
|
||||||
|
153,Pervasive PSQL,2,App
|
||||||
|
154,PIPE-FLO,2,App
|
||||||
|
155,PKZIP,2,App
|
||||||
|
156,Planview,2,App
|
||||||
|
157,PostgreSQL,2,App
|
||||||
|
158,Powerbuilder,2,App
|
||||||
|
159,Primavera P6,2,App
|
||||||
|
160,Pro*COBOL,2,App
|
||||||
|
161,ProjectWise,2,App
|
||||||
|
162,ProjectWise Web Server,2,App
|
||||||
|
163,PVCS Version Manager,2,App
|
||||||
|
164,QlikView,2,App
|
||||||
|
165,RabbitMQ,2,App
|
||||||
|
166,Rational ClearCase,2,App
|
||||||
|
167,Rational ClearQuest,2,App
|
||||||
|
168,Redis,2,App
|
||||||
|
169,Remedy,2,App
|
||||||
|
170,Riak,2,App
|
||||||
|
171,RightFax,2,App
|
||||||
|
172,Rumba,2,App
|
||||||
|
173,SAP BusinessObjects BI server,2,App
|
||||||
|
174,SAP ERP,2,App
|
||||||
|
175,SAP HANA DB,2,App
|
||||||
|
176,SAP MaxDB,2,App
|
||||||
|
177,SAP NetWeaver Business Warehouse,2,App
|
||||||
|
178,SAP SQL Anywhere,2,App
|
||||||
|
179,SAP Web Dynpro,2,App
|
||||||
|
180,Sentry,2,App
|
||||||
|
181,SharePoint,2,App
|
||||||
|
182,Siebel,2,App
|
||||||
|
183,SNA Manager,2,App
|
||||||
|
184,SnagIt,2,App
|
||||||
|
185,solidDB,2,App
|
||||||
|
186,SonarQube,2,App
|
||||||
|
187,SpaceMonger,2,App
|
||||||
|
188,Splunk,2,App
|
||||||
|
189,SQLIO,2,App
|
||||||
|
190,Sybase SQL Server,2,App
|
||||||
|
191,Syncsort,2,App
|
||||||
|
192,Sysinternal Tools,2,App
|
||||||
|
193,Sysinternal Tools|*,2,App
|
||||||
|
194,Sysinternal Tools|AccessEnum,2,App
|
||||||
|
195,Sysinternal Tools|ClockRes,2,App
|
||||||
|
196,Sysinternal Tools|Coreinfo,2,App
|
||||||
|
197,Sysinternal Tools|DiskExt,2,App
|
||||||
|
198,Sysinternal Tools|DiskMon,2,App
|
||||||
|
199,Sysinternal Tools|Hex2dec,2,App
|
||||||
|
200,Sysinternal Tools|Junction,2,App
|
||||||
|
201,Sysinternal Tools|LDMDump,2,App
|
||||||
|
202,Sysinternal Tools|LoadOrder,2,App
|
||||||
|
203,Sysinternal Tools|PipeList,2,App
|
||||||
|
204,Sysinternal Tools|Process Explorer,2,App
|
||||||
|
205,Sysinternal Tools|PsKill,2,App
|
||||||
|
206,Sysinternal Tools|PsPasswd,2,App
|
||||||
|
207,Sysinternal Tools|SDelete,2,App
|
||||||
|
208,Sysinternal Tools|ShareEnum,2,App
|
||||||
|
209,Sysinternal Tools|Sync,2,App
|
||||||
|
210,Sysinternal Tools|TCPView,2,App
|
||||||
|
211,Sysinternal Tools|VMMap,2,App
|
||||||
|
212,Sysinternal Tools|Whois,2,App
|
||||||
|
213,Tableau,2,App
|
||||||
|
214,TCPLink Enterprise Server,2,App
|
||||||
|
215,Teradata,2,App
|
||||||
|
216,Teradata QS Server,2,App
|
||||||
|
217,TIBCO Business Works (BW),2,App
|
||||||
|
218,TIBCO InConcert,2,App
|
||||||
|
219,TIBCO Rendezvous,2,App
|
||||||
|
220,Tivoli Access Manager (TAM),2,App
|
||||||
|
221,TortoiseCVS,2,App
|
||||||
|
222,TortoiseSVN,2,App
|
||||||
|
223,TSO/ISPF,2,App
|
||||||
|
224,TWS zCentric,2,App
|
||||||
|
225,Uniface,2,App
|
||||||
|
226,ViewNow X Server,2,App
|
||||||
|
227,Virtual I/O Server,2,App
|
||||||
|
228,Visibroker,2,App
|
||||||
|
229,VMware Solution Exchange Marketplace (VSX),2,App
|
||||||
|
230,VMware Tools,2,App
|
||||||
|
231,VMware vCenter,2,App
|
||||||
|
232,WebFOCUS,2,App
|
||||||
|
233,WebLogic Integration,2,App
|
||||||
|
234,WebSphere Commerce Suite (WCS),2,App
|
||||||
|
235,WebSphere Message Broker,2,App
|
||||||
|
236,Wherescape Red,2,App
|
||||||
|
237,Windchill,2,App
|
||||||
|
238,Windows Indexing Service,2,App
|
||||||
|
239,Windows Terminal Server (WTS),2,App
|
||||||
|
240,WingArc SVF,2,App
|
||||||
|
241,WinMerge,2,App
|
||||||
|
242,WinRAR,2,App
|
||||||
|
243,WinSCP,2,App
|
||||||
|
244,Wise Package Studio,2,App
|
||||||
|
245,Wordpress,2,App
|
||||||
|
246,XAMPP,2,App
|
||||||
|
247,ZAP BI,2,App
|
||||||
|
248,ZeroMQ,2,App
|
||||||
|
249,Zerto Virtual Replication,2,App
|
||||||
|
250,IBM PowerHA,2,App
|
||||||
|
251,Tivoli Netcool/OMNIbus,2,App
|
||||||
|
252,IBM ILOG Views,2,App
|
||||||
|
253,IBM ILOG CPLEX,2,App
|
||||||
|
254,IBM ILOG Jviews,2,App
|
||||||
|
255,IBM ILOG Elixir,2,App
|
||||||
|
256,IBM ILOG Supply Chain Apps,2,App
|
||||||
|
257,ILOG Solver,2,App
|
||||||
|
258,SQLite,2,App
|
||||||
|
259,Apache HTTP Server,8,App Server
|
||||||
|
260,Apache Tomcat,8,App Server
|
||||||
|
261,ArcGIS Server,8,App Server
|
||||||
|
262,Oracle WebLogic Server,8,App Server
|
||||||
|
263,GlassFish,8,App Server
|
||||||
|
264,HAProxy,8,App Server
|
||||||
|
265,IBM HTTP Server,8,App Server
|
||||||
|
266,IIS,8,App Server
|
||||||
|
267,JBoss,8,App Server
|
||||||
|
268,JBoss|*,8,App Server
|
||||||
|
269,Kitura,8,App Server
|
||||||
|
270,Lotus Domino,8,App Server
|
||||||
|
271,Lucee,8,App Server
|
||||||
|
272,Netscape Application Server (NAS),8,App Server
|
||||||
|
273,Netscape Enterprise Server (NES),8,App Server
|
||||||
|
274,Nginx,8,App Server
|
||||||
|
275,Oracle Application Server,8,App Server
|
||||||
|
276,Oracle WebCenter Content Server,8,App Server
|
||||||
|
277,Pivotal tc Server,8,App Server
|
||||||
|
278,Resin Web Server,8,App Server
|
||||||
|
279,SAP NetWeaver App Server,8,App Server
|
||||||
|
280,Spark,8,App Server
|
||||||
|
281,Oracle iPlanet Web Server,8,App Server
|
||||||
|
282,UltiDev Web Server Pro (UWS),8,App Server
|
||||||
|
283,webMethods Integration Server,8,App Server
|
||||||
|
284,Websphere Application Server (WAS),8,App Server
|
||||||
|
285,WebSphere Liberty,8,App Server
|
||||||
|
286,WebSphere Portal Server,8,App Server
|
||||||
|
287,Websphere Process Server,8,App Server
|
||||||
|
288,WebSphere Process Server,8,App Server
|
||||||
|
289,Oracle Real-Time Decisions (RTD),8,App Server
|
||||||
|
290,CA API Gateway,4,HW
|
||||||
|
291,Citrix ADC SDX,4,HW
|
||||||
|
292,Citrix ADC MPX,4,HW
|
||||||
|
293,HP Nonstop,4,HW
|
||||||
|
294,IBM DataPower Gateway,4,HW
|
||||||
|
295,IBM Power Systems,4,HW
|
||||||
|
296,Intel Xeon Processor,4,HW
|
||||||
|
297,Net Optics Taps,4,HW
|
||||||
|
298,Oracle Exadata,4,HW
|
||||||
|
299,AutoIt,9,Lang
|
||||||
|
300,AWK,9,Lang
|
||||||
|
301,BASIC,9,Lang
|
||||||
|
302,Brainscript,9,Lang
|
||||||
|
303,C,9,Lang
|
||||||
|
304,C#,9,Lang
|
||||||
|
305,C++,9,Lang
|
||||||
|
306,C++|Visual C++,9,Lang
|
||||||
|
307,Cascading Style Sheets (CSS),9,Lang
|
||||||
|
308,Clipper,9,Lang
|
||||||
|
309,CLIST,9,Lang
|
||||||
|
310,COBOL,9,Lang
|
||||||
|
311,ColdFusion Markup Language (CFML),9,Lang
|
||||||
|
312,Data Language Interface (DL/I),9,Lang
|
||||||
|
313,Delphi,9,Lang
|
||||||
|
314,Easytrieve,9,Lang
|
||||||
|
315,Expect,9,Lang
|
||||||
|
316,eXtensible HyperText Markup Language (XHTML),9,Lang
|
||||||
|
317,Extensible Markup Language (XML),9,Lang
|
||||||
|
318,Extensible Markup Language (XML)|MSXML,9,Lang
|
||||||
|
319,Extensible Stylesheet Language (XSL),9,Lang
|
||||||
|
320,Extensible Stylesheet Language Transformations (XSLT),9,Lang
|
||||||
|
321,FOCUS,9,Lang
|
||||||
|
322,Fortran,9,Lang
|
||||||
|
323,Go,9,Lang
|
||||||
|
324,GraphQL,9,Lang
|
||||||
|
325,Groovy,9,Lang
|
||||||
|
326,HiveQL,9,Lang
|
||||||
|
327,Hypertext Markup Language (HTML),9,Lang
|
||||||
|
328,IBM High Level Assembler (HLASM),9,Lang
|
||||||
|
329,IBM i Control Language (CL),9,Lang
|
||||||
|
330,IBM Informix-4GL,9,Lang
|
||||||
|
331,Java,9,Lang
|
||||||
|
332,Java|Extensible Stylesheet Language (XSL),9,Lang
|
||||||
|
333,Java|Java Enterprise Edition (Java EE),9,Lang
|
||||||
|
334,Java|Java Standard Edition (Java SE),9,Lang
|
||||||
|
335,Java|JavaServer Pages (JSP),9,Lang
|
||||||
|
336,Java|JavaServer Pages (JSP)|Scriptlets,9,Lang
|
||||||
|
337,JavaScript,9,Lang
|
||||||
|
338,JCL,9,Lang
|
||||||
|
339,Job Information Language (JIL),9,Lang
|
||||||
|
340,JScript,9,Lang
|
||||||
|
341,Lisp,9,Lang
|
||||||
|
342,Niakwa Programming Language (NPL),9,Lang
|
||||||
|
343,Objective C,9,Lang
|
||||||
|
344,OpenEdge ABL,9,Lang
|
||||||
|
345,Pascal,9,Lang
|
||||||
|
346,Pascal|Object Pascal,9,Lang
|
||||||
|
347,Perl,9,Lang
|
||||||
|
348,Perl|ActivePerl,9,Lang
|
||||||
|
349,Perl|Rex,9,Lang
|
||||||
|
350,PHP,9,Lang
|
||||||
|
351,PL/I,9,Lang
|
||||||
|
352,PL/SQL,9,Lang
|
||||||
|
353,PRO*C,9,Lang
|
||||||
|
354,Python,9,Lang
|
||||||
|
355,R,9,Lang
|
||||||
|
356,Rexx,9,Lang
|
||||||
|
357,RPG,9,Lang
|
||||||
|
358,Ruby,9,Lang
|
||||||
|
359,Salesforce Object Query Language (SOQL),9,Lang
|
||||||
|
360,SAS,9,Lang
|
||||||
|
361,Sass,9,Lang
|
||||||
|
362,Scala,9,Lang
|
||||||
|
363,Smalltalk,9,Lang
|
||||||
|
364,Swift,9,Lang
|
||||||
|
365,TCL,9,Lang
|
||||||
|
366,Transact-SQL,9,Lang
|
||||||
|
367,TypeScript,9,Lang
|
||||||
|
368,VB.NET,9,Lang
|
||||||
|
369,VBScript,9,Lang
|
||||||
|
370,Visual Basic,9,Lang
|
||||||
|
371,Visual Basic for Applications (VBA),9,Lang
|
||||||
|
372,Visual FoxPro,9,Lang
|
||||||
|
373,VoiceXML,9,Lang
|
||||||
|
374,Xbase++,9,Lang
|
||||||
|
375,Apache Lucene,12,Lib
|
||||||
|
376,Apache Xerces,12,Lib
|
||||||
|
377,Cascading Style Sheets (CSS)|Bootstrap,12,Lib
|
||||||
|
378,Java|Apache Camel,12,Lib
|
||||||
|
379,Java|Apache Commons BeanUtils,12,Lib
|
||||||
|
380,Java|Apache PDFBox,12,Lib
|
||||||
|
381,Java|Apache Velocity,12,Lib
|
||||||
|
382,Java|EclipseLink,12,Lib
|
||||||
|
383,Java|Enterprise JavaBeans (EJB),12,Lib
|
||||||
|
384,Java|EZMorph,12,Lib
|
||||||
|
385,Java|Google Web Toolkit (GWT),12,Lib
|
||||||
|
386,Java|Hibernate,12,Lib
|
||||||
|
387,Java|IBM SDK,12,Lib
|
||||||
|
388,Java|Java Development Kit (JDK),12,Lib
|
||||||
|
389,Java|Java Message Service (JMS),12,Lib
|
||||||
|
390,Java|Java Web Start,12,Lib
|
||||||
|
391,Java|JavaServer Faces (JSF),12,Lib
|
||||||
|
392,Java|JDBC,12,Lib
|
||||||
|
393,Java|JRuby Core,12,Lib
|
||||||
|
394,Java|Log4j,12,Lib
|
||||||
|
395,Java|Quartz,12,Lib
|
||||||
|
396,Java|Remote Method Invocation (RMI),12,Lib
|
||||||
|
397,Java|Servlet,12,Lib
|
||||||
|
398,Java|Spring,12,Lib
|
||||||
|
399,Java|Spring|Spring Boot,12,Lib
|
||||||
|
400,Java|Spring|Spring Cloud Data Flow,12,Lib
|
||||||
|
401,Java|Spring|Spring MVC,12,Lib
|
||||||
|
402,Java|Struts,12,Lib
|
||||||
|
403,Java|Swing,12,Lib
|
||||||
|
404,Java|Vaadin,12,Lib
|
||||||
|
405,JavaScript|AJAX,12,Lib
|
||||||
|
406,JavaScript|AngularJS,12,Lib
|
||||||
|
407,JavaScript|Draw2D,12,Lib
|
||||||
|
408,JavaScript|Express.js,12,Lib
|
||||||
|
409,JavaScript|Ext JS,12,Lib
|
||||||
|
410,JavaScript|jqGrid,12,Lib
|
||||||
|
411,JavaScript|JQuery,12,Lib
|
||||||
|
412,JavaScript|Jquery|jQuery UI,12,Lib
|
||||||
|
413,JavaScript|React,12,Lib
|
||||||
|
414,JavaScript|script.aculo.us,12,Lib
|
||||||
|
415,JavaScript|Valums AJAX File Uploader,12,Lib
|
||||||
|
416,OWASP Enterprise Security API (ESAPI),12,Lib
|
||||||
|
417,Perl|Oraperl,12,Lib
|
||||||
|
418,Android,6,OS
|
||||||
|
419,BeOS,6,OS
|
||||||
|
420,Cisco IOS,6,OS
|
||||||
|
421,DART,6,OS
|
||||||
|
422,Fabric OS,6,OS
|
||||||
|
423,GNU,6,OS
|
||||||
|
424,IBM i,6,OS
|
||||||
|
425,iOS,6,OS
|
||||||
|
426,Linux,6,OS
|
||||||
|
427,Linux|CentOS,6,OS
|
||||||
|
428,Linux|Check Point,6,OS
|
||||||
|
429,Linux|Debian,6,OS
|
||||||
|
430,Linux|Junos OS,6,OS
|
||||||
|
431,Linux|openSUSE,6,OS
|
||||||
|
432,Linux|Oracle Linux,6,OS
|
||||||
|
433,Linux|Photon OS,6,OS
|
||||||
|
434,Linux|Red Hat Enterprise Linux,6,OS
|
||||||
|
435,Linux|SUSE Linux Enterprise Server,6,OS
|
||||||
|
436,Linux|Ubuntu,6,OS
|
||||||
|
437,Linux|zLinux,6,OS
|
||||||
|
438,macOS,6,OS
|
||||||
|
439,MVS,6,OS
|
||||||
|
440,MVS|OS/390,6,OS
|
||||||
|
441,MVS|z/OS,6,OS
|
||||||
|
442,OpenVMS,6,OS
|
||||||
|
443,OS/2,6,OS
|
||||||
|
444,Unix,6,OS
|
||||||
|
445,Unix|AIX,6,OS
|
||||||
|
446,Unix|BSD,6,OS
|
||||||
|
447,Unix|BSD|FreeBSD,6,OS
|
||||||
|
448,Unix|BSD|SunOS,6,OS
|
||||||
|
449,Unix|HP-UX,6,OS
|
||||||
|
450,Windows,6,OS
|
||||||
|
451,Windows|Windows Desktop,6,OS
|
||||||
|
452,Windows|Windows Server,6,OS
|
||||||
|
453,Linux|Fedora,6,OS
|
||||||
|
454,Linux|Amazon Linux,6,OS
|
||||||
|
455,Clarify|Clear Basic,5,Plugin
|
||||||
|
456,Eclipse|ATLAS Transformation Language (ATL),5,Plugin
|
||||||
|
457,IBM BigFix Platform|Client Deploy Tool,5,Plugin
|
||||||
|
458,IBM Integration Bus|Extended Structured Query Language (ESQL),5,Plugin
|
||||||
|
459,IBM Tivoli Asset Management|Asset Discovery for Distributed,5,Plugin
|
||||||
|
460,IBM Tivoli Storage Manager|TSM API,5,Plugin
|
||||||
|
461,IBM Tivoli Storage Manager|TSM Client,5,Plugin
|
||||||
|
462,IBM Tivoli Storage Manager|TSM Storage Agent,5,Plugin
|
||||||
|
463,IBM Tivoli Storage Manager|VSS Requestor,5,Plugin
|
||||||
|
464,Microsoft Exchange Server|Veeam Explorer,5,Plugin
|
||||||
|
465,MS SQL Server|MS SQL Server Browser,5,Plugin
|
||||||
|
466,MS SQL Server|Data Transformation Services,5,Plugin
|
||||||
|
467,MS SQL Server|Log Reader Agent,5,Plugin
|
||||||
|
468,MS SQL Server|SQL Server Analysis Services (SSAS),5,Plugin
|
||||||
|
469,MS SQL Server|SQL Server Database Engine,5,Plugin
|
||||||
|
470,MS SQL Server|SQL Server Integration Services (SSIS),5,Plugin
|
||||||
|
471,MS SQL Server|SQL Server Management Studio,5,Plugin
|
||||||
|
472,MS SQL Server|SQL Server Report Builder,5,Plugin
|
||||||
|
473,MS SQL Server|SQL Server Reporting Services (SSRS),5,Plugin
|
||||||
|
474,Oracle Database|Jserver,5,Plugin
|
||||||
|
475,Oracle Database|Oracle Spatial and Graph,5,Plugin
|
||||||
|
476,SAP ERP|SAP EHP,5,Plugin
|
||||||
|
477,SAP ERP|SAP Kernel,5,Plugin
|
||||||
|
478,Oracle Database|SQL*Plus,5,Plugin
|
||||||
|
479,Sybase SQL Server|Sybase Central,5,Plugin
|
||||||
|
480,Sybase SQL Server|Sybase Dsedit,5,Plugin
|
||||||
|
481,TIBCO Business Works (BW)|Integration Manager,5,Plugin
|
||||||
|
482,.NET Framework|Common Runtime Library,7,Runlib
|
||||||
|
483,.NET Framework|log4net,7,Runlib
|
||||||
|
484,.NET Framework|Magick.NET,7,Runlib
|
||||||
|
485,.NET Framework|Windows Communication Foundation (WCF),7,Runlib
|
||||||
|
486,.NET Framework|Windows Workflow Foundation (WF),7,Runlib
|
||||||
|
487,.NET Framework|WinForms,7,Runlib
|
||||||
|
488,ActiveX|ADO,7,Runlib
|
||||||
|
489,IIS|Easy Migration Tool (IEMT),7,Runlib
|
||||||
|
490,IIS|Application Request Routing (ARR),7,Runlib
|
||||||
|
491,IIS|IIS Manager,7,Runlib
|
||||||
|
492,JBoss|JBoss Seam,7,Runlib
|
||||||
|
493,JBoss|Wildfly,7,Runlib
|
||||||
|
494,Oracle Application Server|Oracle Transparent Gateway,7,Runlib
|
||||||
|
495,Oracle WebCenter Content Server|Idoc Script,7,Runlib
|
||||||
|
496,SAP NetWeaver App Server|ABAP,7,Runlib
|
||||||
|
497,.NET Framework,10,Runtime
|
||||||
|
498,Active Directory (AD),10,Runtime
|
||||||
|
499,Active Server Pages (ASP),10,Runtime
|
||||||
|
500,ActiveX,10,Runtime
|
||||||
|
501,Apache Cordova,10,Runtime
|
||||||
|
502,CICS,10,Runtime
|
||||||
|
503,Docker,10,Runtime
|
||||||
|
504,Flash,10,Runtime
|
||||||
|
505,HTTP File Server,10,Runtime
|
||||||
|
506,Java Runtime Environment (JRE),10,Runtime
|
||||||
|
507,Node.js,10,Runtime
|
||||||
|
508,Ruby on Rails,10,Runtime
|
||||||
|
509,VisualForce,10,Runtime
|
||||||
|
510,EMC Celerra,11,Storage
|
||||||
|
511,Application Lifecycle Management (ALM),1,Technology
|
||||||
|
512,Assembler Language,1,Technology
|
||||||
|
513,Batch Management Software (BMS),1,Technology
|
||||||
|
514,Business Object Reports,1,Technology
|
||||||
|
515,Common Gateway Interface (CGI),1,Technology
|
||||||
|
516,Component Object Model (COM),1,Technology
|
||||||
|
517,Common Object Request Broker Architecture (CORBA),1,Technology
|
||||||
|
518,CORBA Interface Definition Language (CORBA IDL),1,Technology
|
||||||
|
519,Data Control Language (DCL),1,Technology
|
||||||
|
520,Database (DB),1,Technology
|
||||||
|
521,Electronic Data Interchange (EDI),1,Technology
|
||||||
|
522,Application Web Server,1,Technology
|
||||||
|
523,Java-based Document Object Model for XML (JDOM),1,Technology
|
||||||
|
524,Lightweight Directory Access Protocol (LDAP),1,Technology
|
||||||
|
525,Open Database Connectivity (ODBC),1,Technology
|
||||||
|
526,Order Management System (OMS),1,Technology
|
||||||
|
527,Oracle Web Services,1,Technology
|
||||||
|
528,Reporting Services,1,Technology
|
||||||
|
529,Representational State Transfer (REST),1,Technology
|
||||||
|
530,Service-Oriented Architecture (SOA),1,Technology
|
||||||
|
531,Simple Object Access Protocol (SOAP),1,Technology
|
||||||
|
532,SQL,9,Lang
|
||||||
|
533,YAML,1,Technology
|
||||||
|
534,Model-view-controller (MVC),1,Technology
|
||||||
|
535,Application Server,1,Technology
|
||||||
|
536,Cloud,1,Technology
|
||||||
|
537,Competency and Quality Assurance Server,1,Technology
|
||||||
|
538,Device Provisioning Engines (DPE),1,Technology
|
||||||
|
539,E-business solution,1,Technology
|
||||||
|
540,Enterprise Service Bus(ESB),1,Technology
|
||||||
|
541,File Server,1,Technology
|
||||||
|
542,General Ledger,1,Technology
|
||||||
|
543,HTTP client,1,Technology
|
||||||
|
544,HTTP Server,1,Technology
|
||||||
|
545,Integrated Safe System of Work (ISSOW),1,Technology
|
||||||
|
546,Internet Exchange Point - Full Stack (ixp-ft),1,Technology
|
||||||
|
547,Internet Message Access Protocol (IMAP),1,Technology
|
||||||
|
548,JSON,1,Technology
|
||||||
|
549,KVS Application Server,1,Technology
|
||||||
|
550,KVS File Server,1,Technology
|
||||||
|
551,KVS Proxy Server,1,Technology
|
||||||
|
552,mainframe,1,Technology
|
||||||
|
553,Manufacturing Execution System (MES),1,Technology
|
||||||
|
554,Mobile,1,Technology
|
||||||
|
555,NonSQL,1,Technology
|
||||||
|
556,SaaS,1,Technology
|
||||||
|
557,Storage Area Network (SAN),1,Technology
|
||||||
|
558,Supplier Registration System Application Server,1,Technology
|
||||||
|
559,Virtual Appliance,1,Technology
|
||||||
|
560,Webtop,1,Technology
|
||||||
|
561,Proxy Server,1,Technology
|
||||||
|
562,Utility,1,Technology
|
||||||
|
563,Citrix ADC,3,VM
|
||||||
|
564,Citrix ADC VPX,3,VM
|
||||||
|
565,Citrix ADC BLX,3,VM
|
||||||
|
566,InterScan Messaging Security Virtual Appliance (IMSVA),3,VM
|
||||||
|
567,Oracle VM,3,VM
|
||||||
|
568,VMware ESXi,3,VM
|
||||||
|
569,VMware Server,3,VM
|
||||||
|
570,IBM WebSphere Transformation Extender (WTX),2,App
|
||||||
|
571,Oracle Retail Point-of-Service,2,App
|
||||||
|
572,Structured Query Language (SQL),1,Technology
|
||||||
|
573,TSO/ISPF,2,App
|
||||||
|
574,Model view controller (MVC),1,Technology
|
||||||
|
575,|*,6,OS
|
||||||
|
576,Linux|*,6,OS
|
||||||
|
577,MVS|*,6,OS
|
||||||
|
578,Unix|*,6,OS
|
||||||
|
579,Unix|BSD|*,6,OS
|
||||||
|
580,Windows|*,6,OS
|
||||||
|
581,MS SQL Server|*,2,App
|
||||||
|
582,C#|*,9,Lang
|
||||||
|
583,C++|*,9,Lang
|
||||||
|
584,Java|*,9,Lang
|
||||||
|
585,Perl|*,9,Lang
|
||||||
|
586,PHP|*,9,Lang
|
||||||
|
587,Python|*,9,Lang
|
||||||
|
588,Ruby|*,9,Lang
|
||||||
|
589,JavaScript|*,9,Lang
|
||||||
|
590,Unix|BSD|OpenBSD,6,OS
|
||||||
|
591,z/VSE,6,OS
|
||||||
|
592,Active Server Pages (ASP)|*,9,Lang
|
||||||
|
593,MS-DOS,6,OS
|
||||||
|
594,COBOL|*,9,Lang
|
||||||
|
595,VME,6,OS
|
||||||
|
596,Extensible Markup Language (XML)|*,9,Lang
|
||||||
|
597,DOS/360,6,OS
|
||||||
|
598,z/TPF,6,OS
|
||||||
|
599,Pascal|*,9,Lang
|
||||||
|
600,Oracle WebLogic Server|*,8,App Server
|
||||||
|
601,Websphere ILOG JRules BRMS,2,App
|
||||||
|
602,Unix|BSD|NetBSD,6,OS
|
||||||
|
603,SharePoint|*,2,App
|
||||||
|
604,IBM Tivoli Storage Manager|*,2,App
|
||||||
|
605,IBM Spectrum Scale|*,2,App
|
||||||
|
606,IBM Tivoli Asset Management|*,2,App
|
||||||
|
607,Oracle Hyperion|*,2,App
|
||||||
|
608,z/VM,6,OS
|
||||||
|
609,IIS|*,8,App Server
|
||||||
|
610,Oracle Application Server|*,8,App Server
|
||||||
|
611,instana,10,Runtime
|
||||||
|
612,credstash,2,App
|
||||||
|
613,Snyk,2,App
|
||||||
|
614,Akka,2,App
|
||||||
|
615,Varnish,8,App Server
|
||||||
|
616,Datadog,10,Runtime
|
||||||
|
617,API,1,Technology
|
||||||
|
618,Hazelcast,10,Runtime
|
||||||
|
619,Infinispan,2,App
|
||||||
|
620,Nuxeo,10,Runtime
|
||||||
|
621,ArangoDB,2,App
|
||||||
|
622,Eclipse Che,2,App
|
||||||
|
623,Amazon S3,2,App
|
||||||
|
624,ClickHouse,2,App
|
||||||
|
625,MinIO,2,App
|
||||||
|
626,Elasticsearch,2,App
|
||||||
|
627,XtraDB,2,App
|
||||||
|
628,Keycloak,2,App
|
||||||
|
629,Grafana,2,App
|
||||||
|
630,Mattermost,10,Runtime
|
||||||
|
631,Synapse,2,App
|
||||||
|
632,Cloud IAM,2,App
|
||||||
|
633,Knative,10,Runtime
|
||||||
|
634,Apache Cassandra,2,App
|
||||||
|
635,Kubeflow,10,Runtime
|
||||||
|
636,Qiskit,2,App
|
||||||
|
637,Microsoft Azure,2,App
|
||||||
|
638,Strimzi,10,Runtime
|
||||||
|
639,Sematext,10,Runtime
|
||||||
|
640,Eclipse hawkBit,2,App
|
||||||
|
641,Eclipse Ditto,2,App
|
||||||
|
642,MariaDB,2,App
|
||||||
|
643,Zadara,2,App
|
||||||
|
644,Istio,2,App
|
||||||
|
645,Vault,2,App
|
||||||
|
646,Apache Druid,2,App
|
||||||
|
647,etcd,2,App
|
||||||
|
648,Traefik,8,App Server
|
||||||
|
649,IBM Cloud,2,App
|
||||||
|
650,YugabyteDB,2,App
|
||||||
|
651,CockroachDB,2,App
|
||||||
|
652,Jaeger,10,Runtime
|
||||||
|
653,Natural Programming Language,9,Lang
|
||||||
|
654,AcuCOBOL,9,Lang
|
||||||
|
655,Ada,9,Lang
|
||||||
|
656,ADABAS,2,App
|
||||||
|
657,ADSO,9,Lang
|
||||||
|
658,Ansible,2,App
|
||||||
|
659,Batch,9,Lang
|
||||||
|
660,Powershell,9,Lang
|
||||||
|
661,COM+,10,Runtime
|
||||||
|
662,Dataflex,9,Lang
|
||||||
|
663,DDS,9,Lang
|
||||||
|
664,Forte,9,Lang
|
||||||
|
665,Foxpro,9,Lang
|
||||||
|
666,IBM DB2 Purescale,2,App
|
||||||
|
667,IDMS DB,2,App
|
||||||
|
668,IDMS DML,9,Lang
|
||||||
|
669,Jaguar,8,App Server
|
||||||
|
670,EAServer,8,App Server
|
||||||
|
671,Apache Cassandra,2,App
|
||||||
|
672,IBM Netezza,4,HW
|
||||||
|
673,OpenEdge,9,Lang
|
||||||
|
674,OpenROAD,9,Lang
|
||||||
|
675,Oracle Reports,2,App
|
||||||
|
676,SAP Replication Server,2,App
|
||||||
|
677,Git,2,App
|
||||||
|
678,GitLab,2,App
|
||||||
|
679,VSAM,2,App
|
||||||
|
680,Cloud<>Apache HTTP Server,2,App
|
||||||
|
681,Cloud<>Windows|Windows Server,2,App
|
||||||
|
682,Cloud<>MS SQL Server,2,App
|
||||||
|
683,Cloud<>Azure SQL Server Database,2,App
|
||||||
|
684,Cloud<>MySQL,2,App
|
||||||
|
685,Cloud<>Oracle Database,2,App
|
||||||
|
686,Cloud<>PostgreSQL,2,App
|
||||||
|
687,Cloud<>AWS RDS,2,App
|
||||||
|
688,Cloud<>SAP HANA DB,2,App
|
||||||
|
689,BMS Map,9,Lang
|
||||||
|
690,DB400,2,App
|
||||||
|
691,ILE,9,Lang
|
||||||
|
692,Integrated Data Store (IDS),2,App
|
||||||
|
693,ISAM,2,App
|
||||||
|
694,Oracle RDS,2,App
|
||||||
|
695,SAP IQ,2,App
|
||||||
|
696,Cloud<>Linux,2,App
|
||||||
|
697,Apache Maven,2,App
|
||||||
|
698,IBM Basic Assembly Language (BAL),9,Lang
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,6 @@
|
||||||
|
|
||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
Accuracy: 0.79090
|
Accuracy: 0.77655
|
||||||
F1 Score: 0.80996
|
F1 Score: 0.79605
|
||||||
Precision: 0.88827
|
Precision: 0.85637
|
||||||
Recall: 0.79090
|
Recall: 0.77655
|
|
@ -32,7 +32,7 @@ torch.set_float32_matmul_precision('high')
|
||||||
BATCH_SIZE = 256
|
BATCH_SIZE = 256
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
data_path = '../../../data_import/train.csv'
|
data_path = '../../../esAppMod_data_import/train.csv'
|
||||||
train_df = pd.read_csv(data_path, skipinitialspace=True)
|
train_df = pd.read_csv(data_path, skipinitialspace=True)
|
||||||
# rather than use pattern, we use the real thing and property
|
# rather than use pattern, we use the real thing and property
|
||||||
entity_ids = train_df['entity_id'].to_list()
|
entity_ids = train_df['entity_id'].to_list()
|
||||||
|
@ -49,20 +49,31 @@ for idx, val in enumerate(target_id_list):
|
||||||
|
|
||||||
# introduce pre-processing functions
|
# introduce pre-processing functions
|
||||||
def preprocess_text(text: str) -> str:
    """Normalize a raw mention string.

    The steps run in this order:
      1. lowercase the text;
      2. replace ``-``, ``;`` and ``:`` with a space;
      3. insert a space at every digit->letter and letter->digit boundary;
      4. collapse every run of digits into a single ``x``;
      5. collapse runs of whitespace and strip both ends.

    Args:
        text (str): Raw mention text.

    Returns:
        str: The normalized text.
    """
    # 1. Make all lowercase
    text = text.lower()

    # Alternative kept for reference: replace every non-alphanumeric character
    # text = re.sub(r'[^\w\s]', ' ', text)  # Retains only alphanumeric and spaces
    # 2. Replace dashes, semicolons and colons with a space
    text = re.sub(r"[-;:]", " ", text)

    # 3. Add space between digit followed by a letter.
    # The text is lowercase at this point, so the letter class has to be
    # [a-z]; an uppercase class can never match here.
    text = re.sub(r"(\d)([a-z])", r"\1 \2", text)

    # Add space between letter followed by a digit
    text = re.sub(r"([a-z])(\d)", r"\1 \2", text)

    # 4. Substitute each run of digits with a single 'x'
    text = re.sub(r'\d+', 'x', text)

    # 5. Standardize spacing
    text = re.sub(r'\s+', ' ', text).strip()

    # NOTE(review): whatever data the model was trained on should go through
    # the same normalization -- confirm the training-side copy matches.
    return text
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# outputs a list of dictionaries
|
# outputs a list of dictionaries
|
||||||
# processes dataframe into lists of dictionaries
|
# processes dataframe into lists of dictionaries
|
||||||
# each element maps input to output
|
# each element maps input to output
|
||||||
|
@ -85,7 +96,7 @@ def process_df_to_dict(df):
|
||||||
|
|
||||||
def create_dataset():
|
def create_dataset():
|
||||||
# train
|
# train
|
||||||
data_path = '../../../data_import/test.csv'
|
data_path = '../../../esAppMod_data_import/test.csv'
|
||||||
test_df = pd.read_csv(data_path, skipinitialspace=True)
|
test_df = pd.read_csv(data_path, skipinitialspace=True)
|
||||||
|
|
||||||
|
|
|
@ -45,17 +45,47 @@ def set_seed(seed):
|
||||||
|
|
||||||
set_seed(42)
|
set_seed(42)
|
||||||
|
|
||||||
SHUFFLES=5
|
SHUFFLES=2
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
|
|
||||||
# import training file
|
# import training file
|
||||||
data_path = '../../data_import/train.csv'
|
data_path = '../../esAppMod_data_import/train.csv'
|
||||||
train_df = pd.read_csv(data_path, skipinitialspace=True)
|
train_df = pd.read_csv(data_path, skipinitialspace=True)
|
||||||
# rather than use pattern, we use the real thing and property
|
# rather than use pattern, we use the real thing and property
|
||||||
entity_ids = train_df['entity_id'].to_list()
|
entity_ids = train_df['entity_id'].to_list()
|
||||||
target_id_list = sorted(list(set(entity_ids)))
|
target_id_list = sorted(list(set(entity_ids)))
|
||||||
|
|
||||||
|
def compute_normalized_class_weights(class_counts, max_resamples=SHUFFLES):
    """Convert per-class sample counts into integer resample counts.

    Every class receives a weight inversely proportional to its count. The
    weights are rescaled so that the rarest class gets exactly
    ``max_resamples`` and are then rounded to the nearest integer, which
    means sufficiently frequent classes can end up with 0.

    Args:
        class_counts (array-like): An array or list where each element is
            the number of samples of one class. Counts are expected to be
            positive; a zero count would divide by zero.
        max_resamples (int): Resample count assigned to the rarest class.

    Returns:
        numpy.ndarray: Integer resample counts, in the same order as
        ``class_counts``. Empty input yields an empty integer array.
    """
    class_counts = np.asarray(class_counts)
    # np.max raises on an empty array, so answer the empty case up front
    if class_counts.size == 0:
        return np.zeros(0, dtype=int)

    total_samples = np.sum(class_counts)
    # inverse-frequency weight: the rarer the class, the larger the weight
    class_weights = total_samples / class_counts
    # rescale so that the highest weight is 1
    normalized_weights = class_weights / np.max(class_weights)
    # scale so that the highest weight corresponds to `max_resamples`
    resample_counts = normalized_weights * max_resamples
    # round to the nearest integer (np.round sends exact halves to the even
    # neighbour)
    return np.round(resample_counts).astype(int)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Number of training rows per entity_id, most frequent first
id_counts = train_df['entity_id'].value_counts()
# Per-label resample counts, positionally aligned with id_counts
id_weights = compute_normalized_class_weights(id_counts, max_resamples=SHUFFLES)
id_index = id_counts.index
# Pair every label with the resample count at the same position
label2weight = dict(zip(id_index, id_weights))
|
||||||
|
|
||||||
|
|
||||||
# %%
|
# %%
|
||||||
id2label = {}
|
id2label = {}
|
||||||
|
@ -69,13 +99,25 @@ for idx, val in enumerate(target_id_list):
|
||||||
def preprocess_text(text: str) -> str:
    """Normalize a raw mention string before it is shuffled and tokenized.

    Processing order: lowercase; turn ``-``, ``;`` and ``:`` into spaces;
    put a space at each digit/letter boundary; replace every run of digits
    with one ``x``; squeeze whitespace and strip.

    Args:
        text (str): Raw mention text.

    Returns:
        str: The normalized text.
    """
    # 1. Make all lowercase
    text = text.lower()

    # Alternative kept for reference: replace every non-alphanumeric character
    # text = re.sub(r'[^\w\s]', ' ', text)  # Retains only alphanumeric and spaces
    # 2. Replace dashes (and semicolons / colons) with a space
    text = re.sub(r"[-;:]", " ", text)

    # 3. Add space between digit followed by a letter.
    # Letters are already lowercase here, so match [a-z]; an uppercase class
    # would silently never fire.
    text = re.sub(r"(\d)([a-z])", r"\1 \2", text)

    # Add space between letter followed by a digit
    text = re.sub(r"([a-z])(\d)", r"\1 \2", text)

    # 4. Substitute each run of digits with a single 'x'
    text = re.sub(r'\d+', 'x', text)

    # 5. Standardize spacing
    text = re.sub(r'\s+', ' ', text).strip()

    # NOTE(review): text scored at inference time should be normalized the
    # same way -- confirm the inference-side copy matches.
    return text
|
||||||
|
|
||||||
|
@ -123,6 +165,42 @@ def shuffle_text(text, n_shuffles=SHUFFLES):
|
||||||
|
|
||||||
return all_processed
|
return all_processed
|
||||||
|
|
||||||
|
# Long-form term -> abbreviation. The keys are used as regular-expression
# patterns by replace_terms_with_abbreviations, so keep them regex-safe.
# NOTE(review): if these tables are applied to text whose digits were already
# replaced (preprocess_text turns digit runs into 'x'), the entries containing
# digits ('ibm database 2', 'release 2' and their abbreviations) can never
# match -- confirm the intended order of operations.
term_to_abbrev = {
    r'job entry system': 'jes',
    r'subversion': 'svn',
    r'borland database engine': 'bde',
    r'business intelligence and reporting tools': 'birt',
    r'lan management solution': 'lms',
    r'laboratory information management system': 'lims',
    r'ibm database 2': 'db/2',
    r'integrated development environment': 'ide',
    r'software development kit': 'sdk',
    r'hp operations orchestration': 'hpoo',
    r'hp server automation': 'hpsa',
    r'internet information server': 'iis',
    r'release 2': 'r2',
    r'red hat enterprise linux': 'rhel',
    r'oracle enterprise linux': 'oel',
    r'websphere application server': 'was',
    r'application development facility': 'adf',
    r'server analysis services': 'ssas',
}

# Reverse table: whole-word abbreviation pattern -> long-form term.
# The abbreviation is escaped so that any regex metacharacter in it is matched
# literally; \b on both sides keeps it from firing inside longer words.
abbrev_to_term = {
    rf'\b{re.escape(abbrev)}\b': term
    for term, abbrev in term_to_abbrev.items()
}
|
||||||
|
|
||||||
|
def replace_terms_with_abbreviations(text):
    """Replace each long-form term found in ``text`` with its abbreviation.

    Every key of ``term_to_abbrev`` is applied as a regular-expression
    pattern, one after another in the dict's order, so a later substitution
    sees the result of the earlier ones.

    Args:
        text (str): Text to rewrite.

    Returns:
        str: ``text`` with every matching term replaced.
    """
    # `pattern` / `abbreviation` instead of `input`, which shadows the builtin
    for pattern, abbreviation in term_to_abbrev.items():
        text = re.sub(pattern, abbreviation, text)
    return text
|
||||||
|
|
||||||
|
def replace_abbreivations_with_terms(text):
    """Expand each known abbreviation found in ``text`` to its long form.

    Every key of ``abbrev_to_term`` is applied as a regular-expression
    pattern, one after another in the dict's order.

    NOTE: the misspelling in the function name is kept on purpose; the
    function is called under this name.

    Args:
        text (str): Text to rewrite.

    Returns:
        str: ``text`` with every matching abbreviation expanded.
    """
    # `pattern` / `term` instead of `input`, which shadows the builtin
    for pattern, term in abbrev_to_term.items():
        text = re.sub(pattern, term, text)
    return text
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# outputs a list of dictionaries
|
# outputs a list of dictionaries
|
||||||
# processes dataframe into lists of dictionaries
|
# processes dataframe into lists of dictionaries
|
||||||
|
@ -134,9 +212,12 @@ def process_df_to_dict(df):
|
||||||
for _, row in df.iterrows():
|
for _, row in df.iterrows():
|
||||||
# produce shuffling
|
# produce shuffling
|
||||||
index = row['entity_id']
|
index = row['entity_id']
|
||||||
desc = row['mention']
|
parent_desc = row['mention']
|
||||||
desc = preprocess_text(desc)
|
parent_desc = preprocess_text(parent_desc)
|
||||||
processed_descs = shuffle_text(desc, n_shuffles=SHUFFLES)
|
# ensure at least 1 shuffle
|
||||||
|
# no_of_shuffles = label2weight[index] + 1
|
||||||
|
no_of_shuffles = SHUFFLES
|
||||||
|
processed_descs = shuffle_text(parent_desc, n_shuffles=no_of_shuffles)
|
||||||
|
|
||||||
for desc in processed_descs:
|
for desc in processed_descs:
|
||||||
element = {
|
element = {
|
||||||
|
@ -145,12 +226,38 @@ def process_df_to_dict(df):
|
||||||
}
|
}
|
||||||
output_list.append(element)
|
output_list.append(element)
|
||||||
|
|
||||||
|
|
||||||
|
# perform abbrev_to_term
|
||||||
|
desc = replace_terms_with_abbreviations(parent_desc)
|
||||||
|
no_of_shuffles = SHUFFLES
|
||||||
|
processed_descs = shuffle_text(desc, n_shuffles=no_of_shuffles)
|
||||||
|
|
||||||
|
for desc in processed_descs:
|
||||||
|
element = {
|
||||||
|
'text' : desc,
|
||||||
|
'label': label2id[index], # ensure labels starts from 0
|
||||||
|
}
|
||||||
|
output_list.append(element)
|
||||||
|
|
||||||
|
# perform term to abbrev
|
||||||
|
desc = replace_abbreivations_with_terms(parent_desc)
|
||||||
|
no_of_shuffles = SHUFFLES
|
||||||
|
processed_descs = shuffle_text(desc, n_shuffles=no_of_shuffles)
|
||||||
|
|
||||||
|
for desc in processed_descs:
|
||||||
|
element = {
|
||||||
|
'text' : desc,
|
||||||
|
'label': label2id[index], # ensure labels starts from 0
|
||||||
|
}
|
||||||
|
output_list.append(element)
|
||||||
|
|
||||||
|
|
||||||
return output_list
|
return output_list
|
||||||
|
|
||||||
|
|
||||||
def create_dataset():
|
def create_dataset():
|
||||||
# train
|
# train
|
||||||
data_path = '../../data_import/train.csv'
|
data_path = '../../esAppMod_data_import/train.csv'
|
||||||
train_df = pd.read_csv(data_path, skipinitialspace=True)
|
train_df = pd.read_csv(data_path, skipinitialspace=True)
|
||||||
|
|
||||||
|
|
||||||
|
@ -169,8 +276,9 @@ def train():
|
||||||
|
|
||||||
# prepare tokenizer
|
# prepare tokenizer
|
||||||
|
|
||||||
# model_checkpoint = "distilbert/distilbert-base-uncased"
|
model_checkpoint = "distilbert/distilbert-base-uncased"
|
||||||
model_checkpoint = 'google-bert/bert-base-cased'
|
# model_checkpoint = 'google-bert/bert-base-cased'
|
||||||
|
# model_checkpoint = 'prajjwal1/bert-small'
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, return_tensors="pt", clean_up_tokenization_spaces=True)
|
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, return_tensors="pt", clean_up_tokenization_spaces=True)
|
||||||
# Define additional special tokens
|
# Define additional special tokens
|
||||||
# additional_special_tokens = ["<DESC>"]
|
# additional_special_tokens = ["<DESC>"]
|
||||||
|
@ -246,14 +354,15 @@ def train():
|
||||||
logging_strategy="epoch",
|
logging_strategy="epoch",
|
||||||
# save_strategy="epoch",
|
# save_strategy="epoch",
|
||||||
load_best_model_at_end=False,
|
load_best_model_at_end=False,
|
||||||
learning_rate=1e-4,
|
learning_rate=5e-5,
|
||||||
per_device_train_batch_size=128,
|
per_device_train_batch_size=64,
|
||||||
per_device_eval_batch_size=128,
|
per_device_eval_batch_size=64,
|
||||||
auto_find_batch_size=False,
|
auto_find_batch_size=False,
|
||||||
ddp_find_unused_parameters=False,
|
ddp_find_unused_parameters=False,
|
||||||
weight_decay=0.01,
|
weight_decay=0.01,
|
||||||
save_total_limit=1,
|
save_total_limit=1,
|
||||||
num_train_epochs=120,
|
num_train_epochs=80,
|
||||||
|
warmup_steps=400,
|
||||||
bf16=True,
|
bf16=True,
|
||||||
push_to_hub=False,
|
push_to_hub=False,
|
||||||
remove_unused_columns=False,
|
remove_unused_columns=False,
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
# %%
|
||||||
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Load model and tokenizer
|
||||||
|
# model_name = "bigscience/bloom-7b1" # Replace with your model
|
||||||
|
model_name = "bigscience/bloomz-1b1"
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||||
|
|
||||||
|
# Automatically map model layers to available GPUs
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
model_name,
|
||||||
|
device_map="auto", # Automatically split across multiple GPUs
|
||||||
|
torch_dtype="auto" # Use FP16 if available
|
||||||
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Prepare input
|
||||||
|
text = "The quick brown fox jumps over the lazy dog."
|
||||||
|
inputs = tokenizer(text, return_tensors="pt")
|
||||||
|
inputs = inputs.to("cuda")
|
||||||
|
|
||||||
|
# Generate output
|
||||||
|
outputs = model.generate(inputs["input_ids"], max_length=50)
|
||||||
|
|
||||||
|
# Decode and print result
|
||||||
|
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
||||||
|
# %%
|
||||||
|
# %%
|
||||||
|
# Prepare input
|
||||||
|
|
||||||
|
def generate(text):
    """Prompt the loaded model for an acronym/description mapping of ``text``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text (str): The phrase to embed in the prompt.

    Returns:
        str: The first generated sequence, decoded with special tokens
        skipped.
    """
    # Define prompt
    # NOTE(review): the quote opened before "(acronym: description)" is never
    # closed -- confirm whether the prompt is meant to read this way.
    prompt = f"Answer Concisely: Give me a mapping between the acronym and descriptor in the format '(acronym: description): '{text}'"

    # Tokenize and move the tensors to wherever the model lives instead of a
    # hard-coded "cuda", so the call also works when the model was placed on
    # another device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate acronym; unpacking forwards the attention mask along with the
    # input ids.
    outputs = model.generate(
        **inputs,
        max_length=100,
        no_repeat_ngram_size=3,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
||||||
|
|
||||||
|
# Example usage
|
||||||
|
# text = "Advanced Data Analytics Platform"
|
||||||
|
text = 'ColdFusion Markup Language (CFML)'
|
||||||
|
acronym = generate(text)
|
||||||
|
print(f"Acronym: {acronym}")
|
||||||
|
# %%
|
|
@ -0,0 +1,52 @@
|
||||||
|
# %%
|
||||||
|
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Load model and tokenizer
|
||||||
|
# model_name = "bigscience/bloom-7b1" # Replace with your model
|
||||||
|
model_name = "google/flan-t5-large"
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||||
|
|
||||||
|
# Automatically map model layers to available GPUs
|
||||||
|
model = AutoModelForSeq2SeqLM.from_pretrained(
|
||||||
|
model_name,
|
||||||
|
device_map="auto", # Automatically split across multiple GPUs
|
||||||
|
torch_dtype="auto" # Use FP16 if available
|
||||||
|
)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Prepare input
|
||||||
|
text = "The quick brown fox jumps over the lazy dog."
|
||||||
|
inputs = tokenizer(text, return_tensors="pt")
|
||||||
|
inputs = inputs.to("cuda")
|
||||||
|
|
||||||
|
# Generate output
|
||||||
|
outputs = model.generate(inputs["input_ids"], max_length=50)
|
||||||
|
|
||||||
|
# Decode and print result
|
||||||
|
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
||||||
|
# %%
|
||||||
|
# %%
|
||||||
|
# Prepare input
|
||||||
|
|
||||||
|
def generate_acronym(text):
    """Prompt the loaded model for a possible acronym of ``text``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text (str): The phrase to build an acronym from.

    Returns:
        str: The first generated sequence, decoded with special tokens
        skipped.
    """
    # Define prompt
    prompt = f"Answer concisely: make a possible acronym from the following: '{text}'"

    # Tokenize and move the tensors to the model's own device instead of a
    # hard-coded "cuda", so the call does not depend on a GPU being present.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate acronym; unpacking forwards the attention mask along with the
    # input ids.
    outputs = model.generate(
        **inputs,
        max_length=100,
        no_repeat_ngram_size=3,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
||||||
|
|
||||||
|
# %%
|
||||||
|
# Example usage
|
||||||
|
# text = "Advanced Data Analytics Platform"
|
||||||
|
text = "red hat enterprise linux"
|
||||||
|
acronym = generate_acronym(text)
|
||||||
|
print(f"Acronym: {acronym}")
|
||||||
|
# %%
|
Loading…
Reference in New Issue