Added datasets

- included code for augmentation
This commit is contained in:
Richard Wong 2025-01-14 17:34:17 +09:00
parent a1d000d9c8
commit e90bc69ea9
32 changed files with 2221698 additions and 41 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
*.zip

2
analysis/.gitignore vendored
View File

@ -1 +1,3 @@
__pycache__
*.csv
*.md

View File

@ -1,6 +1,7 @@
# %%
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# %%
# import training file
@ -13,5 +14,45 @@ id_counts = train_df['entity_id'].value_counts()
# %%
plt.hist(id_counts, bins=50)
# %%
id_counts[:50]
# %%
plt.hist(id_counts, bins=50)
# %%
def compute_normalized_class_weights(class_counts, max_resamples=10):
    """
    Compute integer resample counts inversely proportional to class counts.

    Each class is weighted by `total_samples / count`, the weights are divided
    by the largest weight (so the rarest class gets 1.0), scaled by
    `max_resamples`, and rounded to the nearest integer. The result is NOT a
    set of weights summing to 1: the rarest class receives `max_resamples`
    and much more frequent classes may round down to 0.

    Args:
        class_counts (array-like): An array or list where each element represents the count of samples for a class.
        max_resamples (int): Resample count assigned to the rarest class.

    Returns:
        numpy.ndarray: An integer array with one resample count per class, in
        the same order as `class_counts`. Classes with a non-positive count
        get 0. Empty input yields an empty array.
    """
    class_counts = np.asarray(class_counts, dtype=float)
    resample_counts = np.zeros(class_counts.shape, dtype=int)

    # Classes with no samples cannot be resampled; leaving them at 0 also
    # avoids the division by zero that would poison every other weight.
    populated = class_counts > 0
    if not populated.any():
        return resample_counts

    populated_counts = class_counts[populated]
    total_samples = np.sum(populated_counts)
    class_weights = total_samples / populated_counts

    # so that highest weight is 1
    normalized_weights = class_weights / np.max(class_weights)

    # Scale weights such that the highest weight corresponds to `max_resamples`,
    # then round to the nearest integer
    resample_counts[populated] = np.round(
        normalized_weights * max_resamples
    ).astype(int)
    return resample_counts
# %%
# resample count per entity id; the rarest id gets max_resamples (10)
id_weights = compute_normalized_class_weights(id_counts, max_resamples=10)
# %%
# display the computed resample counts
id_weights
# %%
# inspect the training rows for a single entity id
id_mask = train_df['entity_id'] == 536
train_df[id_mask]
# %%
# entity ids in the same order as the entries of id_counts
id_counts.index.to_list()
# %%

View File

@ -18,12 +18,11 @@ id2label = {}
for _, row in entity_df.iterrows():
id2label[row['id']] = row['name']
# %%
train_df.sort_values(by=['entity_id']).to_markdown('out.md')
# %%
data_path = '../train/class_bert_process/classification_prediction/exports/result.csv'
data_path = '../train/class_bert_process/prediction/exports/result.csv'
prediction_df = pd.read_csv(data_path)
# %%
@ -39,26 +38,31 @@ new_df = pd.concat((test_df, prediction_df ), axis=1)
mismatch_mask = new_df['entity_id'] != new_df['class_prediction']
mismatch_df = new_df[mismatch_mask]
# %%
len(mismatch_df)
# %%
# print the top 10 offending classes
print(mismatch_df['entity_id'].value_counts()[:10])
# %%
# Convert the whole dataframe as a string and display
# print the mismatch_df
print(mismatch_df.to_markdown())
print(mismatch_df.sort_values(by=['entity_id']).to_markdown())
# %%
mismatch_df.to_csv('error.csv')
# %%
# let us see the test mentions
select_value = 434
select_value = 268
select_mask = mismatch_df['entity_id'] == select_value
mismatch_df[select_mask]
# %%
# let us see the train mentions
select_value = 434
select_value = 452
select_mask = train_df['entity_id'] == select_value
train_df[select_mask]

3
biomedical/bc2gm/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
!.gitignore
!*.txt

277944
biomedical/bc2gm/test_dictionary.txt Executable file

File diff suppressed because it is too large Load Diff

277944
biomedical/bc2gm/train_dictionary.txt Executable file

File diff suppressed because it is too large Load Diff

3
biomedical/bc5cdr-chemical/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
!.gitignore
!*.txt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3
biomedical/bc5cdr-disease/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
!.gitignore
!*.txt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3
biomedical/ncbi/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*
!.gitignore
!*.txt

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
*.csv

0
esAppMod_data_import/.gitignore vendored Normal file
View File

View File

@ -0,0 +1,699 @@
id,name,type_id,type_name
1,(E)JES,2,App
2,A-Auto Job Scheduling Software,2,App
3,Activiti,2,App
4,Adobe Acrobat Reader,2,App
5,Ansible,2,App
6,Apache ActiveMQ,2,App
7,Apache Hbase,2,App
8,Apache Hive,2,App
9,Apache Kafka,2,App
10,Apache ServiceMix,2,App
11,Apache Solr,2,App
12,Apache Subversion,2,App
13,Application Development Facility (ADF),2,App
14,Asterisk,2,App
15,Automic Job Scheduler,2,App
16,Autosys,2,App
17,Bluebeam|Bluebeam Q,2,App
18,BMC Control-M,2,App
19,BMC Identity Management,2,App
20,Borland Database Engine (BDE),2,App
21,Business Intelligence and Reporting Tools (BIRT),2,App
22,CA Gen,2,App
23,CA Introscope,2,App
24,CA-Panvalet,2,App
25,CA-TELON,2,App
26,Casegen,2,App
27,Chef Automate,2,App
28,Cisco AMP for Endpoints,2,App
29,CiscoWorks LAN Management Solution (LMS),2,App
30,Citrix Virtual Apps and Desktops,2,App
31,Citrix ADC CPX,2,App
32,Citrix Provisioning,2,App
33,Clarify,2,App
34,Clarity LIMS,2,App
35,LabWare LIMS,2,App
36,Cognos,2,App
37,Coldfusion,2,App
38,ConceptWave,2,App
39,CONNAPI,2,App
40,Connect Direct,2,App
41,Cornerstone software,2,App
42,Crystal Reports,2,App
43,DB2,2,App
44,Documentum Content Server,2,App
45,Drupal,2,App
46,Eclipse,2,App
47,Elastic (ELK) Stack,2,App
48,ETAP License Manager (LM),2,App
49,ExamDiff,2,App
50,F5 Secure Web Gateway Services,2,App
51,FileMaker Pro,2,App
52,FlexNet Manager Suite,2,App
53,FTP Voyager,2,App
54,Genymotion,2,App
55,Google Chrome,2,App
56,Greenplum DB,2,App
57,Hadoop,2,App
58,HP aC++ compiler,2,App
59,HP C/ANSI C compiler,2,App
60,HP Operations Orchestration (HPOO),2,App
61,HP Server Automation (HPSA),2,App
62,IBM BigFix Platform,2,App
63,IBM Business Monitor,2,App
64,IBM Business Process Manager,2,App
65,IBM Content Manager OnDemand (CMOD),2,App
66,IBM FileNet P8 Platform,2,App
67,IBM InfoSphere DataStage,2,App
68,IBM Integration Bus,2,App
69,IBM License Metric Tool,2,App
70,IBM Maximo,2,App
71,IBM Migration Utility,2,App
72,IBM Mobile Foundation,2,App
73,IBM Operational Decision Manager (ODM),2,App
74,IBM Spectrum Scale,2,App
75,IBM Tivoli Asset Management,2,App
76,IBM Tivoli Composite Application Manager,2,App
77,IBM Tivoli Monitoring,2,App
78,IBM Tivoli Storage Manager,2,App
79,IBM Tivoli Workload Scheduler (TWS),2,App
80,IBM WebSphere Business Integration Adaptor,2,App
81,IBM Websphere MQ,2,App
82,IBM WebSphere MQ Telemetry,2,App
83,IBM WebSphere Transformation Extender (WTX),2,App
84,IMS DB,2,App
85,Info-ZIP,2,App
86,Infobright Community Edition (ICE),2,App
87,Informatica PowerCenter,2,App
88,Ingres,2,App
89,JBoss|JBoss Enterprise Service Bus,2,App
90,Jenkins,2,App
91,joinIT,2,App
92,LifeFlow,2,App
93,Lotus Notes,2,App
94,MaaS360,2,App
95,Malwarebytes Anti-Malware,2,App
96,ManageEngine ADSelfService Plus,2,App
97,MarkLogic DB,2,App
98,Memcached,2,App
99,Microsoft Access,2,App
100,Microsoft BizTalk Adapters for Host Systems,2,App
101,Microsoft Dynamics AX,2,App
102,Microsoft Endpoint Configuration Manager (SCCM),2,App
103,Microsoft Excel,2,App
104,Microsoft Exchange Server,2,App
105,Microsoft Forefront Identity Manager (FIM),2,App
106,Microsoft InfoPath,2,App
107,Microsoft Internet Explorer,2,App
108,Microsoft ISA Server,2,App
109,Microsoft MQ,2,App
110,Microsoft System Center Endpoint Protection,2,App
111,Microsoft Visual Studio,2,App
112,Microsoft Web Deploy,2,App
113,Microsoft Web Farm Framework (WFF),2,App
114,Microsoft Web Platform Installer,2,App
115,Model Driven Workflow (MDW),2,App
116,MongoDB,2,App
117,Mozilla Firefox,2,App
118,MQ Client,2,App
119,MS Office 365,2,App
120,MS SQL Server,2,App
121,MS SQL Server Compact,2,App
122,MySQL,2,App
123,Neo4j,2,App
124,Nexus Repository OSS,2,App
125,Nix package manager,2,App
126,OpenLDAP,2,App
127,OpenText Exstream,2,App
128,OpenVPN,2,App
129,Oracle Access Management,2,App
130,Oracle ADF,2,App
131,Oracle APEX,2,App
132,Oracle BI Publisher,2,App
133,Oracle Business Intelligence,2,App
134,Oracle Database,2,App
135,Oracle Designer,2,App
136,Oracle Enterprise Manager,2,App
137,Oracle Forms,2,App
138,Oracle Hyperion|Hyperion Interactive Reporting,2,App
139,Oracle Hyperion|Hyperion Planning,2,App
140,Oracle Net Services,2,App
141,Oracle Real Application Clusters (RAC),2,App
142,Oracle Retail Point-of-Service,2,App
143,Oracle Service Bus,2,App
144,Oracle Smart View,2,App
145,Oracle SOA Suite,2,App
146,Oracle SQL Developer,2,App
147,Oracle TimesTen In-Memory Database,2,App
148,Oracle Warehouse Builder (OWB),2,App
149,Orbix,2,App
150,Pentaho,2,App
151,PeopleSoft,2,App
152,Perkin Elmer Informatics (PKI),2,App
153,Pervasive PSQL,2,App
154,PIPE-FLO,2,App
155,PKZIP,2,App
156,Planview,2,App
157,PostgreSQL,2,App
158,Powerbuilder,2,App
159,Primavera P6,2,App
160,Pro*COBOL,2,App
161,ProjectWise,2,App
162,ProjectWise Web Server,2,App
163,PVCS Version Manager,2,App
164,QlikView,2,App
165,RabbitMQ,2,App
166,Rational ClearCase,2,App
167,Rational ClearQuest,2,App
168,Redis,2,App
169,Remedy,2,App
170,Riak,2,App
171,RightFax,2,App
172,Rumba,2,App
173,SAP BusinessObjects BI server,2,App
174,SAP ERP,2,App
175,SAP HANA DB,2,App
176,SAP MaxDB,2,App
177,SAP NetWeaver Business Warehouse,2,App
178,SAP SQL Anywhere,2,App
179,SAP Web Dynpro,2,App
180,Sentry,2,App
181,SharePoint,2,App
182,Siebel,2,App
183,SNA Manager,2,App
184,SnagIt,2,App
185,solidDB,2,App
186,SonarQube,2,App
187,SpaceMonger,2,App
188,Splunk,2,App
189,SQLIO,2,App
190,Sybase SQL Server,2,App
191,Syncsort,2,App
192,Sysinternal Tools,2,App
193,Sysinternal Tools|*,2,App
194,Sysinternal Tools|AccessEnum,2,App
195,Sysinternal Tools|ClockRes,2,App
196,Sysinternal Tools|Coreinfo,2,App
197,Sysinternal Tools|DiskExt,2,App
198,Sysinternal Tools|DiskMon,2,App
199,Sysinternal Tools|Hex2dec,2,App
200,Sysinternal Tools|Junction,2,App
201,Sysinternal Tools|LDMDump,2,App
202,Sysinternal Tools|LoadOrder,2,App
203,Sysinternal Tools|PipeList,2,App
204,Sysinternal Tools|Process Explorer,2,App
205,Sysinternal Tools|PsKill,2,App
206,Sysinternal Tools|PsPasswd,2,App
207,Sysinternal Tools|SDelete,2,App
208,Sysinternal Tools|ShareEnum,2,App
209,Sysinternal Tools|Sync,2,App
210,Sysinternal Tools|TCPView,2,App
211,Sysinternal Tools|VMMap,2,App
212,Sysinternal Tools|Whois,2,App
213,Tableau,2,App
214,TCPLink Enterprise Server,2,App
215,Teradata,2,App
216,Teradata QS Server,2,App
217,TIBCO Business Works (BW),2,App
218,TIBCO InConcert,2,App
219,TIBCO Rendezvous,2,App
220,Tivoli Access Manager (TAM),2,App
221,TortoiseCVS,2,App
222,TortoiseSVN,2,App
223,TSO/ISPF,2,App
224,TWS zCentric,2,App
225,Uniface,2,App
226,ViewNow X Server,2,App
227,Virtual I/O Server,2,App
228,Visibroker,2,App
229,VMware Solution Exchange Marketplace (VSX),2,App
230,VMware Tools,2,App
231,VMware vCenter,2,App
232,WebFOCUS,2,App
233,WebLogic Integration,2,App
234,WebSphere Commerce Suite (WCS),2,App
235,WebSphere Message Broker,2,App
236,Wherescape Red,2,App
237,Windchill,2,App
238,Windows Indexing Service,2,App
239,Windows Terminal Server (WTS),2,App
240,WingArc SVF,2,App
241,WinMerge,2,App
242,WinRAR,2,App
243,WinSCP,2,App
244,Wise Package Studio,2,App
245,Wordpress,2,App
246,XAMPP,2,App
247,ZAP BI,2,App
248,ZeroMQ,2,App
249,Zerto Virtual Replication,2,App
250,IBM PowerHA,2,App
251,Tivoli Netcool/OMNIbus,2,App
252,IBM ILOG Views,2,App
253,IBM ILOG CPLEX,2,App
254,IBM ILOG Jviews,2,App
255,IBM ILOG Elixir,2,App
256,IBM ILOG Supply Chain Apps,2,App
257,ILOG Solver,2,App
258,SQLite,2,App
259,Apache HTTP Server,8,App Server
260,Apache Tomcat,8,App Server
261,ArcGIS Server,8,App Server
262,Oracle WebLogic Server,8,App Server
263,GlassFish,8,App Server
264,HAProxy,8,App Server
265,IBM HTTP Server,8,App Server
266,IIS,8,App Server
267,JBoss,8,App Server
268,JBoss|*,8,App Server
269,Kitura,8,App Server
270,Lotus Domino,8,App Server
271,Lucee,8,App Server
272,Netscape Application Server (NAS),8,App Server
273,Netscape Enterprise Server (NES),8,App Server
274,Nginx,8,App Server
275,Oracle Application Server,8,App Server
276,Oracle WebCenter Content Server,8,App Server
277,Pivotal tc Server,8,App Server
278,Resin Web Server,8,App Server
279,SAP NetWeaver App Server,8,App Server
280,Spark,8,App Server
281,Oracle iPlanet Web Server,8,App Server
282,UltiDev Web Server Pro (UWS),8,App Server
283,webMethods Integration Server,8,App Server
284,Websphere Application Server (WAS),8,App Server
285,WebSphere Liberty,8,App Server
286,WebSphere Portal Server,8,App Server
287,Websphere Process Server,8,App Server
288,WebSphere Process Server,8,App Server
289,Oracle Real-Time Decisions (RTD),8,App Server
290,CA API Gateway,4,HW
291,Citrix ADC SDX,4,HW
292,Citrix ADC MPX,4,HW
293,HP Nonstop,4,HW
294,IBM DataPower Gateway,4,HW
295,IBM Power Systems,4,HW
296,Intel Xeon Processor,4,HW
297,Net Optics Taps,4,HW
298,Oracle Exadata,4,HW
299,AutoIt,9,Lang
300,AWK,9,Lang
301,BASIC,9,Lang
302,Brainscript,9,Lang
303,C,9,Lang
304,C#,9,Lang
305,C++,9,Lang
306,C++|Visual C++,9,Lang
307,Cascading Style Sheets (CSS),9,Lang
308,Clipper,9,Lang
309,CLIST,9,Lang
310,COBOL,9,Lang
311,ColdFusion Markup Language (CFML),9,Lang
312,Data Language Interface (DL/I),9,Lang
313,Delphi,9,Lang
314,Easytrieve,9,Lang
315,Expect,9,Lang
316,eXtensible HyperText Markup Language (XHTML),9,Lang
317,Extensible Markup Language (XML),9,Lang
318,Extensible Markup Language (XML)|MSXML,9,Lang
319,Extensible Stylesheet Language (XSL),9,Lang
320,Extensible Stylesheet Language Transformations (XLST),9,Lang
321,FOCUS,9,Lang
322,Fortran,9,Lang
323,Go,9,Lang
324,GraphQL,9,Lang
325,Groovy,9,Lang
326,HiveQL,9,Lang
327,Hypertext Markup Language (HTML),9,Lang
328,IBM High Level Assembler (HLASM),9,Lang
329,IBM i Control Language (CL),9,Lang
330,IBM Informix-4GL,9,Lang
331,Java,9,Lang
332,Java|Extensible Stylesheet Language (XSL),9,Lang
333,Java|Java Enterprise Edition (Java EE),9,Lang
334,Java|Java Standard Edition (Java SE),9,Lang
335,Java|JavaServer Pages (JSP),9,Lang
336,Java|JavaServer Pages (JSP)|Scriptlets,9,Lang
337,JavaScript,9,Lang
338,JCL,9,Lang
339,Job Information Language (JIL),9,Lang
340,JScript,9,Lang
341,Lisp,9,Lang
342,Niakwa Programming Language (NPL),9,Lang
343,Objective C,9,Lang
344,OpenEdge ABL,9,Lang
345,Pascal,9,Lang
346,Pascal|Object Pascal,9,Lang
347,Perl,9,Lang
348,Perl|ActivePerl,9,Lang
349,Perl|Rex,9,Lang
350,PHP,9,Lang
351,PL/I,9,Lang
352,PL/SQL,9,Lang
353,PRO*C,9,Lang
354,Python,9,Lang
355,R,9,Lang
356,Rexx,9,Lang
357,RPG,9,Lang
358,Ruby,9,Lang
359,Salesforce Object Query Language (SOQL),9,Lang
360,SAS,9,Lang
361,Sass,9,Lang
362,Scala,9,Lang
363,Smalltalk,9,Lang
364,Swift,9,Lang
365,TCL,9,Lang
366,Transact-SQL,9,Lang
367,TypeScript,9,Lang
368,VB.NET,9,Lang
369,VBScript,9,Lang
370,Visual Basic,9,Lang
371,Visual Basic for Applications (VBA),9,Lang
372,Visual FoxPro,9,Lang
373,VoiceXML,9,Lang
374,Xbase++,9,Lang
375,Apache Lucene,12,Lib
376,Apache Xerces,12,Lib
377,Cascading Style Sheets (CSS)|Bootstrap,12,Lib
378,Java|Apache Camel,12,Lib
379,Java|Apache Commons BeanUtils,12,Lib
380,Java|Apache PDFBox,12,Lib
381,Java|Apache Velocity,12,Lib
382,Java|EclipseLink,12,Lib
383,Java|Enterprise JavaBeans (EJB),12,Lib
384,Java|EZMorph,12,Lib
385,Java|Google Web Toolkit (GWT),12,Lib
386,Java|Hibernate,12,Lib
387,Java|IBM SDK,12,Lib
388,Java|Java Development Kit (JDK),12,Lib
389,Java|Java Message Service (JMS),12,Lib
390,Java|Java Web Start,12,Lib
391,Java|JavaServer Faces (JSF),12,Lib
392,Java|JDBC,12,Lib
393,Java|JRuby Core,12,Lib
394,Java|Log4j,12,Lib
395,Java|Quartz,12,Lib
396,Java|Remote Method Invocation (RMI),12,Lib
397,Java|Servlet,12,Lib
398,Java|Spring,12,Lib
399,Java|Spring|Spring Boot,12,Lib
400,Java|Spring|Spring Cloud Data Flow,12,Lib
401,Java|Spring|Spring MVC,12,Lib
402,Java|Struts,12,Lib
403,Java|Swing,12,Lib
404,Java|Vaadin,12,Lib
405,JavaScript|AJAX,12,Lib
406,JavaScript|AngularJS,12,Lib
407,JavaScript|Draw2D,12,Lib
408,JavaScript|Express.js,12,Lib
409,JavaScript|Ext JS,12,Lib
410,JavaScript|jqGrid,12,Lib
411,JavaScript|JQuery,12,Lib
412,JavaScript|Jquery|jQuery UI,12,Lib
413,JavaScript|React,12,Lib
414,JavaScript|script.aculo.us,12,Lib
415,JavaScript|Valums AJAX File Uploader,12,Lib
416,OWASP Enterprise Security API (ESAPI),12,Lib
417,Perl|Oraperl,12,Lib
418,Android,6,OS
419,BeOS,6,OS
420,Cisco IOS,6,OS
421,DART,6,OS
422,Fabric OS,6,OS
423,GNU,6,OS
424,IBM i,6,OS
425,iOS,6,OS
426,Linux,6,OS
427,Linux|CentOS,6,OS
428,Linux|Check Point,6,OS
429,Linux|Debian,6,OS
430,Linux|Junos OS,6,OS
431,Linux|openSUSE,6,OS
432,Linux|Oracle Linux,6,OS
433,Linux|Photon OS,6,OS
434,Linux|Red Hat Enterprise Linux,6,OS
435,Linux|SUSE Linux Enterprise Server,6,OS
436,Linux|Ubuntu,6,OS
437,Linux|zLinux,6,OS
438,macOS,6,OS
439,MVS,6,OS
440,MVS|OS/390,6,OS
441,MVS|z/OS,6,OS
442,OpenVMS,6,OS
443,OS/2,6,OS
444,Unix,6,OS
445,Unix|AIX,6,OS
446,Unix|BSD,6,OS
447,Unix|BSD|FreeBSD,6,OS
448,Unix|BSD|SunOS,6,OS
449,Unix|HP-UX,6,OS
450,Windows,6,OS
451,Windows|Windows Desktop,6,OS
452,Windows|Windows Server,6,OS
453,Linux|Fedora,6,OS
454,Linux|Amazon Linux,6,OS
455,Clarify|Clear Basic,5,Plugin
456,Eclipse|ATLAS Transformation Language (ATL),5,Plugin
457,IBM BigFix Platform|Client Deploy Tool,5,Plugin
458,IBM Integration Bus|Extended Structured Query Language (ESQL),5,Plugin
459,IBM Tivoli Asset Management|Asset Discovery for Distributed,5,Plugin
460,IBM Tivoli Storage Manager|TSM API,5,Plugin
461,IBM Tivoli Storage Manager|TSM Client,5,Plugin
462,IBM Tivoli Storage Manager|TSM Storage Agent,5,Plugin
463,IBM Tivoli Storage Manager|VSS Requestor,5,Plugin
464,Microsoft Exchange Server|Veeam Explorer,5,Plugin
465,MS SQL Server|MS SQL Server Browser,5,Plugin
466,MS SQL Server|Data Transformation Services,5,Plugin
467,MS SQL Server|Log Reader Agent,5,Plugin
468,MS SQL Server|SQL Server Analysis Services (SSAS),5,Plugin
469,MS SQL Server|SQL Server Database Engine,5,Plugin
470,MS SQL Server|SQL Server Integration Services (SSIS),5,Plugin
471,MS SQL Server|SQL Server Management Studio,5,Plugin
472,MS SQL Server|SQL Server Report Builder,5,Plugin
473,MS SQL Server|SQL Server Reporting Services (SSRS),5,Plugin
474,Oracle Database|Jserver,5,Plugin
475,Oracle Database|Oracle Spatial and Graph,5,Plugin
476,SAP ERP|SAP EHP,5,Plugin
477,SAP ERP|SAP Kernel,5,Plugin
478,Oracle Database|SQL*Plus,5,Plugin
479,Sybase SQL Server|Sybase Central,5,Plugin
480,Sybase SQL Server|Sybase Dsedit,5,Plugin
481,TIBCO Business Works (BW)|Integration Manager,5,Plugin
482,.NET Framework|Common Runtime Library,7,Runlib
483,.NET Framework|log4net,7,Runlib
484,.NET Framework|Magick.NET,7,Runlib
485,.NET Framework|Windows Communication Foundation (WCF),7,Runlib
486,.NET Framework|Windows Workflow Foundation (WF),7,Runlib
487,.NET Framework|WinForms,7,Runlib
488,ActiveX|ADO,7,Runlib
489,IIS|Easy Migration Tool (IEMT),7,Runlib
490,IIS|Application Request Routing (ARR),7,Runlib
491,IIS|IIS Manager,7,Runlib
492,JBoss|JBoss Seam,7,Runlib
493,JBoss|Wildfly,7,Runlib
494,Oracle Application Server|Oracle Transparent Gateway,7,Runlib
495,Oracle WebCenter Content Server|Idoc Script,7,Runlib
496,SAP NetWeaver App Server|ABAP,7,Runlib
497,.NET Framework,10,Runtime
498,Active Directory (AD),10,Runtime
499,Active Server Pages (ASP),10,Runtime
500,ActiveX,10,Runtime
501,Apache Cordova,10,Runtime
502,CICS,10,Runtime
503,Docker,10,Runtime
504,Flash,10,Runtime
505,HTTP File Server,10,Runtime
506,Java Runtime Environment (JRE),10,Runtime
507,Node.js,10,Runtime
508,Ruby on Rails,10,Runtime
509,VisualForce,10,Runtime
510,EMC Celerra,11,Storage
511,Application Lifecycle Management (ALM),1,Technology
512,Assembler Language,1,Technology
513,Batch Management Software (BMS),1,Technology
514,Business Object Reports,1,Technology
515,Common Gateway Interface (CGI),1,Technology
516,Compopent Object Model (COM),1,Technology
517,Common Object Request Broker Architecture (CORBA),1,Technology
518,CORBA Interface Definition Language (CORBA IDL),1,Technology
519,Data Control Language (DCL),1,Technology
520,Database (DB),1,Technology
521,Electronic Data Interchange (EDI),1,Technology
522,Application Web Server,1,Technology
523,Java-based Document Object Model for XML (JDOM),1,Technology
524,Lightweight Directory Access Protocol (LDAP),1,Technology
525,Open Database Connectivity (ODBC),1,Technology
526,Order Management System (OMS),1,Technology
527,Oracle Web Services,1,Technology
528,Reporting Services,1,Technology
529,Representational State Transfer (REST),1,Technology
530,Service-Oriented Architecture (SOA),1,Technology
531,Simple Object Access Protocol (SOAP),1,Technology
532,SQL,9,Lang
533,YAML,1,Technology
534,Model-view-controller (MVC),1,Technology
535,Application Server,1,Technology
536,Cloud,1,Technology
537,Competency and Quality Assurance Server,1,Technology
538,Device Provisioning Engines (DPE),1,Technology
539,E-business solution,1,Technology
540,Enterprise Service Bus(ESB),1,Technology
541,File Server,1,Technology
542,General Ledger,1,Technology
543,HTTP client,1,Technology
544,HTTP Server,1,Technology
545,Integrated Safe System of Work (ISSOW),1,Technology
546,Internet Exchange Point - Full Stack (ixp-ft),1,Technology
547,Internet Message Access Protocol (IMAP),1,Technology
548,JSON,1,Technology
549,KVS Application Server,1,Technology
550,KVS File Server,1,Technology
551,KVS Proxy Server,1,Technology
552,mainframe,1,Technology
553,Manufacturing Execution System (MES),1,Technology
554,Mobile,1,Technology
555,NonSQL,1,Technology
556,SaaS,1,Technology
557,Storage Area Network (SAN),1,Technology
558,Supplier Registration System Application Server,1,Technology
559,Virtual Appliance,1,Technology
560,Webtop,1,Technology
561,Proxy Server,1,Technology
562,Utility,1,Technology
563,Citrix ADC,3,VM
564,Citrix ADC VPX,3,VM
565,Citrix ADC BLX,3,VM
566,InterScan Messaging Security Virtual Appliance (IMSVA),3,VM
567,Oracle VM,3,VM
568,VMware ESXi,3,VM
569,VMware Server,3,VM
570,IBM WebSphere Transformation Extender (WTX),2,App
571,Oracle Retail Point-of-Service,2,App
572,Structured Query Language (SQL),1,Technology
573,TSO/ISPF,2,App
574,Model view controller (MVC),1,Technology
575,|*,6,OS
576,Linux|*,6,OS
577,MVS|*,6,OS
578,Unix|*,6,OS
579,Unix|BSD|*,6,OS
580,Windows|*,6,OS
581,MS SQL Server|*,2,App
582,C#|*,9,Lang
583,C++|*,9,Lang
584,Java|*,9,Lang
585,Perl|*,9,Lang
586,PHP|*,9,Lang
587,Python|*,9,Lang
588,Ruby|*,9,Lang
589,JavaScript|*,9,Lang
590,Unix|BSD|OpenBSD,6,OS
591,z/VSE,6,OS
592,Active Server Pages (ASP)|*,9,Lang
593,MS-DOS,6,OS
594,COBOL|*,9,Lang
595,VME,6,OS
596,Extensible Markup Language (XML)|*,9,Lang
597,DOS/360,6,OS
598,z/TPF,6,OS
599,Pascal|*,9,Lang
600,Oracle WebLogic Server|*,8,App Server
601,Websphere ILOG JRules BRMS,2,App
602,Unix|BSD|NetBSD,6,OS
603,SharePoint|*,2,App
604,IBM Tivoli Storage Manager|*,2,App
605,IBM Spectrum Scale|*,2,App
606,IBM Tivoli Asset Management|*,2,App
607,Oracle Hyperion|*,2,App
608,z/VM,6,OS
609,IIS|*,8,App Server
610,Oracle Application Server|*,8,App Server
611,instana,10,Runtime
612,credstash,2,App
613,Snyk,2,App
614,Akka,2,App
615,Varnish,8,App Server
616,Datadog,10,Runtime
617,API,1,Technology
618,Hazelcast,10,Runtime
619,Infinispan,2,App
620,Nuxeo,10,Runtime
621,ArangoDB,2,App
622,Eclipse Che,2,App
623,Amazon S3,2,App
624,ClickHouse,2,App
625,MinIO,2,App
626,Elasticsearch,2,App
627,XtraDB,2,App
628,Keycloak,2,App
629,Grafana,2,App
630,Mattermost,10,Runtime
631,Synapse,2,App
632,Cloud IAM,2,App
633,Knative,10,Runtime
634,Apache Cassandra,2,App
635,Kubeflow,10,Runtime
636,Qiskit,2,App
637,Microsoft Azure,2,App
638,Strimzi,10,Runtime
639,Sematext,10,Runtime
640,Eclipse hawkBit,2,App
641,Eclipse Ditto,2,App
642,MariaDB,2,App
643,Zadara,2,App
644,Istio,2,App
645,Vault,2,App
646,Apache Druid,2,App
647,etcd,2,App
648,Traefik,8,App Server
649,IBM Cloud,2,App
650,YugabyteDB,2,App
651,CockroachDB,2,App
652,Jaeger,10,Runtime
653,Natural Programming Language,9,Lang
654,AcuCOBOL,9,Lang
655,Ada,9,Lang
656,ADABAS,2,App
657,ADSO,9,Lang
658,Ansible,2,App
659,Batch,9,Lang
660,Powershell,9,Lang
661,COM+,10,Runtime
662,Dataflex,9,Lang
663,DDS,9,Lang
664,Forte,9,Lang
665,Foxpro,9,Lang
666,IBM DB2 Purescale,2,App
667,IDMS DB,2,App
668,IDMS DML,9,Lang
669,Jaguar,8,App Server
670,EAServer,8,App Server
671,Apache Cassandra,2,App
672,IBM Netezza,4,HW
673,OpenEdge,9,Lang
674,OpenROAD,9,Lang
675,Oracle Reports,2,App
676,SAP Replication Server,2,App
677,Git,2,App
678,GitLab,2,App
679,VSAM,2,App
680,Cloud<>Apache HTTP Server,2,App
681,Cloud<>Windows|Windows Server,2,App
682,Cloud<>MS SQL Server,2,App
683,Cloud<>Azure SQL Server Database,2,App
684,Cloud<>MySQL,2,App
685,Cloud<>Oracle Database,2,App
686,Cloud<>PostgreSQL,2,App
687,Cloud<>AWS RDS,2,App
688,Cloud<>SAP HANA DB,2,App
689,BMS Map,9,Lang
690,DB400,2,App
691,ILE,9,Lang
692,Integrated Data Store (IDS),2,App
693,ISAM,2,App
694,Oracle RDS,2,App
695,SAP IQ,2,App
696,Cloud<>Linux,2,App
697,Apache Maven,2,App
698,IBM Basic Assembly Language (BAL),9,Lang
1 id name type_id type_name
2 1 (E)JES 2 App
3 2 A-Auto Job Scheduling Software 2 App
4 3 Activiti 2 App
5 4 Adobe Acrobat Reader 2 App
6 5 Ansible 2 App
7 6 Apache ActiveMQ 2 App
8 7 Apache Hbase 2 App
9 8 Apache Hive 2 App
10 9 Apache Kafka 2 App
11 10 Apache ServiceMix 2 App
12 11 Apache Solr 2 App
13 12 Apache Subversion 2 App
14 13 Application Development Facility (ADF) 2 App
15 14 Asterisk 2 App
16 15 Automic Job Scheduler 2 App
17 16 Autosys 2 App
18 17 Bluebeam|Bluebeam Q 2 App
19 18 BMC Control-M 2 App
20 19 BMC Identity Management 2 App
21 20 Borland Database Engine (BDE) 2 App
22 21 Business Intelligence and Reporting Tools (BIRT) 2 App
23 22 CA Gen 2 App
24 23 CA Introscope 2 App
25 24 CA-Panvalet 2 App
26 25 CA-TELON 2 App
27 26 Casegen 2 App
28 27 Chef Automate 2 App
29 28 Cisco AMP for Endpoints 2 App
30 29 CiscoWorks LAN Management Solution (LMS) 2 App
31 30 Citrix Virtual Apps and Desktops 2 App
32 31 Citrix ADC CPX 2 App
33 32 Citrix Provisioning 2 App
34 33 Clarify 2 App
35 34 Clarity LIMS 2 App
36 35 LabWare LIMS 2 App
37 36 Cognos 2 App
38 37 Coldfusion 2 App
39 38 ConceptWave 2 App
40 39 CONNAPI 2 App
41 40 Connect Direct 2 App
42 41 Cornerstone software 2 App
43 42 Crystal Reports 2 App
44 43 DB2 2 App
45 44 Documentum Content Server 2 App
46 45 Drupal 2 App
47 46 Eclipse 2 App
48 47 Elastic (ELK) Stack 2 App
49 48 ETAP License Manager (LM) 2 App
50 49 ExamDiff 2 App
51 50 F5 Secure Web Gateway Services 2 App
52 51 FileMaker Pro 2 App
53 52 FlexNet Manager Suite 2 App
54 53 FTP Voyager 2 App
55 54 Genymotion 2 App
56 55 Google Chrome 2 App
57 56 Greenplum DB 2 App
58 57 Hadoop 2 App
59 58 HP aC++ compiler 2 App
60 59 HP C/ANSI C compiler 2 App
61 60 HP Operations Orchestration (HPOO) 2 App
62 61 HP Server Automation (HPSA) 2 App
63 62 IBM BigFix Platform 2 App
64 63 IBM Business Monitor 2 App
65 64 IBM Business Process Manager 2 App
66 65 IBM Content Manager OnDemand (CMOD) 2 App
67 66 IBM FileNet P8 Platform 2 App
68 67 IBM InfoSphere DataStage 2 App
69 68 IBM Integration Bus 2 App
70 69 IBM License Metric Tool 2 App
71 70 IBM Maximo 2 App
72 71 IBM Migration Utility 2 App
73 72 IBM Mobile Foundation 2 App
74 73 IBM Operational Decision Manager (ODM) 2 App
75 74 IBM Spectrum Scale 2 App
76 75 IBM Tivoli Asset Management 2 App
77 76 IBM Tivoli Composite Application Manager 2 App
78 77 IBM Tivoli Monitoring 2 App
79 78 IBM Tivoli Storage Manager 2 App
80 79 IBM Tivoli Workload Scheduler (TWS) 2 App
81 80 IBM WebSphere Business Integration Adaptor 2 App
82 81 IBM Websphere MQ 2 App
83 82 IBM WebSphere MQ Telemetry 2 App
84 83 IBM WebSphere Transformation Extender (WTX) 2 App
85 84 IMS DB 2 App
86 85 Info-ZIP 2 App
87 86 Infobright Community Edition (ICE) 2 App
88 87 Informatica PowerCenter 2 App
89 88 Ingres 2 App
90 89 JBoss|JBoss Enterprise Service Bus 2 App
91 90 Jenkins 2 App
92 91 joinIT 2 App
93 92 LifeFlow 2 App
94 93 Lotus Notes 2 App
95 94 MaaS360 2 App
96 95 Malwarebytes Anti-Malware 2 App
97 96 ManageEngine ADSelfService Plus 2 App
98 97 MarkLogic DB 2 App
99 98 Memcached 2 App
100 99 Microsoft Access 2 App
101 100 Microsoft BizTalk Adapters for Host Systems 2 App
102 101 Microsoft Dynamics AX 2 App
103 102 Microsoft Endpoint Configuration Manager (SCCM) 2 App
104 103 Microsoft Excel 2 App
105 104 Microsoft Exchange Server 2 App
106 105 Microsoft Forefront Identity Manager (FIM) 2 App
107 106 Microsoft InfoPath 2 App
108 107 Microsoft Internet Explorer 2 App
109 108 Microsoft ISA Server 2 App
110 109 Microsoft MQ 2 App
111 110 Microsoft System Center Endpoint Protection 2 App
112 111 Microsoft Visual Studio 2 App
113 112 Microsoft Web Deploy 2 App
114 113 Microsoft Web Farm Framework (WFF) 2 App
115 114 Microsoft Web Platform Installer 2 App
116 115 Model Driven Workflow (MDW) 2 App
117 116 MongoDB 2 App
118 117 Mozilla Firefox 2 App
119 118 MQ Client 2 App
120 119 MS Office 365 2 App
121 120 MS SQL Server 2 App
122 121 MS SQL Server Compact 2 App
123 122 MySQL 2 App
124 123 Neo4j 2 App
125 124 Nexus Repository OSS 2 App
126 125 Nix package manager 2 App
127 126 OpenLDAP 2 App
128 127 OpenText Exstream 2 App
129 128 OpenVPN 2 App
130 129 Oracle Access Management 2 App
131 130 Oracle ADF 2 App
132 131 Oracle APEX 2 App
133 132 Oracle BI Publisher 2 App
134 133 Oracle Business Intelligence 2 App
135 134 Oracle Database 2 App
136 135 Oracle Designer 2 App
137 136 Oracle Enterprise Manager 2 App
138 137 Oracle Forms 2 App
139 138 Oracle Hyperion|Hyperion Interactive Reporting 2 App
140 139 Oracle Hyperion|Hyperion Planning 2 App
141 140 Oracle Net Services 2 App
142 141 Oracle Real Application Clusters (RAC) 2 App
143 142 Oracle Retail Point-of-Service 2 App
144 143 Oracle Service Bus 2 App
145 144 Oracle Smart View 2 App
146 145 Oracle SOA Suite 2 App
147 146 Oracle SQL Developer 2 App
148 147 Oracle TimesTen In-Memory Database 2 App
149 148 Oracle Warehouse Builder (OWB) 2 App
150 149 Orbix 2 App
151 150 Pentaho 2 App
152 151 PeopleSoft 2 App
153 152 Perkin Elmer Informatics (PKI) 2 App
154 153 Pervasive PSQL 2 App
155 154 PIPE-FLO 2 App
156 155 PKZIP 2 App
157 156 Planview 2 App
158 157 PostgreSQL 2 App
159 158 Powerbuilder 2 App
160 159 Primavera P6 2 App
161 160 Pro*COBOL 2 App
162 161 ProjectWise 2 App
163 162 ProjectWise Web Server 2 App
164 163 PVCS Version Manager 2 App
165 164 QlikView 2 App
166 165 RabbitMQ 2 App
167 166 Rational ClearCase 2 App
168 167 Rational ClearQuest 2 App
169 168 Redis 2 App
170 169 Remedy 2 App
171 170 Riak 2 App
172 171 RightFax 2 App
173 172 Rumba 2 App
174 173 SAP BusinessObjects BI server 2 App
175 174 SAP ERP 2 App
176 175 SAP HANA DB 2 App
177 176 SAP MaxDB 2 App
178 177 SAP NetWeaver Business Warehouse 2 App
179 178 SAP SQL Anywhere 2 App
180 179 SAP Web Dynpro 2 App
181 180 Sentry 2 App
182 181 SharePoint 2 App
183 182 Siebel 2 App
184 183 SNA Manager 2 App
185 184 SnagIt 2 App
186 185 solidDB 2 App
187 186 SonarQube 2 App
188 187 SpaceMonger 2 App
189 188 Splunk 2 App
190 189 SQLIO 2 App
191 190 Sybase SQL Server 2 App
192 191 Syncsort 2 App
193 192 Sysinternal Tools 2 App
194 193 Sysinternal Tools|* 2 App
195 194 Sysinternal Tools|AccessEnum 2 App
196 195 Sysinternal Tools|ClockRes 2 App
197 196 Sysinternal Tools|Coreinfo 2 App
198 197 Sysinternal Tools|DiskExt 2 App
199 198 Sysinternal Tools|DiskMon 2 App
200 199 Sysinternal Tools|Hex2dec 2 App
201 200 Sysinternal Tools|Junction 2 App
202 201 Sysinternal Tools|LDMDump 2 App
203 202 Sysinternal Tools|LoadOrder 2 App
204 203 Sysinternal Tools|PipeList 2 App
205 204 Sysinternal Tools|Process Explorer 2 App
206 205 Sysinternal Tools|PsKill 2 App
207 206 Sysinternal Tools|PsPasswd 2 App
208 207 Sysinternal Tools|SDelete 2 App
209 208 Sysinternal Tools|ShareEnum 2 App
210 209 Sysinternal Tools|Sync 2 App
211 210 Sysinternal Tools|TCPView 2 App
212 211 Sysinternal Tools|VMMap 2 App
213 212 Sysinternal Tools|Whois 2 App
214 213 Tableau 2 App
215 214 TCPLink Enterprise Server 2 App
216 215 Teradata 2 App
217 216 Teradata QS Server 2 App
218 217 TIBCO Business Works (BW) 2 App
219 218 TIBCO InConcert 2 App
220 219 TIBCO Rendezvous 2 App
221 220 Tivoli Access Manager (TAM) 2 App
222 221 TortoiseCVS 2 App
223 222 TortoiseSVN 2 App
224 223 TSO/ISPF 2 App
225 224 TWS zCentric 2 App
226 225 Uniface 2 App
227 226 ViewNow X Server 2 App
228 227 Virtual I/O Server 2 App
229 228 Visibroker 2 App
230 229 VMware Solution Exchange Marketplace (VSX) 2 App
231 230 VMware Tools 2 App
232 231 VMware vCenter 2 App
233 232 WebFOCUS 2 App
234 233 WebLogic Integration 2 App
235 234 WebSphere Commerce Suite (WCS) 2 App
236 235 WebSphere Message Broker 2 App
237 236 Wherescape Red 2 App
238 237 Windchill 2 App
239 238 Windows Indexing Service 2 App
240 239 Windows Terminal Server (WTS) 2 App
241 240 WingArc SVF 2 App
242 241 WinMerge 2 App
243 242 WinRAR 2 App
244 243 WinSCP 2 App
245 244 Wise Package Studio 2 App
246 245 Wordpress 2 App
247 246 XAMPP 2 App
248 247 ZAP BI 2 App
249 248 ZeroMQ 2 App
250 249 Zerto Virtual Replication 2 App
251 250 IBM PowerHA 2 App
252 251 Tivoli Netcool/OMNIbus 2 App
253 252 IBM ILOG Views 2 App
254 253 IBM ILOG CPLEX 2 App
255 254 IBM ILOG Jviews 2 App
256 255 IBM ILOG Elixir 2 App
257 256 IBM ILOG Supply Chain Apps 2 App
258 257 ILOG Solver 2 App
259 258 SQLite 2 App
260 259 Apache HTTP Server 8 App Server
261 260 Apache Tomcat 8 App Server
262 261 ArcGIS Server 8 App Server
263 262 Oracle WebLogic Server 8 App Server
264 263 GlassFish 8 App Server
265 264 HAProxy 8 App Server
266 265 IBM HTTP Server 8 App Server
267 266 IIS 8 App Server
268 267 JBoss 8 App Server
269 268 JBoss|* 8 App Server
270 269 Kitura 8 App Server
271 270 Lotus Domino 8 App Server
272 271 Lucee 8 App Server
273 272 Netscape Application Server (NAS) 8 App Server
274 273 Netscape Enterprise Server (NES) 8 App Server
275 274 Nginx 8 App Server
276 275 Oracle Application Server 8 App Server
277 276 Oracle WebCenter Content Server 8 App Server
278 277 Pivotal tc Server 8 App Server
279 278 Resin Web Server 8 App Server
280 279 SAP NetWeaver App Server 8 App Server
281 280 Spark 8 App Server
282 281 Oracle iPlanet Web Server 8 App Server
283 282 UltiDev Web Server Pro (UWS) 8 App Server
284 283 webMethods Integration Server 8 App Server
285 284 Websphere Application Server (WAS) 8 App Server
286 285 WebSphere Liberty 8 App Server
287 286 WebSphere Portal Server 8 App Server
288 287 Websphere Process Server 8 App Server
289 288 WebSphere Process Server 8 App Server
290 289 Oracle Real-Time Decisions (RTD) 8 App Server
291 290 CA API Gateway 4 HW
292 291 Citrix ADC SDX 4 HW
293 292 Citrix ADC MPX 4 HW
294 293 HP Nonstop 4 HW
295 294 IBM DataPower Gateway 4 HW
296 295 IBM Power Systems 4 HW
297 296 Intel Xeon Processor 4 HW
298 297 Net Optics Taps 4 HW
299 298 Oracle Exadata 4 HW
300 299 AutoIt 9 Lang
301 300 AWK 9 Lang
302 301 BASIC 9 Lang
303 302 Brainscript 9 Lang
304 303 C 9 Lang
305 304 C# 9 Lang
306 305 C++ 9 Lang
307 306 C++|Visual C++ 9 Lang
308 307 Cascading Style Sheets (CSS) 9 Lang
309 308 Clipper 9 Lang
310 309 CLIST 9 Lang
311 310 COBOL 9 Lang
312 311 ColdFusion Markup Language (CFML) 9 Lang
313 312 Data Language Interface (DL/I) 9 Lang
314 313 Delphi 9 Lang
315 314 Easytrieve 9 Lang
316 315 Expect 9 Lang
317 316 eXtensible HyperText Markup Language (XHTML) 9 Lang
318 317 Extensible Markup Language (XML) 9 Lang
319 318 Extensible Markup Language (XML)|MSXML 9 Lang
320 319 Extensible Stylesheet Language (XSL) 9 Lang
321 320 Extensible Stylesheet Language Transformations (XLST) 9 Lang
322 321 FOCUS 9 Lang
323 322 Fortran 9 Lang
324 323 Go 9 Lang
325 324 GraphQL 9 Lang
326 325 Groovy 9 Lang
327 326 HiveQL 9 Lang
328 327 Hypertext Markup Language (HTML) 9 Lang
329 328 IBM High Level Assembler (HLASM) 9 Lang
330 329 IBM i Control Language (CL) 9 Lang
331 330 IBM Informix-4GL 9 Lang
332 331 Java 9 Lang
333 332 Java|Extensible Stylesheet Language (XSL) 9 Lang
334 333 Java|Java Enterprise Edition (Java EE) 9 Lang
335 334 Java|Java Standard Edition (Java SE) 9 Lang
336 335 Java|JavaServer Pages (JSP) 9 Lang
337 336 Java|JavaServer Pages (JSP)|Scriptlets 9 Lang
338 337 JavaScript 9 Lang
339 338 JCL 9 Lang
340 339 Job Information Language (JIL) 9 Lang
341 340 JScript 9 Lang
342 341 Lisp 9 Lang
343 342 Niakwa Programming Language (NPL) 9 Lang
344 343 Objective C 9 Lang
345 344 OpenEdge ABL 9 Lang
346 345 Pascal 9 Lang
347 346 Pascal|Object Pascal 9 Lang
348 347 Perl 9 Lang
349 348 Perl|ActivePerl 9 Lang
350 349 Perl|Rex 9 Lang
351 350 PHP 9 Lang
352 351 PL/I 9 Lang
353 352 PL/SQL 9 Lang
354 353 PRO*C 9 Lang
355 354 Python 9 Lang
356 355 R 9 Lang
357 356 Rexx 9 Lang
358 357 RPG 9 Lang
359 358 Ruby 9 Lang
360 359 Salesforce Object Query Language (SOQL) 9 Lang
361 360 SAS 9 Lang
362 361 Sass 9 Lang
363 362 Scala 9 Lang
364 363 Smalltalk 9 Lang
365 364 Swift 9 Lang
366 365 TCL 9 Lang
367 366 Transact-SQL 9 Lang
368 367 TypeScript 9 Lang
369 368 VB.NET 9 Lang
370 369 VBScript 9 Lang
371 370 Visual Basic 9 Lang
372 371 Visual Basic for Applications (VBA) 9 Lang
373 372 Visual FoxPro 9 Lang
374 373 VoiceXML 9 Lang
375 374 Xbase++ 9 Lang
376 375 Apache Lucene 12 Lib
377 376 Apache Xerces 12 Lib
378 377 Cascading Style Sheets (CSS)|Bootstrap 12 Lib
379 378 Java|Apache Camel 12 Lib
380 379 Java|Apache Commons BeanUtils 12 Lib
381 380 Java|Apache PDFBox 12 Lib
382 381 Java|Apache Velocity 12 Lib
383 382 Java|EclipseLink 12 Lib
384 383 Java|Enterprise JavaBeans (EJB) 12 Lib
385 384 Java|EZMorph 12 Lib
386 385 Java|Google Web Toolkit (GWT) 12 Lib
387 386 Java|Hibernate 12 Lib
388 387 Java|IBM SDK 12 Lib
389 388 Java|Java Development Kit (JDK) 12 Lib
390 389 Java|Java Message Service (JMS) 12 Lib
391 390 Java|Java Web Start 12 Lib
392 391 Java|JavaServer Faces (JSF) 12 Lib
393 392 Java|JDBC 12 Lib
394 393 Java|JRuby Core 12 Lib
395 394 Java|Log4j 12 Lib
396 395 Java|Quartz 12 Lib
397 396 Java|Remote Method Invocation (RMI) 12 Lib
398 397 Java|Servlet 12 Lib
399 398 Java|Spring 12 Lib
400 399 Java|Spring|Spring Boot 12 Lib
401 400 Java|Spring|Spring Cloud Data Flow 12 Lib
402 401 Java|Spring|Spring MVC 12 Lib
403 402 Java|Struts 12 Lib
404 403 Java|Swing 12 Lib
405 404 Java|Vaadin 12 Lib
406 405 JavaScript|AJAX 12 Lib
407 406 JavaScript|AngularJS 12 Lib
408 407 JavaScript|Draw2D 12 Lib
409 408 JavaScript|Express.js 12 Lib
410 409 JavaScript|Ext JS 12 Lib
411 410 JavaScript|jqGrid 12 Lib
412 411 JavaScript|JQuery 12 Lib
413 412 JavaScript|Jquery|jQuery UI 12 Lib
414 413 JavaScript|React 12 Lib
415 414 JavaScript|script.aculo.us 12 Lib
416 415 JavaScript|Valums AJAX File Uploader 12 Lib
417 416 OWASP Enterprise Security API (ESAPI) 12 Lib
418 417 Perl|Oraperl 12 Lib
419 418 Android 6 OS
420 419 BeOS 6 OS
421 420 Cisco IOS 6 OS
422 421 DART 6 OS
423 422 Fabric OS 6 OS
424 423 GNU 6 OS
425 424 IBM i 6 OS
426 425 iOS 6 OS
427 426 Linux 6 OS
428 427 Linux|CentOS 6 OS
429 428 Linux|Check Point 6 OS
430 429 Linux|Debian 6 OS
431 430 Linux|Junos OS 6 OS
432 431 Linux|openSUSE 6 OS
433 432 Linux|Oracle Linux 6 OS
434 433 Linux|Photon OS 6 OS
435 434 Linux|Red Hat Enterprise Linux 6 OS
436 435 Linux|SUSE Linux Enterprise Server 6 OS
437 436 Linux|Ubuntu 6 OS
438 437 Linux|zLinux 6 OS
439 438 macOS 6 OS
440 439 MVS 6 OS
441 440 MVS|OS/390 6 OS
442 441 MVS|z/OS 6 OS
443 442 OpenVMS 6 OS
444 443 OS/2 6 OS
445 444 Unix 6 OS
446 445 Unix|AIX 6 OS
447 446 Unix|BSD 6 OS
448 447 Unix|BSD|FreeBSD 6 OS
449 448 Unix|BSD|SunOS 6 OS
450 449 Unix|HP-UX 6 OS
451 450 Windows 6 OS
452 451 Windows|Windows Desktop 6 OS
453 452 Windows|Windows Server 6 OS
454 453 Linux|Fedora 6 OS
455 454 Linux|Amazon Linux 6 OS
456 455 Clarify|Clear Basic 5 Plugin
457 456 Eclipse|ATLAS Transformation Language (ATL) 5 Plugin
458 457 IBM BigFix Platform|Client Deploy Tool 5 Plugin
459 458 IBM Integration Bus|Extended Structured Query Language (ESQL) 5 Plugin
460 459 IBM Tivoli Asset Management|Asset Discovery for Distributed 5 Plugin
461 460 IBM Tivoli Storage Manager|TSM API 5 Plugin
462 461 IBM Tivoli Storage Manager|TSM Client 5 Plugin
463 462 IBM Tivoli Storage Manager|TSM Storage Agent 5 Plugin
464 463 IBM Tivoli Storage Manager|VSS Requestor 5 Plugin
465 464 Microsoft Exchange Server|Veeam Explorer 5 Plugin
466 465 MS SQL Server|MS SQL Server Browser 5 Plugin
467 466 MS SQL Server|Data Transformation Services 5 Plugin
468 467 MS SQL Server|Log Reader Agent 5 Plugin
469 468 MS SQL Server|SQL Server Analysis Services (SSAS) 5 Plugin
470 469 MS SQL Server|SQL Server Database Engine 5 Plugin
471 470 MS SQL Server|SQL Server Integration Services (SSIS) 5 Plugin
472 471 MS SQL Server|SQL Server Management Studio 5 Plugin
473 472 MS SQL Server|SQL Server Report Builder 5 Plugin
474 473 MS SQL Server|SQL Server Reporting Services (SSRS) 5 Plugin
475 474 Oracle Database|Jserver 5 Plugin
476 475 Oracle Database|Oracle Spatial and Graph 5 Plugin
477 476 SAP ERP|SAP EHP 5 Plugin
478 477 SAP ERP|SAP Kernel 5 Plugin
479 478 Oracle Database|SQL*Plus 5 Plugin
480 479 Sybase SQL Server|Sybase Central 5 Plugin
481 480 Sybase SQL Server|Sybase Dsedit 5 Plugin
482 481 TIBCO Business Works (BW)|Integration Manager 5 Plugin
483 482 .NET Framework|Common Runtime Library 7 Runlib
484 483 .NET Framework|log4net 7 Runlib
485 484 .NET Framework|Magick.NET 7 Runlib
486 485 .NET Framework|Windows Communication Foundation (WCF) 7 Runlib
487 486 .NET Framework|Windows Workflow Foundation (WF) 7 Runlib
488 487 .NET Framework|WinForms 7 Runlib
489 488 ActiveX|ADO 7 Runlib
490 489 IIS|Easy Migration Tool (IEMT) 7 Runlib
491 490 IIS|Application Request Routing (ARR) 7 Runlib
492 491 IIS|IIS Manager 7 Runlib
493 492 JBoss|JBoss Seam 7 Runlib
494 493 JBoss|Wildfly 7 Runlib
495 494 Oracle Application Server|Oracle Transparent Gateway 7 Runlib
496 495 Oracle WebCenter Content Server|Idoc Script 7 Runlib
497 496 SAP NetWeaver App Server|ABAP 7 Runlib
498 497 .NET Framework 10 Runtime
499 498 Active Directory (AD) 10 Runtime
500 499 Active Server Pages (ASP) 10 Runtime
501 500 ActiveX 10 Runtime
502 501 Apache Cordova 10 Runtime
503 502 CICS 10 Runtime
504 503 Docker 10 Runtime
505 504 Flash 10 Runtime
506 505 HTTP File Server 10 Runtime
507 506 Java Runtime Environment (JRE) 10 Runtime
508 507 Node.js 10 Runtime
509 508 Ruby on Rails 10 Runtime
510 509 VisualForce 10 Runtime
511 510 EMC Celerra 11 Storage
512 511 Application Lifecycle Management (ALM) 1 Technology
513 512 Assembler Language 1 Technology
514 513 Batch Management Software (BMS) 1 Technology
515 514 Business Object Reports 1 Technology
516 515 Common Gateway Interface (CGI) 1 Technology
517 516 Compopent Object Model (COM) 1 Technology
518 517 Common Object Request Broker Architecture (CORBA) 1 Technology
519 518 CORBA Interface Definition Language (CORBA IDL) 1 Technology
520 519 Data Control Language (DCL) 1 Technology
521 520 Database (DB) 1 Technology
522 521 Electronic Data Interchange (EDI) 1 Technology
523 522 Application Web Server 1 Technology
524 523 Java-based Document Object Model for XML (JDOM) 1 Technology
525 524 Lightweight Directory Access Protocol (LDAP) 1 Technology
526 525 Open Database Connectivity (ODBC) 1 Technology
527 526 Order Management System (OMS) 1 Technology
528 527 Oracle Web Services 1 Technology
529 528 Reporting Services 1 Technology
530 529 Representational State Transfer (REST) 1 Technology
531 530 Service-Oriented Architecture (SOA) 1 Technology
532 531 Simple Object Access Protocol (SOAP) 1 Technology
533 532 SQL 9 Lang
534 533 YAML 1 Technology
535 534 Model-view-controller (MVC) 1 Technology
536 535 Application Server 1 Technology
537 536 Cloud 1 Technology
538 537 Competency and Quality Assurance Server 1 Technology
539 538 Device Provisioning Engines (DPE) 1 Technology
540 539 E-business solution 1 Technology
541 540 Enterprise Service Bus(ESB) 1 Technology
542 541 File Server 1 Technology
543 542 General Ledger 1 Technology
544 543 HTTP client 1 Technology
545 544 HTTP Server 1 Technology
546 545 Integrated Safe System of Work (ISSOW) 1 Technology
547 546 Internet Exchange Point - Full Stack (ixp-ft) 1 Technology
548 547 Internet Message Access Protocol (IMAP) 1 Technology
549 548 JSON 1 Technology
550 549 KVS Application Server 1 Technology
551 550 KVS File Server 1 Technology
552 551 KVS Proxy Server 1 Technology
553 552 mainframe 1 Technology
554 553 Manufacturing Execution System (MES) 1 Technology
555 554 Mobile 1 Technology
556 555 NonSQL 1 Technology
557 556 SaaS 1 Technology
558 557 Storage Area Network (SAN) 1 Technology
559 558 Supplier Registration System Application Server 1 Technology
560 559 Virtual Appliance 1 Technology
561 560 Webtop 1 Technology
562 561 Proxy Server 1 Technology
563 562 Utility 1 Technology
564 563 Citrix ADC 3 VM
565 564 Citrix ADC VPX 3 VM
566 565 Citrix ADC BLX 3 VM
567 566 InterScan Messaging Security Virtual Appliance (IMSVA) 3 VM
568 567 Oracle VM 3 VM
569 568 VMware ESXi 3 VM
570 569 VMware Server 3 VM
571 570 IBM WebSphere Transformation Extender (WTX) 2 App
572 571 Oracle Retail Point-of-Service 2 App
573 572 Structured Query Language (SQL) 1 Technology
574 573 TSO/ISPF 2 App
575 574 Model view controller (MVC) 1 Technology
576 575 |* 6 OS
577 576 Linux|* 6 OS
578 577 MVS|* 6 OS
579 578 Unix|* 6 OS
580 579 Unix|BSD|* 6 OS
581 580 Windows|* 6 OS
582 581 MS SQL Server|* 2 App
583 582 C#|* 9 Lang
584 583 C++|* 9 Lang
585 584 Java|* 9 Lang
586 585 Perl|* 9 Lang
587 586 PHP|* 9 Lang
588 587 Python|* 9 Lang
589 588 Ruby|* 9 Lang
590 589 JavaScript|* 9 Lang
591 590 Unix|BSD|OpenBSD 6 OS
592 591 z/VSE 6 OS
593 592 Active Server Pages (ASP)|* 9 Lang
594 593 MS-DOS 6 OS
595 594 COBOL|* 9 Lang
596 595 VME 6 OS
597 596 Extensible Markup Language (XML)|* 9 Lang
598 597 DOS/360 6 OS
599 598 z/TPF 6 OS
600 599 Pascal|* 9 Lang
601 600 Oracle WebLogic Server|* 8 App Server
602 601 Websphere ILOG JRules BRMS 2 App
603 602 Unix|BSD|NetBSD 6 OS
604 603 SharePoint|* 2 App
605 604 IBM Tivoli Storage Manager|* 2 App
606 605 IBM Spectrum Scale|* 2 App
607 606 IBM Tivoli Asset Management|* 2 App
608 607 Oracle Hyperion|* 2 App
609 608 z/VM 6 OS
610 609 IIS|* 8 App Server
611 610 Oracle Application Server|* 8 App Server
612 611 instana 10 Runtime
613 612 credstash 2 App
614 613 Snyk 2 App
615 614 Akka 2 App
616 615 Varnish 8 App Server
617 616 Datadog 10 Runtime
618 617 API 1 Technology
619 618 Hazelcast 10 Runtime
620 619 Infinispan 2 App
621 620 Nuxeo 10 Runtime
622 621 ArangoDB 2 App
623 622 Eclipse Che 2 App
624 623 Amazon S3 2 App
625 624 ClickHouse 2 App
626 625 MinIO 2 App
627 626 Elasticsearch 2 App
628 627 XtraDB 2 App
629 628 Keycloak 2 App
630 629 Grafana 2 App
631 630 Mattermost 10 Runtime
632 631 Synapse 2 App
633 632 Cloud IAM 2 App
634 633 Knative 10 Runtime
635 634 Apache Cassandra 2 App
636 635 Kubeflow 10 Runtime
637 636 Qiskit 2 App
638 637 Microsoft Azure 2 App
639 638 Strimzi 10 Runtime
640 639 Sematext 10 Runtime
641 640 Eclipse hawkBit 2 App
642 641 Eclipse Ditto 2 App
643 642 MariaDB 2 App
644 643 Zadara 2 App
645 644 Istio 2 App
646 645 Vault 2 App
647 646 Apache Druid 2 App
648 647 etcd 2 App
649 648 Traefik 8 App Server
650 649 IBM Cloud 2 App
651 650 YugabyteDB 2 App
652 651 CockroachDB 2 App
653 652 Jaeger 10 Runtime
654 653 Natural Programming Language 9 Lang
655 654 AcuCOBOL 9 Lang
656 655 Ada 9 Lang
657 656 ADABAS 2 App
658 657 ADSO 9 Lang
659 658 Ansible 2 App
660 659 Batch 9 Lang
661 660 Powershell 9 Lang
662 661 COM+ 10 Runtime
663 662 Dataflex 9 Lang
664 663 DDS 9 Lang
665 664 Forte 9 Lang
666 665 Foxpro 9 Lang
667 666 IBM DB2 Purescale 2 App
668 667 IDMS DB 2 App
669 668 IDMS DML 9 Lang
670 669 Jaguar 8 App Server
671 670 EAServer 8 App Server
672 671 Apache Cassandra 2 App
673 672 IBM Netezza 4 HW
674 673 OpenEdge 9 Lang
675 674 OpenROAD 9 Lang
676 675 Oracle Reports 2 App
677 676 SAP Replication Server 2 App
678 677 Git 2 App
679 678 GitLab 2 App
680 679 VSAM 2 App
681 680 Cloud<>Apache HTTP Server 2 App
682 681 Cloud<>Windows|Windows Server 2 App
683 682 Cloud<>MS SQL Server 2 App
684 683 Cloud<>Azure SQL Server Database 2 App
685 684 Cloud<>MySQL 2 App
686 685 Cloud<>Oracle Database 2 App
687 686 Cloud<>PostgreSQL 2 App
688 687 Cloud<>AWS RDS 2 App
689 688 Cloud<>SAP HANA DB 2 App
690 689 BMS Map 9 Lang
691 690 DB400 2 App
692 691 ILE 9 Lang
693 692 Integrated Data Store (IDS) 2 App
694 693 ISAM 2 App
695 694 Oracle RDS 2 App
696 695 SAP IQ 2 App
697 696 Cloud<>Linux 2 App
698 697 Apache Maven 2 App
699 698 IBM Basic Assembly Language (BAL) 9 Lang

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
*******************************************************************************
Accuracy: 0.79090
F1 Score: 0.80996
Precision: 0.88827
Recall: 0.79090
Accuracy: 0.77655
F1 Score: 0.79605
Precision: 0.85637
Recall: 0.77655

View File

@ -32,7 +32,7 @@ torch.set_float32_matmul_precision('high')
BATCH_SIZE = 256
# %%
data_path = '../../../data_import/train.csv'
data_path = '../../../esAppMod_data_import/train.csv'
train_df = pd.read_csv(data_path, skipinitialspace=True)
# rather than use pattern, we use the real thing and property
entity_ids = train_df['entity_id'].to_list()
@ -49,20 +49,31 @@ for idx, val in enumerate(target_id_list):
# introduce pre-processing functions
def preprocess_text(text):
    """Normalize a raw mention string before it is tokenized.

    Steps, in order: lower-case the text, turn ``-``, ``;`` and ``:`` into
    spaces, separate digits from adjacent letters, collapse every run of
    digits into a single ``x`` placeholder, and squeeze whitespace.

    Args:
        text (str): Raw mention text.

    Returns:
        str: The normalized text, e.g. ``"DB2-Server v10"`` becomes
        ``"db x server v x"``.
    """
    # Lower-case first; the letter classes below are therefore lower-case too.
    # (Matching [A-Z] after lower() can never succeed.)
    text = text.lower()

    # Treat dashes, semicolons and colons as word separators
    text = re.sub(r"[-;:]", " ", text)

    # Add space between a digit followed by a letter
    text = re.sub(r"(\d)([a-z])", r"\1 \2", text)

    # Add space between a letter followed by a digit
    text = re.sub(r"([a-z])(\d)", r"\1 \2", text)

    # Substitute each run of digits with a single 'x'
    text = re.sub(r"\d+", "x", text)

    # Standardize spacing
    text = re.sub(r"\s+", " ", text).strip()

    return text
# outputs a list of dictionaries
# processes dataframe into lists of dictionaries
# each element maps input to output
@ -85,7 +96,7 @@ def process_df_to_dict(df):
def create_dataset():
# train
data_path = '../../../data_import/test.csv'
data_path = '../../../esAppMod_data_import/test.csv'
test_df = pd.read_csv(data_path, skipinitialspace=True)

View File

@ -45,17 +45,47 @@ def set_seed(seed):
set_seed(42)
SHUFFLES=5
SHUFFLES=2
# %%
# import training file
data_path = '../../data_import/train.csv'
data_path = '../../esAppMod_data_import/train.csv'
train_df = pd.read_csv(data_path, skipinitialspace=True)
# rather than use pattern, we use the real thing and property
entity_ids = train_df['entity_id'].to_list()
target_id_list = sorted(list(set(entity_ids)))
def compute_normalized_class_weights(class_counts, max_resamples=SHUFFLES):
    """
    Compute per-class resample counts inversely proportional to class size.

    Each class weight is ``total / count``; weights are divided by the largest
    weight so the rarest class gets 1.0, scaled by ``max_resamples`` and
    rounded to integers. The result is NOT a probability distribution (it does
    not sum to 1), and frequent classes can round down to 0.

    Args:
        class_counts (array-like): Number of samples for each class. Every
            entry must be strictly positive.
        max_resamples (int): Resample count assigned to the rarest class.

    Returns:
        numpy.ndarray: Integer resample count for each class, in the same
        order as ``class_counts``.

    Raises:
        ValueError: If any count is zero or negative (the inverse weight
            would be infinite or meaningless).
    """
    class_counts = np.array(class_counts)
    # Guard against division by zero, which would otherwise yield inf/NaN
    # and undefined integers after the cast below.
    if np.any(class_counts <= 0):
        raise ValueError("class_counts must contain only positive counts")

    total_samples = np.sum(class_counts)
    class_weights = total_samples / class_counts

    # so that highest weight is 1
    normalized_weights = class_weights / np.max(class_weights)

    # Scale weights such that the highest weight corresponds to `max_resamples`
    resample_counts = normalized_weights * max_resamples

    # Round resamples to nearest integer
    resample_counts = np.round(resample_counts).astype(int)
    return resample_counts
# %%
# Number of training rows per entity_id
id_counts = train_df['entity_id'].value_counts()
# Resample count for each class, aligned position-by-position with id_counts
id_weights = compute_normalized_class_weights(id_counts, max_resamples=SHUFFLES)
id_index = id_counts.index
# entity_id -> resample count
label2weight = dict(zip(id_index, id_weights))
# %%
id2label = {}
@ -69,13 +99,25 @@ for idx, val in enumerate(target_id_list):
def preprocess_text(text):
    """Normalize a raw mention string before augmentation and tokenization.

    Steps, in order: lower-case the text, turn ``-``, ``;`` and ``:`` into
    spaces, separate digits from adjacent letters, collapse every run of
    digits into a single ``x`` placeholder, and squeeze whitespace.

    Args:
        text (str): Raw mention text.

    Returns:
        str: The normalized text, e.g. ``"Windows 2012R2"`` becomes
        ``"windows x r x"``.
    """
    # Lower-case first; the letter classes below are therefore lower-case too.
    # (Matching [A-Z] after lower() can never succeed.)
    text = text.lower()

    # Replace dashes, semicolons and colons with spaces
    text = re.sub(r"[-;:]", " ", text)

    # Add space between a digit followed by a letter
    text = re.sub(r"(\d)([a-z])", r"\1 \2", text)

    # Add space between a letter followed by a digit
    text = re.sub(r"([a-z])(\d)", r"\1 \2", text)

    # Substitute each run of digits with a single 'x'
    text = re.sub(r"\d+", "x", text)

    # Standardize spacing
    text = re.sub(r"\s+", " ", text).strip()

    return text
@ -123,6 +165,42 @@ def shuffle_text(text, n_shuffles=SHUFFLES):
return all_processed
# Long-form terms (used as regex patterns) mapped to their abbreviations.
# NOTE(review): process_df_to_dict passes text that already went through
# preprocess_text, which replaces digit runs with 'x'; entries containing
# digits ('ibm database 2', 'release 2') presumably never match there - confirm.
term_to_abbrev = {
    r'job entry system': 'jes',
    r'subversion': 'svn',
    r'borland database engine': 'bde',
    r'business intelligence and reporting tools': 'birt',
    r'lan management solution': 'lms',
    r'laboratory information management system': 'lims',
    r'ibm database 2': 'db/2',
    r'integrated development environment': 'ide',
    r'software development kit': 'sdk',
    r'hp operations orchestration': 'hpoo',
    r'hp server automation': 'hpsa',
    r'internet information server': 'iis',
    r'release 2': 'r2',
    r'red hat enterprise linux': 'rhel',
    r'oracle enterprise linux': 'oel',
    r'websphere application server': 'was',
    r'application development facility': 'adf',
    r'server analysis services': 'ssas'
}

# Reverse mapping: word-bounded regex for each abbreviation -> long-form term.
# The abbreviation is escaped so it is always matched literally, and the \b
# anchors keep short abbreviations from matching inside longer words.
abbrev_to_term = {
    rf'\b{re.escape(abbrev)}\b': term for term, abbrev in term_to_abbrev.items()
}


def replace_terms_with_abbreviations(text):
    """Replace every long-form term found in ``text`` with its abbreviation.

    Substitutions are applied one after another in ``term_to_abbrev`` order.

    Args:
        text (str): Input text (matching is case-sensitive; keys are lower-case).

    Returns:
        str: Text with all matching terms abbreviated.
    """
    for pattern, abbrev in term_to_abbrev.items():
        text = re.sub(pattern, abbrev, text)
    return text


def replace_abbreivations_with_terms(text):
    """Expand every whole-word abbreviation found in ``text`` to its long form.

    Substitutions are applied one after another in ``abbrev_to_term`` order.

    Args:
        text (str): Input text (matching is case-sensitive; keys are lower-case).

    Returns:
        str: Text with all matching abbreviations expanded.
    """
    for pattern, term in abbrev_to_term.items():
        text = re.sub(pattern, term, text)
    return text
# outputs a list of dictionaries
# processes dataframe into lists of dictionaries
@ -134,9 +212,12 @@ def process_df_to_dict(df):
for _, row in df.iterrows():
# produce shuffling
index = row['entity_id']
desc = row['mention']
desc = preprocess_text(desc)
processed_descs = shuffle_text(desc, n_shuffles=SHUFFLES)
parent_desc = row['mention']
parent_desc = preprocess_text(parent_desc)
# ensure at least 1 shuffle
# no_of_shuffles = label2weight[index] + 1
no_of_shuffles = SHUFFLES
processed_descs = shuffle_text(parent_desc, n_shuffles=no_of_shuffles)
for desc in processed_descs:
element = {
@ -145,12 +226,38 @@ def process_df_to_dict(df):
}
output_list.append(element)
# perform term_to_abbrev (replace long-form terms with their abbreviations)
desc = replace_terms_with_abbreviations(parent_desc)
no_of_shuffles = SHUFFLES
processed_descs = shuffle_text(desc, n_shuffles=no_of_shuffles)
for desc in processed_descs:
element = {
'text' : desc,
'label': label2id[index], # ensure labels starts from 0
}
output_list.append(element)
# perform abbrev_to_term (expand abbreviations into their long-form terms)
desc = replace_abbreivations_with_terms(parent_desc)
no_of_shuffles = SHUFFLES
processed_descs = shuffle_text(desc, n_shuffles=no_of_shuffles)
for desc in processed_descs:
element = {
'text' : desc,
'label': label2id[index], # ensure labels starts from 0
}
output_list.append(element)
return output_list
def create_dataset():
# train
data_path = '../../data_import/train.csv'
data_path = '../../esAppMod_data_import/train.csv'
train_df = pd.read_csv(data_path, skipinitialspace=True)
@ -169,8 +276,9 @@ def train():
# prepare tokenizer
# model_checkpoint = "distilbert/distilbert-base-uncased"
model_checkpoint = 'google-bert/bert-base-cased'
model_checkpoint = "distilbert/distilbert-base-uncased"
# model_checkpoint = 'google-bert/bert-base-cased'
# model_checkpoint = 'prajjwal1/bert-small'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, return_tensors="pt", clean_up_tokenization_spaces=True)
# Define additional special tokens
# additional_special_tokens = ["<DESC>"]
@ -246,14 +354,15 @@ def train():
logging_strategy="epoch",
# save_strategy="epoch",
load_best_model_at_end=False,
learning_rate=1e-4,
per_device_train_batch_size=128,
per_device_eval_batch_size=128,
learning_rate=5e-5,
per_device_train_batch_size=64,
per_device_eval_batch_size=64,
auto_find_batch_size=False,
ddp_find_unused_parameters=False,
weight_decay=0.01,
save_total_limit=1,
num_train_epochs=120,
num_train_epochs=80,
warmup_steps=400,
bf16=True,
push_to_hub=False,
remove_unused_columns=False,

51
zero_shot/bloom.py Normal file
View File

@ -0,0 +1,51 @@
# %%
from transformers import AutoModelForCausalLM, AutoTokenizer
# %%
# Load model and tokenizer
# model_name = "bigscience/bloom-7b1" # Replace with your model
model_name = "bigscience/bloomz-1b1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Let the library place the model's layers on the available devices and
# load the weights in the dtype recorded with the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # automatic device placement
    torch_dtype="auto",  # keep the checkpoint's dtype instead of forcing one
)
# %%
# Prepare input
text = "The quick brown fox jumps over the lazy dog."
inputs = tokenizer(text, return_tensors="pt")
# Follow the model's actual device rather than assuming a CUDA GPU exists
inputs = inputs.to(model.device)
# Generate output; pass the full encoding so the attention mask is used too
outputs = model.generate(**inputs, max_length=50)
# Decode and print result
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
# %%
# %%
# Prepare input
def generate(text):
    """Prompt the loaded causal LM for an acronym/description mapping of ``text``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text (str): The term to embed in the prompt.

    Returns:
        str: The first generated sequence, decoded with special tokens skipped.
    """
    # Define prompt
    # NOTE(review): the single quotes in this prompt look unbalanced - confirm
    # the intended wording before changing it, since it alters model output.
    prompt = f"Answer Concisely: Give me a mapping between the acronym and descriptor in the format '(acronym: description): '{text}'"
    # Tokenize and move the tensors to wherever the model was placed
    # (device_map="auto" does not guarantee a CUDA device).
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = inputs.to(model.device)
    # Pass the whole encoding so the attention mask reaches generate()
    outputs = model.generate(
        **inputs,
        max_length=100,
        no_repeat_ngram_size=3)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Example usage: run generate() on one sample term and print the decoded text
# Alternative sample input, kept for quick experiments:
# text = "Advanced Data Analytics Platform"
text = 'ColdFusion Markup Language (CFML)'
# Despite the variable name, this holds the full decoded string returned by generate()
acronym = generate(text)
print(f"Acronym: {acronym}")
# %%

52
zero_shot/t5.py Normal file
View File

@ -0,0 +1,52 @@
# %%
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# %%
# Load model and tokenizer
# model_name = "bigscience/bloom-7b1" # Replace with your model
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Let the library place the model's layers on the available devices and
# load the weights in the dtype recorded with the checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    device_map="auto",  # automatic device placement
    torch_dtype="auto",  # keep the checkpoint's dtype instead of forcing one
)
# %%
# Prepare input
text = "The quick brown fox jumps over the lazy dog."
inputs = tokenizer(text, return_tensors="pt")
# Follow the model's actual device rather than assuming a CUDA GPU exists
inputs = inputs.to(model.device)
# Generate output; pass the full encoding so the attention mask is used too
outputs = model.generate(**inputs, max_length=50)
# Decode and print result
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
# %%
# %%
# Prepare input
def generate_acronym(text):
    """Ask the loaded seq2seq model to propose an acronym for ``text``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text (str): The phrase to turn into an acronym.

    Returns:
        str: The first generated sequence, decoded with special tokens skipped.
    """
    # Define prompt
    prompt = f"Answer concisely: make a possible acronym from the following: '{text}'"
    # Tokenize and move the tensors to wherever the model was placed
    # (device_map="auto" does not guarantee a CUDA device).
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = inputs.to(model.device)
    # Pass the whole encoding so the attention mask reaches generate()
    outputs = model.generate(
        **inputs,
        max_length=100,
        no_repeat_ngram_size=3)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# %%
# Example usage: run generate_acronym() on one sample phrase and print the result
# Alternative sample input, kept for quick experiments:
# text = "Advanced Data Analytics Platform"
text = "red hat enterprise linux"
# Holds the decoded string returned by generate_acronym()
acronym = generate_acronym(text)
print(f"Acronym: {acronym}")
# %%