# hipom_data_mapping/data_preprocess/abbreviations/replacement_dict.py
#
# Viewer metadata captured with this copy: 216 lines, 6.8 KiB, Python.

# Substitution mapping for descriptions: regex pattern -> replacement text.
#
# Keys are regular-expression patterns (case-sensitive as written); values are
# the canonical, upper-case replacement. Entries that map a term to itself
# (e.g. 'WATER' -> 'WATER') leave the text unchanged.
#
# Ordering matters: several comments below rely on more specific rules being
# listed before more general ones, so the dict is presumably applied rule by
# rule in insertion order (e.g. with re.sub) - verify against the caller.
#
# NOTE on patterns ending in `\.\b`: because '.' is a non-word character, the
# trailing `\b` only holds when the dot is immediately followed by a word
# character. 'EXH.GAS' therefore matches `\bEXH\.\b` (giving 'EXHAUSTGAS'),
# while 'EXH. GAS' and a trailing 'EXH.' fall through to the dot-less rule
# and keep their dot. This behaviour is kept as-is.
desc_replacement_dict = {
    r'\bLIST\b': 'LIST',
    r'\bList\b': 'LIST',
    r'\bEXH\.\b': 'EXHAUST',
    r'\bEXH\b': 'EXHAUST',
    r'\bEXHAUST\.\b': 'EXHAUST',
    r'\bExhaust\b': 'EXHAUST',
    r'\bEXHAUST\b': 'EXHAUST',
    # generator winding temperature
    # must come before the TEMP / GEN. / WIND. rules: applied in order, those
    # rules rewrite parts of 'GEN.WIND.TEMP' first and this compound pattern
    # would never match
    r'\bGEN\.WIND\.TEMP\b': 'GENERATOR WINDING TEMPERATURE',
    r'\bTEMP\.\b': 'TEMPERATURE',
    r'\bTEMP\b': 'TEMPERATURE',
    r'\bTEMPERATURE\.\b': 'TEMPERATURE',
    r'\bTEMPERATURE\b': 'TEMPERATURE',
    r'\bW\.\b': 'WATER',
    r'\bWATER\b': 'WATER',
    r'\bCW\b': 'COOLING WATER',
    r'\bCYL\.\b': 'CYLINDER',
    r'\bCyl\b': 'CYLINDER',
    r'\bcyl\.\b': 'CYLINDER',
    r'\bCYL\b': 'CYLINDER',
    # also match CYL directly followed by a digit (e.g. 'CYL1'); no trailing
    # \b here, since a word boundary cannot exist between a letter and a digit
    r'\bCYL(?=\d|\W|$)': 'CYLINDER',
    r'\bcylinder\b': 'CYLINDER',
    r'\bCYLINDER\b': 'CYLINDER',
    r'\bCOOL\.\b': 'COOLING',
    r'\bcool\.\b': 'COOLING',
    r'\bcooling\b': 'COOLING',
    r'\bCOOLING\b': 'COOLING',
    r'\bcooler\b': 'COOLER',
    r'\bCOOLER\b': 'COOLER',
    r'\bScav\.\b': 'SCAVENGE',
    r'\bSCAV\.\b': 'SCAVENGE',
    r'\bINL\.\b': 'INLET',
    r'\binlet\b': 'INLET',
    r'\bINLET\b': 'INLET',
    r'\bOUT\.\b': 'OUTLET',
    r'\bOUTL\.\b': 'OUTLET',
    r'\boutlet\b': 'OUTLET',
    r'\bOUTLET\b': 'OUTLET',
    # bunker tank
    r'\bBK\b': 'BUNKER',
    r'\bTK\b': 'TANK',
    # pressure
    r'\bPRESS\b': 'PRESSURE',
    r'\bPRESS\.\b': 'PRESSURE',
    r'\bPress\.\b': 'PRESSURE',
    r'\bpressure\b': 'PRESSURE',
    r'\bPRESSURE\b': 'PRESSURE',
    # this is a special replacement - it is safe to replace PRS w/o checks
    # (no leading \b, so PRS is also replaced at the end of a longer token)
    r'PRS\b': 'PRESSURE',
    r'\bCLR\b': 'CLEAR',
    r'\bENG\.\b': 'ENGINE',
    r'\bENG\b': 'ENGINE',
    r'\bENGINE\b': 'ENGINE',
    r'\bEngine speed\b': 'ENGINE SPEED',
    r'\bEngine running\b': 'ENGINE RUNNING',
    r'\bEngine RPM pickup\b': 'ENGINE RPM PICKUP',
    r'\bEngine room\b': 'ENGINE ROOM',
    # main engine
    r'\bM/E\b': 'MAIN_ENGINE',
    r'\bM_E\b': 'MAIN_ENGINE',
    # substitute only when ME is followed by a digit, a non-word character or
    # the end of the text (so 'ME1' matches but 'MEAN' does not)
    r'\bME(?=\d|\W|$)': 'MAIN_ENGINE',
    r'\bMAIN ENGINE\b': 'MAIN_ENGINE',
    r'\bGen\b': 'GENERATOR_ENGINE',
    # ensure that we substitute only for terms where following GE is num or special
    r'\bGE(?=\d|\W|$)': 'GENERATOR_ENGINE',
    r'\bG/E\b': 'GENERATOR_ENGINE',
    r'\bG_E\b': 'GENERATOR_ENGINE',
    r'\bDG\b': 'GENERATOR_ENGINE',
    r'\bD/G\b': 'GENERATOR_ENGINE',
    r'\bGEN\.\b': 'GENERATOR_ENGINE',
    r'\bGENERATOR ENGINE\b': 'GENERATOR_ENGINE',
    # '<n>MGE' becomes 'NO<n> GENERATOR_ENGINE' via the captured digits
    r'\b(\d+)MGE\b': r'NO\1 GENERATOR_ENGINE',
    r'\bENGINE ROOM\b': 'ENGINE ROOM',
    r'\bE/R\b': 'ENGINE ROOM',
    r'\bFLTR\b': 'FILTER',
    # marine gas oil
    r'\bM\.G\.O\b': 'MARINE GAS OIL',
    r'\bMGO\b': 'MARINE GAS OIL',
    r'\bMDO\b': 'MARINE DIESEL OIL',
    # light fuel oil
    r'\bL\.F\.O\b': 'LIGHT FUEL OIL',
    r'\bLFO\b': 'LIGHT FUEL OIL',
    # heavy fuel oil
    r'\bHFO\b': 'HEAVY FUEL OIL',
    r'\bH\.F\.O\b': 'HEAVY FUEL OIL',
    # for remaining fuel oil that couldn't be substituted
    r'\bF\.O\b': 'FUEL OIL',
    r'\bFO\b': 'FUEL OIL',
    # lubricant
    r'\bLUB\.\b': 'LUBRICANT',
    # lubricating oil
    r'\bL\.O\b': 'LUBRICATING OIL',
    r'\bLO\b': 'LUBRICATING OIL',
    # lubricating oil pressure
    # ('_' is a word character, so the LO and PRESS rules above do not fire
    # inside 'LO_PRESS')
    r'\bLO_PRESS\b': 'LUBRICATING OIL PRESSURE',
    r'\bLO_PRESSURE\b': 'LUBRICATING OIL PRESSURE',
    # low / high temperature
    r'\bL\.T\b': 'LOW TEMPERATURE',
    r'\bLT\b': 'LOW TEMPERATURE',
    r'\bH\.T\b': 'HIGH TEMPERATURE',
    r'\bHT\b': 'HIGH TEMPERATURE',
    # auxiliary boiler
    # replace these first before replacing AUXILIARY only
    r'\bAUX\.BOILER\b': 'AUXILIARY BOILER',
    r'\bAUX\. BOILER\b': 'AUXILIARY BOILER',
    r'\bAUX BLR\b': 'AUXILIARY BOILER',
    # the trailing space separates AUXILIARY from the word that directly
    # followed the dot
    r'\bAUX\.\b': 'AUXILIARY ',
    # composite boiler
    r'\bCOMP\. BOILER\b': 'COMPOSITE BOILER',
    r'\bCOMP\.BOILER\b': 'COMPOSITE BOILER',
    r'\bCOMP BOILER\b': 'COMPOSITE BOILER',
    r'\bWIND\.\b': 'WINDING',
    r'\bWINDING\b': 'WINDING',
    r'\bC\.S\.W\b': 'CSW',
    r'\bCSW\b': 'CSW',
    # 'VLOT.' is a misspelling of 'VOLT.' seen in the data
    r'\bVLOT\.\b': 'VOLTAGE',
    r'\bVOLTAGE\b': 'VOLTAGE',
    r'\bVOLT\.\b': 'VOLTAGE',
    r'\bFREQ\.\b': 'FREQUENCY',
    r'\bFREQUENCY\b': 'FREQUENCY',
    r'\bCURR\.\b': 'CURRENT',
    r'\bCURRENT\b': 'CURRENT',
    r'\bTCA\b': 'TURBOCHARGER',
    r'\bTCB\b': 'TURBOCHARGER',
    r'\bT/C\b': 'TURBOCHARGER',
    r'\bT_C\b': 'TURBOCHARGER',
    # TC followed by a digit, a non-word character or the end of the text
    r'\bTC(?=\d|\W|$)': 'TURBOCHARGER',
    r'\bTURBOCHAGER\b': 'TURBOCHARGER',
    r'\bTURBOCHARGER\b': 'TURBOCHARGER',
    # misc spelling errors
    r'\bOPERATOIN\b': 'OPERATION',
    # wrongly attached terms
    # NOTE(review): this rule sits after the MGO rule, so when rules run in
    # order the separated 'MGO' is not expanded - confirm that is intended
    r'BOILERMGO': 'BOILER MGO',
    # additional standardizing replacement
    # replace # followed by a number with NO
    r'#(?=\d)\b': 'NO',
    r'\bNO\.(?=\d)\b': 'NO',
    # one entry in the data was written with two dots
    r'\bNO\.\.(?=\d)\b': 'NO',
    r'\bNo\.(?=\d)\b': 'NO',
}
# Substitution mapping for units: regex pattern -> replacement text.
#
# Keys are regular-expression patterns (case-sensitive as written) built from
# raw unit strings; values are a canonical upper-case quantity name, or '' to
# drop the unit. Rules are presumably applied one by one in insertion order
# (e.g. with re.sub) - verify against the caller.
#
# `\b` only holds next to a word character, so it is used solely on the sides
# of a pattern that start or end with a letter or digit. Units that begin or
# end with a symbol ('%', '-', '°', '℃', '㎠') have no `\b` on that side;
# otherwise the bare unit could never match.
unit_replacement_dict = {
    r'%': 'PERCENT',
    # a lone '-' (optionally followed by a space) used in place of a unit;
    # matched only as a standalone token so hyphens inside other units such
    # as 'min-¹' are left for their own rules further down
    r'(?<!\S)- (?!\S)': '',
    r'(?<!\S)-(?!\S)': '',
    # ensure no character after A
    r'\bA(?!\w|/)': 'CURRENT',
    r'\bAmp(?!\w|/)': 'CURRENT',
    r'\bHz\b': 'HERTZ',
    r'\bKG/CM2\b': 'PRESSURE',
    r'\bKG/H\b': 'KILOGRAM PER HOUR',
    # NOTE(review): 'KNm' maps to RPM while 'kNm' below maps to TORQUE -
    # kept as in the original mapping, confirm against the data
    r'\bKNm\b': 'RPM',
    r'\bKW\b': 'POWER',
    r'\bKg(?!\w|/)': 'MASS',
    r'\bKw\b': 'POWER',
    r'\bL(?!\w|/)': 'VOLUME',
    r'\bMT/h\b': 'METRIC TONNES PER HOUR',
    r'\bMpa\b': 'PRESSURE',
    r'\bPF\b': 'POWER FACTOR',
    r'\bRPM\b': 'RPM',
    r'\bV(?!\w|/)': 'VOLTAGE',
    r'\bbar(?!\w|/)': 'PRESSURE',
    r'\bbarA\b': 'SCAVENGE PRESSURE',
    r'\bcST\b': 'VISCOSITY',
    r'\bcSt\b': 'VISCOSITY',
    r'\bcst\b': 'VISCOSITY',
    # bare 'deg' only: not followed by a word character, '/' or '.'
    r'\bdeg(?!\w|/|\.)': 'DEGREE',
    # the '.' is unescaped, so it matches any single character between
    # 'deg' and 'C' (e.g. 'deg.C', 'deg/C')
    r'\bdeg.C\b': 'TEMPERATURE',
    r'\bdegC\b': 'TEMPERATURE',
    r'\bdegree\b': 'DEGREE',
    r'\bdegreeC\b': 'TEMPERATURE',
    r'\bhPa\b': 'PRESSURE',
    r'\bhours\b': 'HOURS',
    r'\bkN\b': 'THRUST',
    r'\bkNm\b': 'TORQUE',
    r'\bkW\b': 'POWER',
    # ensure that kg is not followed by anything
    r'\bkg(?!\w|/)': 'FLOW',  # somehow in the data its flow
    r'\bkg/P\b': 'MASS FLOW',
    r'\bkg/cm2\b': 'PRESSURE',
    r'\bkg/cm²\b': 'PRESSURE',
    r'\bkg/h\b': 'MASS FLOW',
    r'\bkg/hr\b': 'MASS FLOW',
    r'\bkg/pulse\b': '',
    r'\bkgf/cm2\b': 'PRESSURE',
    r'\bkgf/cm²\b': 'PRESSURE',
    # no trailing \b: the pattern ends with a symbol
    r'\bkgf/㎠': 'PRESSURE',
    r'\bknots\b': 'SPEED',
    r'\bkw\b': 'POWER',
    r'\bl/Hr\b': 'VOLUME FLOW',
    r'\bl/h\b': 'VOLUME FLOW',
    r'\bl_Hr\b': 'VOLUME FLOW',
    r'\bl_hr\b': 'VOLUME FLOW',
    r'\bM\b': 'DRAFT',  # for wind draft
    # replaces 'm' with itself, i.e. leaves the text unchanged
    r'm': 'm',  # wind draft and trim - not useful
    r'\bm/s\b': 'SPEED',
    r'\bm3\b': 'VOLUME',
    r'\bmH2O\b': 'DRAFT',
    r'\bmWC\b': 'DRAFT',
    r'\bmbar\b': 'PRESSURE',
    r'\bmg\b': 'ACCELERATION',
    r'\bmin-¹\b': '',  # data too varied
    r'\bmm\b': '',  # data too varied
    r'\bmmH2O\b': 'WATER DRUM LEVEL',
    r'\brev\b': 'RPM',
    r'\brpm\b': 'RPM',
    r'\bx1000min-¹\b': '',
    # degree Celsius spellings; no leading \b where the unit starts with a
    # symbol
    r'°C\b': 'TEMPERATURE',
    r'\bºC\b': 'TEMPERATURE',
    r'℃': 'TEMPERATURE',
}