@@ -27,6 +27,20 @@ class Parameters(Enum):
2727 EXCEPT_IF_INVALID = "except_if_invalid"
2828
2929
30+ def _normalize_id_series (series : pd .Series ) -> pd .Series :
31+ if series is None :
32+ return series
33+ if pd .api .types .is_float_dtype (series ):
34+ non_null = series .dropna ()
35+ if len (non_null ) > 0 and ((non_null % 1 ) == 0 ).all ():
36+ series = series .astype ("Int64" )
37+ return series .astype ("string" )
38+ if pd .api .types .is_numeric_dtype (series ):
39+ return series .astype ("string" )
40+ series = series .astype ("string" )
41+ return series .str .replace (r"\\.0$" , "" , regex = True )
42+
43+
3044def apply (file_path : str , parameters : Optional [Dict [Any , Any ]] = None ):
3145 if parameters is None :
3246 parameters = {}
@@ -79,6 +93,8 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
7993
8094 EVENTS = pd .read_sql ("SELECT * FROM event" , conn )
8195 OBJECTS = pd .read_sql ("SELECT * FROM object" , conn )
96+ EVENTS ["ocel_id" ] = _normalize_id_series (EVENTS ["ocel_id" ])
97+ OBJECTS ["ocel_id" ] = _normalize_id_series (OBJECTS ["ocel_id" ])
8298
8399 etypes = sorted (pandas_utils .format_unique (EVENTS ["ocel_type" ].unique ()))
84100 otypes = sorted (pandas_utils .format_unique (OBJECTS ["ocel_type" ].unique ()))
@@ -109,6 +125,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
109125 df = df .rename (
110126 columns = {"ocel_id" : event_id , "ocel_time" : event_timestamp }
111127 )
128+ df [event_id ] = _normalize_id_series (df [event_id ])
112129 event_types_coll .append (df )
113130
114131 for ot in otypes :
@@ -117,6 +134,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
117134 df = df .rename (
118135 columns = {"ocel_id" : object_id , "ocel_time" : event_timestamp }
119136 )
137+ df [object_id ] = _normalize_id_series (df [object_id ])
120138 object_types_coll .append (df )
121139
122140 event_types_coll = pandas_utils .concat (event_types_coll )
@@ -178,6 +196,8 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
178196 "ocel_qualifier" : qualifier_field ,
179197 }
180198 )
199+ E2O [event_id ] = _normalize_id_series (E2O [event_id ])
200+ E2O [object_id ] = _normalize_id_series (E2O [object_id ])
181201 E2O [event_activity ] = E2O [event_id ].map (events_id_type )
182202 E2O [event_timestamp ] = E2O [event_id ].map (events_timestamp )
183203 E2O [object_type ] = E2O [object_id ].map (objects_id_type )
@@ -190,6 +210,11 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
190210 "ocel_qualifier" : qualifier_field ,
191211 }
192212 )
213+ if len (O2O ) > 0 :
214+ O2O [object_id ] = _normalize_id_series (O2O [object_id ])
215+ O2O [object_id + "_2" ] = _normalize_id_series (
216+ O2O [object_id + "_2" ]
217+ )
193218 if len (O2O ) == 0 :
194219 O2O = None
195220
0 commit comments