Skip to content

Commit b52f2b9

Browse files
Bug fix: normalize OCEL 2.0 SQLite event/object ID columns to strings on import
1 parent 4f6d52a commit b52f2b9

1 file changed

Lines changed: 25 additions & 0 deletions

File tree

  • pm4py/objects/ocel/importer/sqlite/variants

pm4py/objects/ocel/importer/sqlite/variants/ocel20.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,20 @@ class Parameters(Enum):
2727
EXCEPT_IF_INVALID = "except_if_invalid"
2828

2929

30+
def _normalize_id_series(series: pd.Series) -> pd.Series:
31+
if series is None:
32+
return series
33+
if pd.api.types.is_float_dtype(series):
34+
non_null = series.dropna()
35+
if len(non_null) > 0 and ((non_null % 1) == 0).all():
36+
series = series.astype("Int64")
37+
return series.astype("string")
38+
if pd.api.types.is_numeric_dtype(series):
39+
return series.astype("string")
40+
series = series.astype("string")
41+
return series.str.replace(r"\\.0$", "", regex=True)
42+
43+
3044
def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
3145
if parameters is None:
3246
parameters = {}
@@ -79,6 +93,8 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
7993

8094
EVENTS = pd.read_sql("SELECT * FROM event", conn)
8195
OBJECTS = pd.read_sql("SELECT * FROM object", conn)
96+
EVENTS["ocel_id"] = _normalize_id_series(EVENTS["ocel_id"])
97+
OBJECTS["ocel_id"] = _normalize_id_series(OBJECTS["ocel_id"])
8298

8399
etypes = sorted(pandas_utils.format_unique(EVENTS["ocel_type"].unique()))
84100
otypes = sorted(pandas_utils.format_unique(OBJECTS["ocel_type"].unique()))
@@ -109,6 +125,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
109125
df = df.rename(
110126
columns={"ocel_id": event_id, "ocel_time": event_timestamp}
111127
)
128+
df[event_id] = _normalize_id_series(df[event_id])
112129
event_types_coll.append(df)
113130

114131
for ot in otypes:
@@ -117,6 +134,7 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
117134
df = df.rename(
118135
columns={"ocel_id": object_id, "ocel_time": event_timestamp}
119136
)
137+
df[object_id] = _normalize_id_series(df[object_id])
120138
object_types_coll.append(df)
121139

122140
event_types_coll = pandas_utils.concat(event_types_coll)
@@ -178,6 +196,8 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
178196
"ocel_qualifier": qualifier_field,
179197
}
180198
)
199+
E2O[event_id] = _normalize_id_series(E2O[event_id])
200+
E2O[object_id] = _normalize_id_series(E2O[object_id])
181201
E2O[event_activity] = E2O[event_id].map(events_id_type)
182202
E2O[event_timestamp] = E2O[event_id].map(events_timestamp)
183203
E2O[object_type] = E2O[object_id].map(objects_id_type)
@@ -190,6 +210,11 @@ def apply(file_path: str, parameters: Optional[Dict[Any, Any]] = None):
190210
"ocel_qualifier": qualifier_field,
191211
}
192212
)
213+
if len(O2O) > 0:
214+
O2O[object_id] = _normalize_id_series(O2O[object_id])
215+
O2O[object_id + "_2"] = _normalize_id_series(
216+
O2O[object_id + "_2"]
217+
)
193218
if len(O2O) == 0:
194219
O2O = None
195220

0 commit comments

Comments
 (0)