-
-
Save adamburkegh/9ba9ec8e278351095e287dad69af59ea to your computer and use it in GitHub Desktop.
Convert a pandas dataframe to a pm4py event log, which is then exported to an XES file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Pandas dataframe to a pm4py event log conversion | |
""" | |
import pandas as pd | |
import numpy as np | |
import pm4py | |
from pm4py.objects.log.obj import EventLog,Event,Trace # under the hood objects within pm4py, subject to change | |
""" | |
Create a single trace, given the case id (the key of one dataframe groupby group), the name of the column holding each event's activity label, and the dataframe of events belonging to that case. The optional trace_attrs parameter maps each trace-level attribute name to a lookup, indexed by case id, that supplies the value to set on the trace.
""" | |
def convert_trace(trace_id:str, event_col:str, df:pd.DataFrame,
                  trace_attrs=None) -> Trace:
    """
    Build one pm4py Trace from the rows of a single case.

    trace_id    -- case identifier; stored as the trace's 'concept:name' and
                   used as the key into each trace_attrs entry.
    event_col   -- column of df whose value becomes each event's
                   'concept:name'.
    df          -- rows (events) of this case; it is ordered by its
                   'time:timestamp' column before conversion.
    trace_attrs -- optional mapping of attribute name to a lookup indexed by
                   trace_id; each looked-up value is set at trace level.

    Returns the Trace holding one Event per row, in timestamp order. Every
    column of the row is copied onto the event as an attribute.

    Raises KeyError if df has no 'time:timestamp' column or no event_col
    column.
    """
    # build a trace object
    trace = Trace()
    # to add attributes to a trace, use the .attributes member of the trace
    # .attributes is a dictionary
    trace.attributes['concept:name'] = trace_id
    if trace_attrs:
        for attr in trace_attrs:
            trace.attributes[attr] = trace_attrs[attr][trace_id]
    # A stable sort keeps the original row order for events that share a
    # timestamp; the default quicksort may reorder such ties.
    ordered = df.sort_values('time:timestamp', kind='mergesort')
    # convert rows into events
    for _, event_data in ordered.iterrows():
        row = event_data.to_dict()
        event = Event()
        event['concept:name'] = row[event_col]
        for key, value in row.items():
            # The activity label comes from event_col; a dataframe column
            # that happens to be called 'concept:name' must not replace it.
            if key != 'concept:name':
                event[key] = value
        trace.append(event)
    return trace
""" | |
Convert dataframe to XES log | |
""" | |
def convert_to_log(df,casecol,eventcol,trace_attrs=None,
                   logname='Event log'):
    """
    Convert a dataframe of events into a pm4py EventLog.

    df          -- dataframe with one row per event.
    casecol     -- column (or groupby key) identifying the case of each row;
                   one trace is produced per group.
    eventcol    -- column holding the activity label of each event.
    trace_attrs -- optional trace-level attributes, passed through to
                   convert_trace unchanged.
    logname     -- value stored as the log's 'concept:name' attribute.

    Returns the EventLog containing one trace per group, in the order the
    groupby yields them.
    """
    event_log = EventLog(attributes={"concept:name": logname})
    for case_id, case_df in df.groupby(casecol):
        # Add traces through the container's own append() rather than
        # assigning the library-private _list attribute.
        event_log.append(
            convert_trace(case_id, eventcol, case_df, trace_attrs=trace_attrs)
        )
    return event_log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Example use of pandasxes dataframe to XES conversion | |
""" | |
import pandasxes as xes | |
import pandas as pd | |
import pm4py | |
# Sample data: six events, two per animal. 'Animal' serves as the case
# column and 'Action' as the activity column in the conversion below.
sample_columns = {
    'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Sloth', 'Sloth'],
    'Max Speed': [380., 370., 24., 26., .01, .05],
    'Colour': ['Brown', 'Brown', 'Green', 'Green', 'Grey', None],
    'Action': ['Fly', 'Walk', 'Walk', 'Fly', 'Sleep', 'Crawl'],
    'time:timestamp': [1, 2, 3, 4, 5, 6],
}
df = pd.DataFrame(sample_columns)

# Group by animal, convert to an event log, show it, then export it as XES.
event_log = xes.convert_to_log(df, 'Animal', 'Action')
print(event_log)
pm4py.write_xes(event_log, "test.xes")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment