Created
May 17, 2020 22:47
-
-
Save joelverhagen/6083f4303c741105366e488d2e067da4 to your computer and use it in GitHub Desktop.
Convert NuGet restore logs to XES (process mining) format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import os
import urllib
import urllib.parse

from pm4py.objects.log.log import EventLog, Trace, Event
from pm4py.objects.log.exporter.xes import exporter as xes_exporter
def parse_request_line(line):
    """Translate one restore-log line into XES event attributes.

    Args:
        line: A raw line from the log file, including any leading
            indentation and trailing newline.

    Returns:
        A dict of event attributes for a recognised HTTP request
        (``GET``) or response (``OK``, ``NotFound``,
        ``InternalServerError``) line, or ``None`` when the line is not an
        HTTP request/response line at all.

    Raises:
        ValueError: If the line is indented like a request/response line
            and contains a URL but uses an unrecognised verb or status, or
            if a recognised line does not have the expected number of
            space-separated fields.
    """
    # NOTE(review): the `line[2] != " "` test further down hints that the log
    # indents these lines by two spaces; confirm the single-space prefixes
    # were not whitespace-collapsed when this script was copied.
    if line.startswith(" GET "):
        method, url = line.strip().split(" ", 1)
        return {
            "concept:name": url,
            "lifecycle:transition": "start",
            "nuget:http_method": method,
        }

    if line.startswith((" OK ", " NotFound ")):
        status, url, duration = line.strip().split(" ", 2)
        return {
            "concept:name": url,
            "lifecycle:transition": "complete",
            "nuget:http_status": status,
            "nuget:http_duration": duration,
        }

    if line.startswith(" InternalServerError "):
        status, url, duration = line.strip().split(" ", 2)
        return {
            "concept:name": url,
            "lifecycle:transition": "ate_abort",
            "nuget:http_status": status,
            "nuget:http_duration": duration,
        }

    # Fail loudly on anything that looks like a request/response line but is
    # not handled above, so that new verbs/statuses are not silently dropped.
    looksLikeHttpLine = (
        line.startswith(" ")
        and len(line) > 2
        and line[2] != " "
        and ("http://" in line or "https://" in line)
    )
    if looksLikeHttpLine:
        raise ValueError("Unknown request/response line: " + line)

    return None


def describe_sources(sources):
    """Build the export file-name fragment summarising a feed list.

    Args:
        sources: Non-empty list of feed URLs or paths; the first entry
            names the fragment.

    Returns:
        The hostname of the first source (or ``"local"`` when it has none),
        followed by ``-and-N-other-source(s)`` when there are N >= 1
        further sources.
    """
    hostname = urllib.parse.urlparse(sources[0]).hostname
    # A local folder feed has no hostname; urlparse reports None, which
    # previously made the string concatenation below raise TypeError.
    description = hostname if hostname else "local"

    # Count only the sources beyond the first one. The original reported
    # len(sources) here for three or more feeds, overstating the count by one.
    otherCount = len(sources) - 1
    if otherCount == 1:
        description += "-and-1-other-source"
    elif otherCount > 1:
        description += "-and-" + str(otherCount) + "-other-sources"
    return description


# solution name -> repr(sorted feed list) -> EventLog holding one Trace per
# restore log file that used exactly that set of feeds.
solutionNameToSourcesToEventLog = {}

for (dirPath, dirNames, fileNames) in os.walk(r"logs"):
    for fileName in fileNames:
        if not fileName.startswith("restoreLog-"):
            continue

        print("Processing " + fileName + "...")

        # Parse the log file and convert it to a Trace
        filePath = os.path.join(dirPath, fileName)
        trace = Trace()
        trace.attributes["concept:name"] = filePath
        sources = []
        with open(filePath, "r") as fh:
            inSourceList = False
            for line in fh:
                stripped = line.strip()

                # The feed list runs from "Feeds used:" to the next blank line.
                if inSourceList:
                    if not stripped:
                        inSourceList = False
                    else:
                        sources.append(stripped)

                attributes = parse_request_line(line)
                if attributes is not None:
                    trace.append(Event(**attributes))
                elif stripped == "Feeds used:":
                    inSourceList = True

        if not sources:
            raise ValueError("No feeds found.")
        # Sort so that the same feeds in a different order share one key.
        sources.sort()

        # Add the trace to the proper Event Log. The solution name is the
        # second dash-separated field of the file name.
        (_, solutionName, _) = fileName.split("-", 2)
        sourcesToEventLog = solutionNameToSourcesToEventLog.setdefault(solutionName, {})

        sourcesKey = repr(sources)
        if sourcesKey not in sourcesToEventLog:
            eventLog = EventLog()
            eventLog.attributes["concept:name"] = solutionName
            eventLog.attributes["nuget:sources"] = sources
            sourcesToEventLog[sourcesKey] = eventLog
        else:
            eventLog = sourcesToEventLog[sourcesKey]

        eventLog.append(trace)

# Export every (solution, feed set) event log to its own XES file. The running
# index keeps file names unique when two feed sets share a description.
index = 0
for sourcesToEventLog in solutionNameToSourcesToEventLog.values():
    for eventLog in sourcesToEventLog.values():
        sourceDescription = describe_sources(eventLog.attributes["nuget:sources"])
        exportName = eventLog.attributes["concept:name"] + "_" + str(index) + "_" + sourceDescription + ".xes"
        print("Exporting " + exportName + "...")
        xes_exporter.apply(eventLog, exportName)
        index += 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment