Skip to content

Instantly share code, notes, and snippets.

@dbatis
Last active December 13, 2015 21:22
Show Gist options
  • Save dbatis/fc67910ce0aba0db2911 to your computer and use it in GitHub Desktop.
Save dbatis/fc67910ce0aba0db2911 to your computer and use it in GitHub Desktop.
This command-line utility receives a directory containing a multi-module Maven project and will output a graph displaying the inter-dependencies of modules.
"""
This command-line utility receives a directory containing a multi-module Maven project and will output a graph
displaying the inter-dependencies of modules.
The input directory does not have to contain a Maven aggregator POM. Every pom.xml found anywhere inside the
given directory tree is parsed.
The output format is determined by the file extension. The following extensions are supported:
- .graphml
- .gml
- .gv (GraphViz)
- .pdf
- any other extension (e.g. an image format) is passed to Graphviz as the output format
Usage:
$ python extract_maven_interdependencies.py [-X] inputDir outputFile
Requirements:
- Python 3.x
- colorama
- networkx
- matplotlib
- lxml
- graphviz (must be in system path)
- graphviz python library
Copyright (c) 2015 Dimitris Batis <deggial@geekylife.gr>
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
import traceback
from colorama import Fore, init as coloramaInit
import argparse
from lxml import etree
import sys
import os
import networkx as nx
import graphviz as gv
import tempfile
import shutil
class PomFile:
    """
    Represents the POM file information that is useful to us (name, parent POM, dependencies, plugins).

    :ivar name: project name ("groupId:artifactId")
    :ivar parent: parent's name ("groupId:artifactId") or None if no parent
    :ivar dependencies: list of dependency names
    :ivar plugins: list of build plugin names
    :ivar reportPlugins: list of reporting plugin names
    """

    def __init__(self, inputFile):
        """
        Parse a POM file and extract the information needed to build the graph.

        Only the given file is read; the parent POM is identified from the <parent> tag but is not
        opened or parsed here.

        :param inputFile: either a path to file or a file-like object
        :raise lxml.etree.XMLSyntaxError: if a non-XML input file
        """
        # etree.parse accepts a path as well as a file-like object, so there is no need to open
        # the file ourselves (and no file handle is left dangling).
        tree = etree.parse(inputFile)
        root = tree.getroot()

        # is there a parent POM?
        self.parent = None
        parentGroupId = ''
        parentTag = PomFile.getSingleElement(root, 'parent')
        if parentTag is not None:
            # a missing tag yields None; fall back to '' so that the concatenation cannot fail
            parentGroupId = PomFile.getText(parentTag, 'groupId') or ''
            parentArtifactId = PomFile.getText(parentTag, 'artifactId') or ''
            self.parent = parentGroupId + ':' + parentArtifactId

        # get groupId, artifactId. If groupId is not found, use parent group ID
        self.name = PomFile.getFullName(root, defaultGroupId=parentGroupId)

        # get names of dependencies and plugins as lists of "groupId:artifactId"
        self.dependencies = PomFile._collectNames(root, "dependencies/dependency")
        self.plugins = PomFile._collectNames(root, "build/plugins/plugin")
        self.reportPlugins = PomFile._collectNames(root, "reporting/plugins/plugin")

    @staticmethod
    def _collectNames(root, path):
        """
        Return the "groupId:artifactId" names of all elements matching a simple path below root.

        :param root: lxml element to search from
        :param path: unnamespaced search path, e.g. "dependencies/dependency"
        :return: list of names, in document order
        """
        return [PomFile.getFullName(x) for x in root.xpath(PomFile.constructSearchPath(path))]

    @staticmethod
    def getFullName(element, defaultGroupId=''):
        """
        Retrieve the "groupId:artifactId" name of an element such as a <dependency> tag.

        A missing or empty groupId is replaced by defaultGroupId; a missing artifactId is treated
        as an empty string.

        :param element: lxml element
        :param defaultGroupId: group ID to use if groupId is missing
        :return: string in the form "groupId:artifactId"
        """
        groupId = PomFile.getText(element, "groupId") or defaultGroupId or ''
        artifactId = PomFile.getText(element, "artifactId") or ''
        return groupId + ':' + artifactId

    @staticmethod
    def getText(element, path):
        """
        Given a simple path in the style of "node1/node2/node3" it searches for the text content of the
        first element matching the path. Node tags are given without namespace.

        :param element: lxml element
        :param path: path to retrieve text for
        :return: text value or None if element did not exist
        """
        searchPath = './' + PomFile.constructSearchPath(path) + "/text()[1]"
        result = element.xpath(searchPath)
        if not result:
            return None
        return str(result[0])

    @staticmethod
    def getSingleElement(element, path):
        """
        Given a simple path in the style of "node1/node2/node3" it searches for the first element matching the path.
        Node tags are given without namespace.

        :param element: lxml element
        :param path: unnamespaced search path
        :return: found element or None if element did not exist
        """
        searchPath = './' + PomFile.constructSearchPath(path) + "[1]"
        result = element.xpath(searchPath)
        if not result:
            return None
        return result[0]

    @staticmethod
    def constructSearchPath(path):
        """
        Helper method to construct search path from a simple "node1/node2/node3" approach.

        Every step matches on local-name() so that the search works regardless of the XML namespace
        declared by the POM.

        :param path: search path, without namespaces
        :return: xpath search path
        """
        return '/'.join('*[local-name() = "' + tag + '"]' for tag in path.split('/'))
class GraphGenerator:
    """
    This is the class that does all the actual job to generate the graph, receiving an input folder
    and outputting the graph at the requested format.

    :ivar debug: whether debug messages are printed
    :ivar nodes: shortened names of all projects found in the input directory
    :ivar graph: list of tuples (project, dependency, dependency_type) using shortened names
    """

    def __init__(self, inputDir, debug=False):
        """
        Read the input directory and process POM files to generate graph.

        :param inputDir: input directory path
        :param debug: if true, print the path of every POM file found
        :raise Exception: if inputDir does not exist or is not a directory
        """
        self.debug = debug

        # isdir (rather than exists) so that a regular file is rejected instead of being
        # silently walked as an empty tree
        if not os.path.isdir(inputDir):
            raise Exception("Input directory not found")

        # recursively search directory for pom.xml
        pomFiles = []
        for dirpath, _dirs, filenames in os.walk(inputDir):
            for filename in filenames:
                if filename.upper() != 'POM.XML':
                    continue
                path = os.path.abspath(os.path.join(dirpath, filename))
                if debug:
                    print('DEBUG: Found POM ' + path)
                pomFiles.append(PomFile(path))

        nativeProjectNames = [pomFile.name for pomFile in pomFiles]
        # set copy for the membership tests below
        ownProjects = set(nativeProjectNames)

        # let's build a list of tuples (project, dependency, dependency_type) where we only keep
        # dependencies/plugins that are our own projects
        # NOTE(review): the parent edge is added even when the parent is not one of the scanned
        # projects, so external parents show up in the graph - confirm this is intended.
        treeNodes = []
        for pomFile in pomFiles:
            if pomFile.parent is not None:
                treeNodes.append((pomFile.name, pomFile.parent, 'parent'))
            labelledSources = (
                (pomFile.dependencies, ''),
                (pomFile.plugins, 'build plugin'),
                (pomFile.reportPlugins, 'report plugin'),
            )
            for names, dependencyType in labelledSources:
                treeNodes.extend((pomFile.name, dependency, dependencyType)
                                 for dependency in names if dependency in ownProjects)

        # shorten all names
        self.nodes = [GraphGenerator.shortenName(x) for x in nativeProjectNames]
        self.graph = [(GraphGenerator.shortenName(x), GraphGenerator.shortenName(y), z)
                      for (x, y, z) in treeNodes]

    def generate(self, outputFile):
        """
        Generate output file. The type of export is based on the extension of the filename.

        :param outputFile: output file
        :raise Exception: if outputFile has no extension
        """
        baseFilename, fileExtension = os.path.splitext(outputFile)
        fileExtension = fileExtension.upper()

        if fileExtension == '.GRAPHML':
            nx.write_graphml(self.generateNetworkX(), outputFile)
        elif fileExtension == '.GML':
            nx.write_gml(self.generateNetworkX(), outputFile)
        elif fileExtension == '.GV':
            self.generateGraphviz().save(outputFile)
        elif not fileExtension:
            # without an extension there is no output format to hand to Graphviz
            raise Exception("Output file must have an extension that selects the output format")
        else:
            # use Graphviz to render an image/PDF; the extension (without dot) is the format
            graph = self.generateGraphviz()
            graph.format = fileExtension.lower()[1:]

            # in order not to overwrite any source files with the same name, create in temporary
            # directory, then copy file. Only the base name is used inside the temporary directory:
            # an absolute or nested output path must not lead outside of it.
            with tempfile.TemporaryDirectory() as tmpdir:
                renderedFile = graph.render(os.path.basename(baseFilename), directory=tmpdir)
                shutil.copy(renderedFile, outputFile)

    @staticmethod
    def shortenName(name):
        """
        Shortens the name of a Maven project by reducing the groupId to its dotted initials,
        e.g. "com.example:core" becomes "c.e.core".

        Empty groupId segments are skipped, so a name with an empty groupId is shortened to just
        the artifact ID.

        :param name: full name ("groupId:artifactId")
        :return: shortened name
        """
        groupId, artifact = name.split(':', 1)
        initials = [segment[0] for segment in groupId.split('.') if segment]
        return '.'.join(initials + [artifact])

    def generateNetworkX(self):
        """
        Generate a networkx graph of the internal data so that we can save a GraphML or GML file.

        :return: networkx graph
        """
        graph = nx.DiGraph()
        for name in self.nodes:
            graph.add_node(name)
        for project, dependency, dependencyType in self.graph:
            graph.add_edge(project, dependency, label=dependencyType)
        return graph

    def generateGraphviz(self):
        """
        Generate a graph with the graphviz library from internal data.

        :return: graphviz graph
        """
        graph = gv.Digraph()
        graph.node_attr['shape'] = 'rect'
        graph.graph_attr['overlap'] = 'false'
        for name in self.nodes:
            graph.node(name, label=name)
        for project, dependency, dependencyType in self.graph:
            graph.edge(project, dependency, label=dependencyType)
        return graph
class ArgumentParser(argparse.ArgumentParser):
    """
    Argument parser whose error messages are printed in red. colorama must already be initialized.
    """

    def error(self, message, printTraceback=False):
        """
        Print the usage line, then exit with status 2 and a red error message. When requested,
        the stack trace of the exception currently being handled precedes the message.

        :param message: error message
        :param printTraceback: if true, print traceback as well
        """
        self.print_usage(sys.stderr)

        # assemble the pieces of the final message and join them once
        pieces = [Fore.RED]
        if printTraceback:
            pieces.extend((traceback.format_exc(), '\n'))
        pieces.extend(('ERROR: ', message, '\n'))

        self.exit(2, ''.join(pieces))
class CliApplication:
    """
    Command-line front end around the main workflow.
    """

    def __init__(self):
        """
        Set up coloured output and build the command-line arguments parser.
        """
        coloramaInit(autoreset=True)

        # CLI parser: two positional arguments plus an optional debug switch
        cliParser = ArgumentParser()
        cliParser.add_argument("inputDir", help="input directory containing the multi-module project")
        cliParser.add_argument("outputFile", help="output file (graph image or GraphML)")
        cliParser.add_argument("-X", "--debug", help="print stack trace on error", action="store_true")
        self.parser = cliParser

    def execute(self):
        """
        Run the application: parse the arguments, build the graph and write the output file.
        Any failure is reported through the parser's error method.
        """
        options = self.parser.parse_args()
        try:
            GraphGenerator(options.inputDir, debug=options.debug).generate(options.outputFile)
            print("Output generated at " + options.outputFile)
        except Exception as failure:
            self.parser.error(str(failure), printTraceback=options.debug)
if __name__ == '__main__':
    # run the command-line workflow only when executed as a script
    CliApplication().execute()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment