The number of artifacts that we maintain keeps growing, and it is sometimes difficult to figure out the dependencies between them. I know there are ways to see all the artifacts that a given artifact depends on, but I couldn't find anything for the other way round. So I created this Python script that does both.
The script analyzes the POMs in the provided repositories and then generates an HTML page showing all the dependencies. The algorithms used are a bit brute-force and could be optimized, but the run times are OK for me so far. I'm also sure the Python code can be made more elegant.
#! /usr/bin/env python
#
# This script calculates the dependencies among artifacts in the whole repository
#
import datetime
import logging
import os
import subprocess
import xml.dom.minidom
# Root URL of the Subversion server; every repository path below is built on it.
theRepository = "https://xxx.xxx.xxx.xxx/svn/"

# Repository directories scanned by listArtifact(): each subdirectory is
# treated as a project whose artifact lives in <project>/trunk/pom.xml.
repositoryList = [
    theRepository + 'javaprojects/',
    theRepository + 'projects/',
]

# Repository directories with a "special" layout, scanned by listArtifact2():
# the pom.xml sits directly inside each subdirectory (no trunk/ level).
repositoryList2 = [
    theRepository + 'javaprojects/MavenPlugins/trunk/',
]
def executeCommand(cmd, logger, isLinux):
    """Run an external command and return what it wrote to standard output.

    Parameters:
        cmd: the command line to execute, as a single string.
        logger: object with an ``info`` method; it receives the command, the
            captured output and the elapsed time.
        isLinux: forwarded to ``subprocess.Popen`` as ``shell``; when True the
            command is run through the shell.

    Returns:
        The captured standard output as text. Standard error is not captured
        and the exit status is not inspected.
    """
    logger.info("Execute command " + cmd)
    startTime = datetime.datetime.now()
    # universal_newlines=True makes communicate() return text instead of bytes
    # on Python 3 (the string concatenation below and the callers' split('\n')
    # would otherwise fail) and normalises "\r\n" line endings.
    proc = subprocess.Popen(
        cmd,
        shell=isLinux,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    cmdOutput = proc.communicate()[0]
    logger.info("Command result " + cmdOutput)
    elapsed = datetime.datetime.now() - startTime
    logger.info("Overall time: " + str(elapsed))
    return cmdOutput
def initializeLogger(name, fileName):
    """Return the logger called *name*, set to INFO and writing to *fileName*.

    The call is idempotent: if the logger already has a file handler for the
    same file, no second handler is attached, so repeated calls do not cause
    every message to be written several times.

    Parameters:
        name: name passed to ``logging.getLogger``.
        fileName: path of the log file.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    # FileHandler stores the absolute path of its file in ``baseFilename``.
    targetPath = os.path.abspath(fileName)
    for existing in logger.handlers:
        if (isinstance(existing, logging.FileHandler)
                and existing.baseFilename == targetPath):
            return logger

    hdlr = logging.FileHandler(fileName)
    # Only the bare message is written. A richer layout would be e.g.
    # "%(asctime)s %(name)s:%(lineno)d %(levelname)s %(message)s".
    hdlr.setFormatter(logging.Formatter("%(message)s"))
    logger.addHandler(hdlr)
    return logger
class Artifact:
    """A Maven artifact together with its dependency and user relations.

    An artifact is identified by its ``groupId`` / ``artifactId`` pair: two
    instances with the same pair compare (and hash) equal, whatever relations
    they carry.

    Attributes:
        artifactId, groupId: the identifying coordinates.
        dependencies: artifacts this one depends on directly.
        indirectDependencies: further artifacts recorded as indirect dependencies.
        directUsers: artifacts recorded as direct users of this one.
        indirectUsers: artifacts recorded as indirect users of this one.
        depTag, userTag: suffixes appended to the HTML anchor names.
    """

    def __init__(self, artifactId, groupId):
        self.artifactId = artifactId
        self.groupId = groupId
        self.dependencies = []
        self.indirectDependencies = []
        self.directUsers = []
        self.indirectUsers = []
        self.depTag = '_DEP'
        self.userTag = '_USER'

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------
    def __eq__(self, other):
        # Returning NotImplemented (instead of raising AttributeError) lets
        # comparisons against unrelated objects evaluate to "not equal".
        if not isinstance(other, Artifact):
            return NotImplemented
        return (self.artifactId == other.artifactId
                and self.groupId == other.groupId)

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Kept consistent with __eq__ so instances can be used in sets/dicts.
        return hash((self.groupId, self.artifactId))

    def __str__(self):
        return '(' + str(self.groupId) + "." + str(self.artifactId) + ') '

    def __repr__(self):
        return 'Artifact(%r, %r)' % (self.artifactId, self.groupId)

    # ------------------------------------------------------------------
    # Dependencies
    # ------------------------------------------------------------------
    def appendDependency(self, artifact):
        """Record *artifact* as a direct dependency unless it is already a
        (direct or indirect) dependency."""
        if not self.isDependency(artifact):
            self.dependencies.append(artifact)

    def noOfDependencies(self):
        """Return the number of direct dependencies."""
        return len(self.dependencies)

    def getDependency(self, i):
        """Return the direct dependency at position *i*."""
        return self.dependencies[i]

    def noOfIndirectDependencies(self):
        """Return the number of indirect dependencies."""
        return len(self.indirectDependencies)

    def getIndirectDependency(self, i):
        """Return the indirect dependency at position *i*."""
        return self.indirectDependencies[i]

    def isDirectDependency(self, dep):
        """Return whether *dep* is a direct dependency of this artifact."""
        return dep in self.dependencies

    def isIndirectDependency(self, dep):
        """Return whether *dep* is an indirect dependency of this artifact."""
        return dep in self.indirectDependencies

    def isDependency(self, dep):
        """Return whether *dep* is a direct or indirect dependency."""
        return self.isDirectDependency(dep) or self.isIndirectDependency(dep)

    def addIndirectDependency(self, dep):
        """Add *dep* to the indirect dependencies if it is not yet a dependency.

        Returns True when *dep* was inserted, False otherwise.
        """
        if self.isDependency(dep):
            return False
        self.indirectDependencies.append(dep)
        return True

    def addListOfDependencies(self, list):
        """Add every entry of *list* as an indirect dependency where missing.

        Returns True when at least one entry was inserted.
        """
        modified = False
        # Every entry must be offered, so no short-circuiting here.
        for entry in list:
            if self.addIndirectDependency(entry):
                modified = True
        return modified

    def getListDependencies(self):
        """Return a new list: direct dependencies followed by indirect ones."""
        return self.dependencies + self.indirectDependencies

    def printDependencies(self):
        """Return a printable string with all the dependencies."""
        parts = [str(self) + 'has ' + str(self.noOfDependencies())
                 + ' dependencies:\n']
        parts.extend('\t' + str(entry) + '\n' for entry in self.dependencies)
        parts.append(' It has ' + str(len(self.indirectDependencies))
                     + ' indirect dependencies:\n')
        parts.extend('\t' + str(entry) + '\n'
                     for entry in self.indirectDependencies)
        return ''.join(parts)

    # ------------------------------------------------------------------
    # Users
    # ------------------------------------------------------------------
    def isDirectUser(self, user):
        """Return whether *user* is a direct user of this artifact."""
        return user in self.directUsers

    def isIndirectUser(self, user):
        """Return whether *user* is an indirect user of this artifact."""
        return user in self.indirectUsers

    def isUser(self, user):
        """Return whether *user* is a direct or indirect user."""
        return self.isDirectUser(user) or self.isIndirectUser(user)

    def addDirectUser(self, user):
        """Record *user* as a direct user unless it is already a user."""
        if not self.isUser(user):
            self.directUsers.append(user)

    def addIndirectUser(self, user):
        """Record *user* as an indirect user unless it is already a user."""
        if not self.isUser(user):
            self.indirectUsers.append(user)

    def printUsers(self):
        """Return a printable string with all the users of this artifact."""
        parts = [str(self) + 'has ' + str(len(self.directUsers))
                 + ' direct users:\n']
        parts.extend('\t' + str(entry) + '\n' for entry in self.directUsers)
        parts.append(' It has ' + str(len(self.indirectUsers))
                     + ' indirect users:\n')
        parts.extend('\t' + str(entry) + '\n' for entry in self.indirectUsers)
        return ''.join(parts)

    # ------------------------------------------------------------------
    # HTML output
    # ------------------------------------------------------------------
    def getHTMLName(self):
        """Return the display name, ``groupId.artifactId``."""
        return self.groupId + "." + self.artifactId

    def getHTMLTag(self):
        """Return the anchor name of this artifact's section."""
        return 'ARTIFACT_' + self.getHTMLName()

    def getHTMLDepTag(self):
        """Return the anchor name of the dependencies sub-section."""
        return self.getHTMLTag() + self.depTag

    def getHTMLUserTag(self):
        """Return the anchor name of the users sub-section."""
        return self.getHTMLTag() + self.userTag

    def printHTMLIndex(self, filePtr):
        """Write the index line for this artifact to *filePtr*."""
        filePtr.write('<a href="#' + self.getHTMLTag() + '">'
                      + self.getHTMLName() + '</a> ')
        filePtr.write('<a href="#' + self.getHTMLDepTag()
                      + '">(dependencies</a>, ')
        filePtr.write('<a href="#' + self.getHTMLUserTag()
                      + '">users)</a>, ')
        filePtr.write('<p>')

    def printHTMLDependencies(self, filePtr):
        """Write this artifact's section (heading, dependencies table and
        users table) to *filePtr*."""
        name = self.getHTMLName()
        filePtr.write('<a name="' + self.getHTMLTag() + '"></a>')
        filePtr.write('<h2> Artifact ' + name + '</h2>')
        filePtr.write('<p>')
        filePtr.write('<a name="' + self.getHTMLDepTag() + '"></a>')
        filePtr.write('<h3> ' + name + ' dependencies </h3>')
        self.printHTMLDependenciesTable(filePtr)
        filePtr.write('<p>')
        filePtr.write('<a name="' + self.getHTMLUserTag() + '"></a>')
        filePtr.write('<h3> ' + name + ' users</h3>')
        self.printHTMLUsersTable(filePtr)
        filePtr.write('<p>')

    def _writeHTMLRelationTable(self, filePtr, label, direct, indirect):
        """Write a two-column table (direct / indirect *label*) to *filePtr*."""
        def writeColumn(entries):
            for entry in entries:
                filePtr.write(entry.getHTMLName() + ' <a href="#'
                              + entry.getHTMLTag() + '">(ref)</a> <p>\n')

        filePtr.write('<table border="0">\n<tr>')
        filePtr.write('<td width = 400> <b>' + 'Direct ' + label + ' ('
                      + str(len(direct)) + ') </b></td>\n')
        filePtr.write('<td width = 400><b>' + 'Indirect ' + label + ' ('
                      + str(len(indirect)) + ') </b></td></tr>\n<tr>')
        filePtr.write('<td VALIGN=TOP>')
        writeColumn(direct)
        filePtr.write('</td>\n<td VALIGN=TOP>')
        writeColumn(indirect)
        filePtr.write('</td>\n</tr>\n')
        filePtr.write('</table>\n')
        filePtr.write('<p>')

    def printHTMLDependenciesTable(self, filePtr):
        """Write the direct/indirect dependencies table to *filePtr*."""
        self._writeHTMLRelationTable(filePtr, 'dependencies',
                                     self.dependencies,
                                     self.indirectDependencies)

    def printHTMLUsersTable(self, filePtr):
        """Write the direct/indirect users table to *filePtr*."""
        self._writeHTMLRelationTable(filePtr, 'users',
                                     self.directUsers, self.indirectUsers)
def listDirectory(directory):
    """List one repository directory with ``svn list``.

    The command output is split into lines; blank lines are ignored and names
    ending in '/' are classified as directories (the trailing slash is kept).

    Returns:
        A pair ``(files, directories)`` of lists of entry names.
    """
    listing = executeCommand('svn list ' + directory, logger, isLinux)

    files = []
    subDirs = []
    for rawName in listing.split('\n'):
        name = rawName.strip()
        if not name:
            continue
        if name.endswith('/'):
            subDirs.append(name)
        else:
            files.append(name)
    return files, subDirs
def listArtifact():
    """Find artifacts laid out as ``<repository>/<project>/trunk/pom.xml``.

    Every subdirectory of each entry in ``repositoryList`` is inspected; it is
    taken to be an artifact when it has a ``trunk/`` directory that contains a
    ``pom.xml`` file.

    Returns:
        A list of ``[name, trunkUrl]`` pairs, where *name* is the project
        directory name without its trailing slash.
    """
    found = []
    for repoDir in repositoryList:
        _, projectDirs = listDirectory(repoDir)
        for projectDir in projectDirs:
            _, children = listDirectory(repoDir + projectDir)
            if "trunk/" not in children:
                continue
            trunkDir = repoDir + projectDir + 'trunk/'
            trunkFiles, _ = listDirectory(trunkDir)
            if "pom.xml" in trunkFiles:
                # Drop the trailing '/' that svn list appends to directories.
                found.append([projectDir[:-1], trunkDir])
    return found
def listArtifact2():
    """Find artifacts laid out as ``<repository>/<project>/pom.xml``.

    Every subdirectory of each entry in ``repositoryList2`` is inspected; it is
    taken to be an artifact when it directly contains a ``pom.xml`` file.

    Returns:
        A list of ``[name, directoryUrl]`` pairs, where *name* is the
        subdirectory name without its trailing slash.
    """
    found = []
    for repoDir in repositoryList2:
        _, subDirs = listDirectory(repoDir)
        for subDir in subDirs:
            location = repoDir + subDir
            files, _ = listDirectory(location)
            if "pom.xml" in files:
                # Drop the trailing '/' that svn list appends to directories.
                found.append([subDir[:-1], location])
    return found
def listAllArtifacts():
    """Return the ``[name, path]`` pairs of every artifact in the repository.

    The result is the output of listArtifact() followed by that of
    listArtifact2().
    """
    standardLayout = listArtifact()
    specialLayout = listArtifact2()
    return standardLayout + specialLayout
def extractPomFile(repositoryDir):
    """Download an artifact's pom.xml into the current directory.

    Parameters:
        repositoryDir: repository path of the artifact; 'pom.xml' is appended
            to it directly, so it must end with '/'.

    Returns:
        True if a local ``pom.xml`` exists after the ``svn export`` command.
    """
    localPom = os.path.abspath('pom.xml')
    # Remove any previous copy first, so that the file existing afterwards
    # means this export produced it.
    if os.path.exists(localPom):
        os.remove(localPom)
    executeCommand('svn export ' + repositoryDir + 'pom.xml', logger, isLinux)
    return os.path.exists(localPom)
def _pomChildText(node, tagName):
    """Return the stripped text of the first direct child element *tagName*
    of *node*, or None when the element is missing or empty."""
    for child in node.childNodes:
        if child.nodeType == child.ELEMENT_NODE and child.nodeName == tagName:
            text = ''.join(
                part.data for part in child.childNodes
                if part.nodeType in (part.TEXT_NODE, part.CDATA_SECTION_NODE)
            ).strip()
            return text or None
    return None


def parsePomFile(pomPath='pom.xml', groupPrefix='org.example'):
    """Parse a POM file into an Artifact with its direct dependencies.

    The ``<parent>`` element(s) and every ``<dependency>`` element found under
    ``<project>`` are recorded as direct dependencies, but only when their
    groupId starts with *groupPrefix*.

    Parameters:
        pomPath: path of the POM file to read (defaults to the ``pom.xml``
            in the current directory).
        groupPrefix: groupId prefix selecting the artifacts of interest.

    Returns:
        An Artifact holding the project's ids and its filtered dependencies.

    Raises:
        ValueError: if the file has no ``<project>`` element, or no
            artifactId / groupId can be determined.
    """
    domObject = xml.dom.minidom.parse(pomPath)
    projects = domObject.getElementsByTagName('project')
    if not projects:
        raise ValueError(pomPath + ' does not contain a <project> element')
    project = projects[0]

    artifactName = _pomChildText(project, 'artifactId')
    groupName = _pomChildText(project, 'groupId')
    parents = project.getElementsByTagName('parent')
    if groupName is None:
        # A POM may omit <groupId> and inherit it from its <parent>.
        for parent in parents:
            groupName = _pomChildText(parent, 'groupId')
            if groupName is not None:
                break
    if artifactName is None or groupName is None:
        raise ValueError(pomPath + ' does not define an artifactId and groupId')

    arte = Artifact(artifactName, groupName)

    # Parents first, then dependencies; appendDependency ignores duplicates.
    referenced = list(parents) + list(project.getElementsByTagName('dependency'))
    for node in referenced:
        groupIdDep = _pomChildText(node, 'groupId')
        artifactIdDep = _pomChildText(node, 'artifactId')
        if groupIdDep is None or artifactIdDep is None:
            # Incomplete entry: nothing that could be matched, skip it.
            continue
        if groupIdDep.startswith(groupPrefix):
            arte.appendDependency(Artifact(artifactIdDep, groupIdDep))
    return arte
def getDependencyList(artifactList, dep):
    """Return the complete dependency list of the artifact equal to *dep*.

    *artifactList* is searched for the first entry that compares equal to
    *dep*; that entry's getListDependencies() result is returned. An empty
    list is returned when no entry matches.
    """
    match = next(
        (candidate for candidate in artifactList if dep == candidate),
        None,
    )
    if match is None:
        return []
    return match.getListDependencies()
def findIndirectDependencies(artifactList, index):
    """Fill in the indirect dependencies of ``artifactList[index]`` in place.

    The dependencies of each direct dependency are added first. The indirect
    dependencies are then swept repeatedly, adding the dependencies of each,
    until a complete sweep adds nothing new.
    """
    target = artifactList[index]

    def absorb(dep):
        # Look up dep's own dependencies in artifactList and merge them into
        # the target; True when something new was added.
        return target.addListOfDependencies(getDependencyList(artifactList, dep))

    # Seed with the dependencies of the direct dependencies.
    for position in range(target.noOfDependencies()):
        absorb(target.getDependency(position))

    # At least one sweep is always performed. The count is taken at the start
    # of each sweep, so entries added during a sweep are visited on the next.
    changed = True
    while changed:
        changed = False
        for position in range(target.noOfIndirectDependencies()):
            if absorb(target.getIndirectDependency(position)):
                changed = True
def findUsers(artifactList, index):
    """Update the user lists of ``artifactList[index]`` in place.

    Every other entry of *artifactList* that has the artifact as a direct
    dependency is added as a direct user; otherwise, if it has the artifact
    as an indirect dependency, it is added as an indirect user.
    """
    target = artifactList[index]
    for position, candidate in enumerate(artifactList):
        if position == index:
            # The entry at *index* itself is never considered.
            continue
        if candidate.isDirectDependency(target):
            target.addDirectUser(candidate)
        elif candidate.isIndirectDependency(target):
            target.addIndirectUser(candidate)
def lookArtifactDependencies():
    """Collect every artifact in the repositories with its relations resolved.

    For each location reported by listAllArtifacts() the POM is downloaded
    and parsed; locations whose POM cannot be downloaded are skipped. The
    indirect dependencies are then computed for all artifacts, and afterwards
    the users.

    Returns:
        The list of Artifact objects.
    """
    artifacts = []
    for _name, location in listAllArtifacts():
        if extractPomFile(location):
            artifacts.append(parsePomFile())

    # All indirect dependencies are computed before any user list, because
    # findUsers reads the other artifacts' indirect dependencies.
    for position in range(len(artifacts)):
        findIndirectDependencies(artifacts, position)
    for position in range(len(artifacts)):
        findUsers(artifacts, position)
    return artifacts
def generateArtifactDependenciesPage(artifactList, htmlFile='./artifactDepends.html'):
    """Write the artifact dependencies HTML page.

    The page contains an index (one line per artifact) followed by one
    section per artifact with its dependencies and users.

    Parameters:
        artifactList: objects providing ``printHTMLIndex(filePtr)`` and
            ``printHTMLDependencies(filePtr)``.
        htmlFile: path of the page to write; an existing file is overwritten.
    """
    # The with-block guarantees the file is flushed and closed, even when one
    # of the artifact writers raises.
    with open(htmlFile, 'w') as filePtr:
        filePtr.write('<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 2.0//EN">\n <html>\n <head>\n <title>\n')
        filePtr.write('Artifact dependencies\n </title>\n </head>\n <body>\n')
        filePtr.write('The following list shows all the artifact that has been located in the repositories.')
        filePtr.write('For each artifact the list its dependencies and the list of artifact using them are')
        filePtr.write(' printed.')
        filePtr.write('<h2>Index</h2>')
        filePtr.write('<p>')
        for entry in artifactList:
            entry.printHTMLIndex(filePtr)
        for entry in artifactList:
            entry.printHTMLDependencies(filePtr)
        # Terminate the elements opened in the header.
        filePtr.write('\n </body>\n </html>\n')
# Module-level state read by listDirectory() and extractPomFile().
logger = initializeLogger("artifactDependency", "artifactDependency.log")
# Passed as the ``shell`` argument of executeCommand(): True when os.name is 'posix'.
isLinux = (os.name == 'posix')

# Analyse the repositories and write the HTML report.
artifactList = lookArtifactDependencies()
generateArtifactDependenciesPage(artifactList)