lunes, 14 de febrero de 2011

Dependencies between maven artifacts

The number of artifacts that we are maintaining keeps growing, and sometimes it is difficult to figure out the dependencies between them. I know there are ways to see all the artifacts that a given artifact depends on, but I couldn't find anything for the other way round. So I created this Python script that does both.

The script analyzes the POMs in the provided repositories and then generates an HTML page showing all the dependencies. The algorithms used are a bit brute force and can be optimized, but the run times are OK for me so far. I'm also sure the Python code can be made more elegant.


#! /usr/bin/env python
#
# This script calculates the dependencies among artifacts in the whole repository
#

import datetime
import logging
import os
import subprocess
import xml.dom.minidom


# Root URL of the Subversion server; every repository location below is
# built by appending a path to it.
theRepository = "https://xxx.xxx.xxx.xxx/svn/"

# Locations whose sub-directories follow the "<project>/trunk/pom.xml" layout.
repositoryList = [
    theRepository + 'javaprojects/',
    theRepository + 'projects/',
]

# Locations with a "special" structure: the pom.xml sits directly inside each
# sub-directory, with no intermediate trunk/ level.
repositoryList2 = [
    theRepository + 'javaprojects/MavenPlugins/trunk/',
]

def executeCommand(cmd, logger, isLinux):
    """ Run the given command line and return what it wrote to standard output.

    cmd     -- the command line, as a single string.
    logger  -- object whose info() method receives the command, its output and
               the elapsed time.
    isLinux -- passed to subprocess.Popen as the ``shell`` flag.

    Only stdout is captured (stderr is not piped) and the exit status of the
    child process is not inspected. The output is returned as text. """
    logger.info("Execute command " + cmd)
    startTime = datetime.datetime.now()

    proc = subprocess.Popen(
        cmd,
        shell=isLinux,
        stdout=subprocess.PIPE,
        # Text mode: without it Python 3 hands back bytes, which can neither be
        # concatenated to the log message below nor split on '\n' by callers.
        universal_newlines=True,
    )
    cmdOutput = proc.communicate()[0]
    logger.info("Command result " + cmdOutput)

    elapsed = datetime.datetime.now() - startTime
    logger.info("Overall time: " + str(elapsed))
    return cmdOutput

def initializeLogger(name, fileName):
    """ Return the logger called *name*, set to INFO level and writing bare
    messages to the file *fileName*.

    Calling the function again with the same name and file returns the same
    logger without attaching a second handler, so messages are not written
    twice. """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)

    # logging.getLogger returns the same object for the same name, so a handler
    # for this file may already be attached from an earlier call.
    logPath = os.path.abspath(fileName)
    for existing in logger.handlers:
        if getattr(existing, 'baseFilename', None) == logPath:
            return logger

    hdlr = logging.FileHandler(fileName)
    # Only the message itself is written. A more verbose alternative would be
    # "%(asctime)s %(name)s:%(lineno)d %(levelname)s %(message)s".
    hdlr.setFormatter(logging.Formatter("%(message)s"))
    logger.addHandler(hdlr)
    return logger


class Artifact(object):
    """ A Maven artifact, identified by its group id and artifact id, together
    with the artifacts it depends on and the artifacts that use it.

    Two artifacts are equal (and hash alike) when both ids match; the relation
    lists are not part of the identity. Every list is kept free of duplicates
    by the add/append methods. """

    def __init__(self, artifactId, groupId):
        self.artifactId = artifactId
        self.groupId = groupId
        self.dependencies = []            # direct dependencies
        self.indirectDependencies = []    # dependencies reached through other artifacts
        self.directUsers = []             # artifacts that depend directly on this one
        self.indirectUsers = []           # artifacts that depend indirectly on this one
        self.depTag = '_DEP'              # suffix of the HTML anchor of the dependency section
        self.userTag = '_USER'            # suffix of the HTML anchor of the user section

    # ----- identity ---------------------------------------------------------

    def __eq__(self, other):
        # Anything that is not an Artifact is left to Python's default handling
        # instead of failing on a missing attribute.
        if not isinstance(other, Artifact):
            return NotImplemented
        return (self.artifactId == other.artifactId) and (self.groupId == other.groupId)

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Defined alongside __eq__ so that instances stay usable in sets and as
        # dictionary keys.
        return hash((self.groupId, self.artifactId))

    def __str__(self):
        return '(' + str(self.groupId) + "." + str(self.artifactId) + ') '

    # ----- dependencies -----------------------------------------------------

    def appendDependency(self, artifact):
        """ Adds a direct dependency unless the artifact is already a (direct or
        indirect) dependency. """
        if not self.isDependency(artifact):
            self.dependencies.append(artifact)

    def noOfDependencies(self):
        return len(self.dependencies)

    def getDependency(self, i):
        return self.dependencies[i]

    def noOfIndirectDependencies(self):
        return len(self.indirectDependencies)

    def getIndirectDependency(self, i):
        return self.indirectDependencies[i]

    def isDirectDependency(self, dep):
        """ This method returns whether the given artifact is a direct dependency of this object or not."""
        return dep in self.dependencies

    def isIndirectDependency(self, dep):
        """ This method returns whether the given artifact is an indirect dependency of this object or not."""
        return dep in self.indirectDependencies

    def isDependency(self, dep):
        """ This method returns whether the given artifact is a dependency of this object or not."""
        return self.isDirectDependency(dep) or self.isIndirectDependency(dep)

    def addIndirectDependency(self, dep):
        """ This method adds an artifact to the indirect dependency list if it is
        not already a dependency. It returns True in case it is inserted."""
        if self.isDependency(dep):
            return False
        self.indirectDependencies.append(dep)
        return True

    def addListOfDependencies(self, list):
        """ This method adds every artifact of the given list as an indirect
        dependency, skipping those already known. It returns True when at least
        one artifact was inserted. """
        # The parameter keeps its original name (which shadows the builtin) so
        # that keyword callers keep working.
        modified = False
        for entry in list:
            if self.addIndirectDependency(entry):
                modified = True
        return modified

    def getListDependencies(self):
        """ This method returns the complete list of dependencies (direct ones
        first, then indirect ones) as a new list."""
        return self.dependencies + self.indirectDependencies

    # ----- users ------------------------------------------------------------

    def isDirectUser(self, user):
        """ This method returns whether the given artifact is a direct user of this object or not."""
        return user in self.directUsers

    def isIndirectUser(self, user):
        """ This method returns whether the given artifact is an indirect user of this object or not."""
        return user in self.indirectUsers

    def isUser(self, user):
        """ This method returns whether the given artifact is a user of this object or not."""
        return self.isDirectUser(user) or self.isIndirectUser(user)

    def addDirectUser(self, user):
        if not self.isUser(user):
            self.directUsers.append(user)

    def addIndirectUser(self, user):
        if not self.isUser(user):
            self.indirectUsers.append(user)

    # ----- plain text reports -----------------------------------------------

    def printDependencies(self):
        """ Returns a string with all the dependencies """
        parts = [str(self) + 'has ' + str(self.noOfDependencies()) + ' dependencies:\n']
        parts.extend('\t' + str(entry) + '\n' for entry in self.dependencies)
        parts.append(' It has ' + str(len(self.indirectDependencies)) + ' indirect dependencies:\n')
        parts.extend('\t' + str(entry) + '\n' for entry in self.indirectDependencies)
        return ''.join(parts)

    def printUsers(self):
        """ Returns a printable string with all the users of this artifact """
        parts = [str(self) + 'has ' + str(len(self.directUsers)) + ' direct users:\n']
        parts.extend('\t' + str(entry) + '\n' for entry in self.directUsers)
        parts.append(' It has ' + str(len(self.indirectUsers)) + ' indirect users:\n')
        parts.extend('\t' + str(entry) + '\n' for entry in self.indirectUsers)
        return ''.join(parts)

    # ----- HTML report ------------------------------------------------------

    def getHTMLName(self):
        return self.groupId + "." + self.artifactId

    def getHTMLTag(self):
        return 'ARTIFACT_' + self.getHTMLName()

    def getHTMLDepTag(self):
        return self.getHTMLTag() + self.depTag

    def getHTMLUserTag(self):
        return self.getHTMLTag() + self.userTag

    def printHTMLIndex(self, filePtr):
        """ This method prints the index line for the artifact."""
        filePtr.write('<a href="#' + self.getHTMLTag() + '">' + self.getHTMLName() + '</a> ')
        filePtr.write('<a href="#' + self.getHTMLDepTag() + '">(dependencies</a>, ')
        filePtr.write('<a href="#' + self.getHTMLUserTag() + '">users)</a>, ')
        filePtr.write('<p>')

    def printHTMLDependencies(self, filePtr):
        """ This method prints the section of the artifact: a heading followed by
        the dependency table and the user table, each preceded by its anchor."""
        filePtr.write('<a name="' + self.getHTMLTag() + '"></a>')
        filePtr.write('<h2> Artifact ' + self.getHTMLName() + '</h2>')
        filePtr.write('<p>')
        filePtr.write('<a name="' + self.getHTMLDepTag() + '"></a>')
        filePtr.write('<h3> ' + self.getHTMLName() + ' dependencies </h3>')
        self.printHTMLDependenciesTable(filePtr)
        filePtr.write('<p>')
        filePtr.write('<a name="' + self.getHTMLUserTag() + '"></a>')
        filePtr.write('<h3> ' + self.getHTMLName() + ' users</h3>')
        self.printHTMLUsersTable(filePtr)
        filePtr.write('<p>')

    def printHTMLDependenciesTable(self, filePtr):
        """ This method prints the two-column table of direct and indirect dependencies."""
        self._printHTMLTable(filePtr,
                             'Direct dependencies (', self.dependencies,
                             'Indirect dependencies (', self.indirectDependencies)

    def printHTMLUsersTable(self, filePtr):
        """ This method prints the two-column table of direct and indirect users."""
        self._printHTMLTable(filePtr,
                             'Direct users (', self.directUsers,
                             'Indirect users (', self.indirectUsers)

    def _printHTMLTable(self, filePtr, leftTitle, leftEntries, rightTitle, rightEntries):
        """ Writes a two-column table: a header row with each title and the size
        of its list, then one cell per list with a line and a link per artifact."""
        filePtr.write('<table border="0">\n<tr>')
        filePtr.write('<td width = 400> <b>' + leftTitle + str(len(leftEntries)) + ') </b></td>\n')
        filePtr.write('<td width = 400><b>' + rightTitle + str(len(rightEntries)) +
                      ') </b></td></tr>\n<tr>')
        filePtr.write('<td VALIGN=TOP>')

        for entry in leftEntries:
            filePtr.write(entry.getHTMLName() + ' <a href="#' + entry.getHTMLTag() + '">(ref)</a> <p>\n')

        filePtr.write('</td>\n<td VALIGN=TOP>')

        for entry in rightEntries:
            filePtr.write(entry.getHTMLName() + ' <a href="#' + entry.getHTMLTag() + '">(ref)</a> <p>\n')

        filePtr.write('</td>\n</tr>\n')
        filePtr.write('</table>\n')
        filePtr.write('<p>')

def listDirectory(directory):
    """ Lists a repository directory with 'svn list' and returns two lists:
    first the files, then the sub-directories. Entries ending in '/' are taken
    to be directories and keep that trailing slash; blank lines of the command
    output are ignored. """
    output = executeCommand('svn list ' + directory, logger, isLinux)

    fileList = []
    dirList = []
    for rawLine in output.split('\n'):
        name = rawLine.strip()
        if not name:
            continue
        if name.endswith('/'):
            dirList.append(name)
        else:
            fileList.append(name)

    return fileList, dirList


def listArtifact():
    """ Scans every location of repositoryList for sub-directories that have a
    trunk/ directory with a pom.xml file in it; those are taken to be artifacts.
    Returns a list of [name, trunk location] pairs, where the name is the
    sub-directory name without its last character (the trailing slash). """
    found = []
    for repoRoot in repositoryList:
        _, projectDirs = listDirectory(repoRoot)

        for projectDir in projectDirs:
            projectUrl = repoRoot + projectDir
            _, children = listDirectory(projectUrl)
            if "trunk/" not in children:
                continue

            trunkUrl = projectUrl + 'trunk/'
            trunkFiles, _ = listDirectory(trunkUrl)
            if "pom.xml" in trunkFiles:
                found.append([projectDir[:-1], trunkUrl])
    return found

def listArtifact2():
    """ Scans every location of repositoryList2 for sub-directories that hold a
    pom.xml file directly; those are taken to be artifacts.
    Returns a list of [name, location] pairs, where the name is the
    sub-directory name without its last character (the trailing slash). """
    found = []
    for repoRoot in repositoryList2:
        _, projectDirs = listDirectory(repoRoot)

        for projectDir in projectDirs:
            projectUrl = repoRoot + projectDir
            projectFiles, _ = listDirectory(projectUrl)
            if "pom.xml" in projectFiles:
                found.append([projectDir[:-1], projectUrl])

    return found

def listAllArtifacts():
    """ Returns the [name, location] pairs of all the artifacts in the
    repository: those found by listArtifact followed by those found by
    listArtifact2. """
    return listArtifact() + listArtifact2()

def extractPomFile(repositoryDir):
    """ Exports the pom.xml of the artifact stored at the given repository
    location into the current directory, using 'svn export'. A pom.xml left
    over from a previous call is removed first. Returns True when the file
    exists after the export. """
    localPom = os.path.abspath('pom.xml')

    # Remove any stale copy so that its presence afterwards proves the export worked.
    if os.path.exists(localPom):
        os.remove(localPom)

    executeCommand('svn export ' + repositoryDir + 'pom.xml', logger, isLinux)

    return os.path.exists(localPom)

def _pomText(node):
    """ Returns the stripped text of the first child of the node, or '' for an empty element. """
    child = node.firstChild
    if child is None:
        return ''
    return child.data.strip()


def _pomCoordinates(element):
    """ Returns the (groupId, artifactId) pair read from the first matching
    descendants of the element; a missing tag yields None in its position. """
    values = []
    for tagName in ('groupId', 'artifactId'):
        matches = element.getElementsByTagName(tagName)
        if matches.length > 0:
            values.append(_pomText(matches.item(0)))
        else:
            values.append(None)
    return values[0], values[1]


def parsePomFile(pomPath='pom.xml', groupPrefix='org.example'):
    """ Parses a POM file and returns an Artifact object holding the artifact
    ids and its direct dependencies.

    pomPath     -- the file to parse (by default the pom.xml of the current directory).
    groupPrefix -- only parents and dependencies whose group id starts with
                   this prefix are recorded.

    The ids come from the direct children of the first <project> element. When
    the project has no <groupId> of its own, the one of its <parent> is used.
    Every <parent> and <dependency> element found below the project is
    considered; entries lacking a group id or an artifact id are skipped.
    Raises ValueError when no artifact id or group id can be determined. """
    domObject = xml.dom.minidom.parse(pomPath)
    project = domObject.getElementsByTagName('project')[0]

    # Only direct children are inspected here: nested elements such as the
    # dependencies carry groupId/artifactId tags of their own.
    groupName = None
    artifactName = None
    for node in project.childNodes:
        if node.nodeName == 'groupId':
            groupName = _pomText(node)
        elif node.nodeName == 'artifactId':
            artifactName = _pomText(node)

    # A project may omit its group id and take the one of its parent.
    parents = project.getElementsByTagName('parent')
    if groupName is None and parents.length > 0:
        groupName = _pomCoordinates(parents.item(0))[0]

    if artifactName is None or groupName is None:
        raise ValueError('Cannot determine groupId/artifactId of the project in %s' % (pomPath,))

    arte = Artifact(artifactName, groupName)

    # The parent section first, then the dependency sections.
    for tagName in ('parent', 'dependency'):
        for element in project.getElementsByTagName(tagName):
            groupIdDep, artifactIdDep = _pomCoordinates(element)
            if groupIdDep is None or artifactIdDep is None:
                continue
            if groupIdDep.startswith(groupPrefix):
                arte.appendDependency(Artifact(artifactIdDep, groupIdDep))

    return arte

def getDependencyList(artifactList, dep):
    """ Looks up the first entry of artifactList that is equal to dep and
    returns its complete list of dependencies. Returns an empty list when no
    entry matches. """
    match = next((candidate for candidate in artifactList if dep == candidate), None)
    if match is None:
        return []
    return match.getListDependencies()


def findIndirectDependencies(artifactList, index):
    """ Fills in the indirect dependencies of artifactList[index]. The artifact
    object is modified in place; nothing is returned. """
    baseArtifact = artifactList[index]

    # First take over whatever the direct dependencies depend on.
    for position in range(baseArtifact.noOfDependencies()):
        direct = baseArtifact.getDependency(position)
        baseArtifact.addListOfDependencies(getDependencyList(artifactList, direct))

    # Then keep expanding the indirect dependencies until a complete pass adds
    # nothing new. Entries appended during a pass are visited by the next one.
    grew = True
    while grew:
        grew = False
        for position in range(baseArtifact.noOfIndirectDependencies()):
            indirect = baseArtifact.getIndirectDependency(position)
            if baseArtifact.addListOfDependencies(getDependencyList(artifactList, indirect)):
                grew = True

def findUsers(artifactList, index):
    """ Fills in the user lists of artifactList[index]: every other artifact
    that has it as a direct dependency becomes a direct user, and every other
    artifact that has it as an indirect dependency becomes an indirect user.
    The artifact object is modified in place. """
    target = artifactList[index]

    for position, candidate in enumerate(artifactList):
        if position == index:
            continue
        if candidate.isDirectDependency(target):
            target.addDirectUser(candidate)
        elif candidate.isIndirectDependency(target):
            target.addIndirectUser(candidate)

def lookArtifactDependencies():
    """ Locates all the artifacts in the repositories, parses the POM of each
    one that can be exported, and then works out the indirect dependencies and
    the users of every artifact. Returns the list of Artifact objects. """
    artifactList = []
    for _name, repositoryDir in listAllArtifacts():
        if extractPomFile(repositoryDir):
            artifactList.append(parsePomFile())

    # All the dependencies are completed before any user list is computed,
    # because the user lists are derived from the dependency lists.
    for position in range(len(artifactList)):
        findIndirectDependencies(artifactList, position)

    for position in range(len(artifactList)):
        findUsers(artifactList, position)

    return artifactList

def generateArtifactDependenciesPage(artifactList, htmlFile='./artifactDepends.html'):
    """ Generates the artifact dependencies html page.

    artifactList -- the artifacts to report; each one writes its own index line
                    (printHTMLIndex) and its own section (printHTMLDependencies).
    htmlFile     -- the file to write, overwritten if it exists.

    The page consists of a short introduction, an index with one line per
    artifact, and then one section per artifact. """
    # The 'with' block guarantees that the file is flushed and closed, even if
    # one of the artifacts fails while writing.
    with open(htmlFile, 'w') as filePtr:
        filePtr.write('<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 2.0//EN">\n <html>\n <head>\n <title>\n')
        filePtr.write('Artifact dependencies\n </title>\n </head>\n <body>\n')

        filePtr.write('The following list shows all the artifact that has been located in the repositories.')
        filePtr.write('For each artifact the list its dependencies and the list of artifact using them are')
        filePtr.write(' printed.')
        filePtr.write('<h2>Index</h2>')
        filePtr.write('<p>')

        for entry in artifactList:
            entry.printHTMLIndex(filePtr)
        for entry in artifactList:
            entry.printHTMLDependencies(filePtr)

        # Close the elements opened at the top of the page.
        filePtr.write('\n </body>\n </html>\n')

# Module-level settings read by the repository helpers (listDirectory and
# extractPomFile pass both of them to executeCommand).
logger = initializeLogger("artifactDependency", "artifactDependency.log")
isLinux = (os.name == 'posix')

# Analyze the repositories and write the report.
artifactList = lookArtifactDependencies()
generateArtifactDependenciesPage(artifactList)




No hay comentarios:

Publicar un comentario