domingo, 5 de junio de 2011

Monitoring processes with Python

We've been working on some scripts to monitor the behavior of a process. The idea is to run a process for days and record its memory and CPU usage. We also want the scripts to run on both Linux and Windows.

In the Linux environment the top command is used to find out CPU usage and the proc filesystem to obtain the memory usage. The ps command is used to obtain the list of processes.

The win32 extensions for Python library is used on Windows. I've found this library a bit difficult to work with, and that is the main reason I'm writing this post.

Finding out the process ID


The first thing is to find out which process to monitor. We are assuming the process to monitor is the last descendent of the current process. We have created one function to list all the processes along with their parent process, and another to obtain the last descendent.

def getListOfProcessesLinux(logger):
    """Return a list of [pid, ppid] pairs built from the output of ps.

    Runs ``ps -eo pid,ppid`` through executeCommand, drops the first line of
    the output (the ps column header) and converts the first two
    whitespace-separated fields of every remaining line to integers. Lines
    with fewer than two fields, such as a trailing empty line, are ignored.

    logger -- passed through to executeCommand.
    """
    psOutput = executeCommand('ps -eo pid,ppid', logger, True)
    # Everything after the first line is data.
    rows = [row.split() for row in psOutput.split('\n')[1:]]
    return [[int(fields[0]), int(fields[1])]
            for fields in rows
            if len(fields) >= 2]

def getListOfProcessesWindows(logger):
    """Return a list of [pid, ppid] pairs read from the PDH "Process" object.

    For every instance of the (localized) "Process" performance object the
    "ID Process" and "Creating Process ID" counters are queried.

    logger -- object with an error() method, used to report an enumeration
              failure.

    Returns an empty list when the instances cannot be enumerated.
    """
    processList = []
    processObject = win32pdhutil.find_pdh_counter_localized_name("Process")
    pidCounter = win32pdhutil.find_pdh_counter_localized_name('ID Process')
    ppidCounter = win32pdhutil.find_pdh_counter_localized_name('Creating Process ID')

    # Get the list of processes
    try:
        junk, instances = win32pdh.EnumObjectItems(
            None, None, processObject, win32pdh.PERF_DETAIL_WIZARD)
    except Exception as e:
        # Without the instance list there is nothing to iterate over;
        # falling through would fail with an unbound 'instances' name.
        logger.error('getListOfProcessesWindows. Cannot enumerate processes. ' + str(e))
        return processList

    # Several processes can share one instance name (e.g. many "svchost").
    # The counter path selects among them by a zero-based index, so count
    # how many times each name has been seen so far.
    seenCount = {}
    for instance in instances:
        instanceIndex = seenCount.get(instance, 0)
        seenCount[instance] = instanceIndex + 1

        hq = win32pdh.OpenQuery()  # initializes the query handle
        try:
            path = win32pdh.MakeCounterPath(
                (None, processObject, instance, None, instanceIndex, pidCounter))
            pidHandle = win32pdh.AddCounter(hq, path)
            win32pdh.CollectQueryData(hq)  # collects data for the counter
            counterType, pid = win32pdh.GetFormattedCounterValue(
                pidHandle, win32pdh.PDH_FMT_LONG)

            path2 = win32pdh.MakeCounterPath(
                (None, processObject, instance, None, instanceIndex, ppidCounter))
            ppidHandle = win32pdh.AddCounter(hq, path2)
            win32pdh.CollectQueryData(hq)  # collects data for the counter
            counterType, ppid = win32pdh.GetFormattedCounterValue(
                ppidHandle, win32pdh.PDH_FMT_LONG)
        finally:
            # Release the query even when one of the PDH calls raises.
            win32pdh.CloseQuery(hq)
        processList.append([pid, ppid])
    return processList

def getListOfProcesses(logger, isLinux):
    """Return a list of [pid, ppid] pairs for all the processes in the system.

    Delegates to getListOfProcessesLinux when isLinux is true and to
    getListOfProcessesWindows otherwise; logger is forwarded unchanged.
    """
    listProcesses = getListOfProcessesLinux if isLinux else getListOfProcessesWindows
    return listProcesses(logger)

def getLastDescendent(pid, logger, isLinux):
    """Return the last descendent of the given process.

    Starting at pid, repeatedly moves to the first process in the list
    returned by getListOfProcesses whose parent is the current process, until
    a process without children is reached. When a process has more than one
    child only the first one listed is followed.

    pid     -- process id to start from (compared against the integer pids
               in the process list).
    logger  -- forwarded to getListOfProcesses.
    isLinux -- selects the platform implementation of getListOfProcesses.

    Returns pid itself when it has no children in the list.

    NOTE(review): on Linux the list is produced by running ps, which may
    itself show up as a child of the calling process -- confirm the intended
    process is the one that gets picked.
    """
    # Map each parent pid to its first listed child so the chain can be
    # followed without rescanning the whole list at every step.
    firstChild = {}
    for entry in getListOfProcesses(logger, isLinux):
        firstChild.setdefault(entry[1], entry[0])

    descendent = pid
    visited = set([descendent])
    while descendent in firstChild:
        child = firstChild[descendent]
        if child in visited:
            # Cyclic pid/ppid data (e.g. a process listed as its own parent)
            # would otherwise keep this loop running forever.
            break
        visited.add(child)
        descendent = child
    return descendent


Monitoring the memory usage


This is the code for the memory usage:

def getVmPeakLinux(pid, logger):
    """Return the peak virtual memory size of a process as a string.

    Runs ``cat /proc/<pid>/status`` through executeCommand and extracts the
    number that follows the ``VmPeak:`` label; the unit suffix (kB according
    to proc(5)) is not included.

    pid    -- process id; converted with str() to build the path.
    logger -- object with an error() method; also handed to executeCommand.

    Returns the numeric text of the VmPeak entry, or '0' (after logging an
    error) when the status cannot be read or contains no VmPeak entry.
    """
    label = 'VmPeak:'
    try:
        cmd = 'cat /proc/' + str(pid) + '/status'
        out = executeCommand(cmd, logger, True)
        pos = out.find(label)
        if pos < 0:
            # find() returns -1 when the label is missing; slicing from that
            # offset would silently return unrelated text.
            raise ValueError(label + ' entry not found')
        # Read the first token on the label's own line rather than a
        # fixed-width slice, so values of any length are returned whole.
        restOfLine = out[pos + len(label):].split('\n', 1)[0]
        memStr = restOfLine.split()[0]
    except Exception as detail:
        memStr = '0'
        logger.error('getVmPeakLinux. Cannot get memory information. ' + str(detail))
    return memStr

def getVmPeakWindows(pid, logger):
    """Return the peak pagefile usage of a process as a string.

    Opens the process, reads its memory counters with
    win32process.GetProcessMemoryInfo and returns the 'PeakPagefileUsage'
    entry converted with str().

    pid    -- process id; converted with int().
    logger -- object with an error() method.

    Returns '0' (after logging an error) when the information cannot be
    obtained.

    NOTE(review): Windows documents this value in bytes, whereas the Linux
    counterpart reads a kB figure from /proc -- confirm callers expect that.
    """
    try:
        # Only query + VM-read rights are needed to read memory counters;
        # asking for PROCESS_ALL_ACCESS fails on processes that the caller
        # may inspect but not fully control.
        access = win32con.PROCESS_QUERY_INFORMATION | win32con.PROCESS_VM_READ
        han = win32api.OpenProcess(access, False, int(pid))
        try:
            info = win32process.GetProcessMemoryInfo(han)
        finally:
            # Release the process handle right away instead of waiting for
            # the handle object to be garbage collected.
            win32api.CloseHandle(han)
        memStr = str(info['PeakPagefileUsage'])
    except Exception as detail:
        memStr = '0'
        logger.error('getVmPeakWindows. Cannot get memory information. ' + str(detail))

    return memStr

def getVmPeak(pid, logger, isLinux):
    """Return a string indicating the peak of the process virtual memory size.

    Delegates to getVmPeakLinux when isLinux is true and to getVmPeakWindows
    otherwise; pid and logger are forwarded unchanged.
    """
    readPeak = getVmPeakLinux if isLinux else getVmPeakWindows
    return readPeak(pid, logger)


def getVmSizeLinux(pid, logger):
    """Return the current virtual memory size of a process as a string.

    Runs ``cat /proc/<pid>/status`` through executeCommand and extracts the
    number that follows the ``VmSize:`` label; the unit suffix (kB according
    to proc(5)) is not included.

    pid    -- process id; converted with str() to build the path.
    logger -- object with an error() method; also handed to executeCommand.

    Returns the numeric text of the VmSize entry, or '0' (after logging an
    error) when the status cannot be read or contains no VmSize entry.
    """
    label = 'VmSize:'
    try:
        cmd = 'cat /proc/' + str(pid) + '/status'
        out = executeCommand(cmd, logger, True)
        pos = out.find(label)
        if pos < 0:
            # find() returns -1 when the label is missing; slicing from that
            # offset would silently return unrelated text.
            raise ValueError(label + ' entry not found')
        # Read the first token on the label's own line rather than a
        # fixed-width slice, so values of any length are returned whole.
        restOfLine = out[pos + len(label):].split('\n', 1)[0]
        memStr = restOfLine.split()[0]
    except Exception as detail:
        memStr = '0'
        logger.error('getVmSizeLinux. Cannot get memory information. ' + str(detail))
    return memStr


def getVmSizeWindows(pid, logger):
    """Return the current pagefile usage of a process as a string.

    Opens the process, reads its memory counters with
    win32process.GetProcessMemoryInfo and returns the 'PagefileUsage' entry
    converted with str().

    pid    -- process id; converted with int().
    logger -- object with an error() method.

    Returns '0' (after logging an error) when the information cannot be
    obtained.

    NOTE(review): Windows documents this value in bytes, whereas the Linux
    counterpart reads a kB figure from /proc -- confirm callers expect that.
    """
    try:
        # Only query + VM-read rights are needed to read memory counters;
        # asking for PROCESS_ALL_ACCESS fails on processes that the caller
        # may inspect but not fully control.
        access = win32con.PROCESS_QUERY_INFORMATION | win32con.PROCESS_VM_READ
        han = win32api.OpenProcess(access, False, int(pid))
        try:
            info = win32process.GetProcessMemoryInfo(han)
        finally:
            # Release the process handle right away instead of waiting for
            # the handle object to be garbage collected.
            win32api.CloseHandle(han)
        memStr = str(info['PagefileUsage'])
    except Exception as detail:
        memStr = '0'
        logger.error('getVmSizeWindows. Cannot get memory information. ' + str(detail))
    return memStr

def getVmSize(pid, logger, isLinux):
    """Return a string indicating the current process virtual memory size.

    Delegates to getVmSizeLinux when isLinux is true and to getVmSizeWindows
    otherwise; pid and logger are forwarded unchanged.
    """
    readSize = getVmSizeLinux if isLinux else getVmSizeWindows
    return readSize(pid, logger)

Monitoring the CPU usage


This is the actual code:

def getIdleTimeLinux(logger):
    """Return the CPU idle percentage reported by the second iteration of top.

    Runs ``top -b -d 2 -n 2`` through executeCommand and reads the idle
    figure from the CPU summary lines. The second figure is used because it
    is the one produced after the 2-second delay. Both the ``99.5%id`` and
    the ``99.5 id`` summary layouts are recognised, as well as a decimal
    comma.

    logger -- object with an error() method; also handed to executeCommand.

    Returns the idle percentage as a float, or 110.0 (an out-of-range value)
    after logging an error when the figure cannot be found.
    """
    import re

    retVal = 110.0
    cmd = 'top -b -d 2 -n 2'
    output = executeCommand(cmd, logger, True)

    idlePattern = re.compile(r'(\d+(?:[.,]\d+)?)\s*%?\s*id\b')
    idleValues = []
    for line in output.split('\n'):
        # Only look at the CPU summary lines so that a process-list row can
        # never be mistaken for the idle figure.
        if re.match(r'\s*%?Cpu', line):
            idleValues.extend(idlePattern.findall(line))

    if len(idleValues) >= 2:
        retVal = float(idleValues[1].replace(',', '.'))
    elif idleValues:
        logger.error('Cannot find the second idle')
    else:
        logger.error('Cannot find the first idle')

    return retVal

def getIdleTimeWindows(logger):
    """Return the CPU idle percentage measured over a 2-second interval.

    Samples the first counter of the (localized) "Processor" performance
    object for the '_Total' instance twice, two seconds apart, and returns
    100.0 minus the formatted counter value.

    logger -- not used by this implementation; kept for a signature parallel
              to getIdleTimeLinux.

    NOTE(review): items[0] is assumed to be the "% Processor Time" counter --
    confirm that the enumeration order can be relied upon.
    """
    processorObject = win32pdhutil.find_pdh_counter_localized_name("Processor")

    # I can't translate with find_pdh_counter_localized_name the name "% Processor time"
    items, instances = win32pdh.EnumObjectItems(
        None, None, processorObject, win32pdh.PERF_DETAIL_WIZARD)

    hq = win32pdh.OpenQuery()
    try:
        path = win32pdh.MakeCounterPath(
            (None, processorObject, '_Total', None, -1, items[0]))
        counterHandle = win32pdh.AddCounter(hq, path)
        # Two samples are collected, with a pause between them, before the
        # counter value is formatted.
        win32pdh.CollectQueryData(hq)
        time.sleep(2)
        win32pdh.CollectQueryData(hq)
        counterType, busyPercent = win32pdh.GetFormattedCounterValue(
            counterHandle, win32pdh.PDH_FMT_LONG)
    finally:
        # Release the query even when one of the PDH calls raises.
        win32pdh.CloseQuery(hq)

    return 100.0 - busyPercent

def getIdleTime(logger, isLinux):
    """Measure the current CPU usage and return the percentage of idle time.

    Delegates to getIdleTimeLinux when isLinux is true and to
    getIdleTimeWindows otherwise; logger is forwarded unchanged.
    """
    measureIdle = getIdleTimeLinux if isLinux else getIdleTimeWindows
    return measureIdle(logger)


The way to find out the platform we're running on is:

# os.name is 'posix' on Linux (and on other Unix-like systems); any other
# value is treated as Windows by the functions that take isLinux.
isLinux = (os.name == 'posix')