viernes, 8 de marzo de 2013

Log diff script

I wanted to share a little script I wrote to make a diff between two log files. The basic idea is to compare the log messages but ignore certain fields that are different every time, like the timestamp. What I do is remove every digit (0-9) and hex digit (a-f, A-F) from every line and then call difflib to make the comparison. Then the original lines are presented in HTML format. So here is the script:

#!/usr/bin/python



import difflib

import sys

import re

import itertools



def charJunk(ch):
    """Tell whether *ch* is made up only of hex digits.

    Every character in ``0-9``, ``a-f`` and ``A-F`` is stripped from *ch*;
    the result is True when nothing is left over (which includes the empty
    string) and False otherwise.
    """
    leftover = re.sub(r"[A-Fa-f0-9]", "", ch)
    return not leftover



def filter(line):
    """Return *line* with every hex digit (0-9, a-f, A-F) removed.

    Used to blank out the parts of a log line that vary between runs
    (time stamps, addresses, counters) before the lines are compared.
    """
    hexDigit = re.compile(r"[A-Fa-f0-9]")
    return hexDigit.sub("", line)



def printResultsHtml(result, file1Name, file2Name,
                     sourceLines1=None, sourceLines2=None, out=None):
    """Write the diff as a single side-by-side HTML table.

    *result* is an iterable of diff lines whose first character selects the
    row type: ``-`` (line only in the first file), ``+`` (line only in the
    second file), ``?`` (hint line, skipped) and anything else (line present
    in both).  The text shown in each row is taken from the unfiltered source
    lines, not from *result* itself.

    Parameters:
        result: diff lines, e.g. the output of ``difflib.Differ().compare``.
        file1Name, file2Name: column titles for the two files.
        sourceLines1, sourceLines2: the original lines of each file.  When
            omitted, the module-level ``lines1`` / ``lines2`` are used.
        out: object with a ``write`` method; defaults to ``sys.stdout``.

    The previous version ended with ``print text2`` although ``text2`` was
    never defined, so it raised NameError after the last row and left the
    document unterminated.  The table, body and html elements are now closed
    explicitly, and all inserted text is HTML-escaped.
    """
    if sourceLines1 is None:
        sourceLines1 = lines1
    if sourceLines2 is None:
        sourceLines2 = lines2
    if out is None:
        out = sys.stdout

    def escape(text):
        # '&' must be replaced first so the entities added below survive.
        return (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))

    def row(number1, cell1, number2, cell2):
        # One table row: line number + text for each of the two files.
        return (
            '<tr><td class="diff_next"></td>'
            '<td class="diff_header">%s</td>'
            '<td nowrap="nowrap">%s</td>'
            '<td class="diff_next"></td>'
            '<td class="diff_header">%s</td>'
            '<td nowrap="nowrap">%s</td></tr>\n'
        ) % (number1, cell1, number2, cell2)

    pageHeader = '\n'.join([
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
        '    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
        '<html>',
        '<head>',
        '    <meta http-equiv="Content-Type"',
        '        content="text/html; charset=ISO-8859-1" />',
        '    <title></title>',
        '    <style type="text/css">',
        '        table.diff {font-family:Courier; border:medium;}',
        '        .diff_header {background-color:#e0e0e0}',
        '        td.diff_header {text-align:right}',
        '        .diff_next {background-color:#c0c0c0}',
        '        .diff_add {background-color:#aaffaa}',
        '        .diff_chg {background-color:#ffff77}',
        '        .diff_sub {background-color:#ffaaaa}',
        '    </style>',
        '</head>',
        '<body>',
        '',
    ])

    tableHeader = (
        '<table class="diff" id="difflib_chg_to0__top"\n'
        '       cellspacing="0" cellpadding="0" rules="groups">\n'
        '    <colgroup></colgroup> <colgroup></colgroup> <colgroup></colgroup>\n'
        '    <colgroup></colgroup> <colgroup></colgroup> <colgroup></colgroup>\n'
        '    <thead><tr><th class="diff_next"><br /></th>'
        '<th colspan="2" class="diff_header">%s</th>'
        '<th class="diff_next"><br /></th>'
        '<th colspan="2" class="diff_header">%s</th></tr></thead>\n'
    ) % (escape(file1Name), escape(file2Name))

    out.write(pageHeader)
    out.write(tableHeader)

    index1 = 0
    index2 = 0
    for line in result:
        if line.startswith('-'):
            removed = '<span class="diff_sub">%s</span>' % escape(sourceLines1[index1])
            out.write(row(index1 + 1, removed, '', ''))
            index1 += 1
        elif line.startswith('+'):
            added = '<span class="diff_add">%s</span>' % escape(sourceLines2[index2])
            out.write(row('', '', index2 + 1, added))
            index2 += 1
        elif line.startswith('?'):
            # Intraline hint lines carry no source line of their own.
            continue
        else:
            # A common line consumes one source line on each side, so both
            # indices have to be valid (the old check looked at one only).
            if index1 >= len(sourceLines1) or index2 >= len(sourceLines2):
                sys.stderr.write('log diff: diff output does not match the '
                                 'source lines; stopping early\n')
                break
            out.write(row(index1 + 1, escape(sourceLines1[index1]),
                          index2 + 1, escape(sourceLines2[index2])))
            index1 += 1
            index2 += 1

    out.write('</table>\n</body>\n</html>\n')





def printResultsHtml2(result, file1Name, file2Name,
                      sourceLines1=None, sourceLines2=None, out=None):
    """Write the diff as two HTML tables placed next to each other.

    The first table (``<div id="one">``) shows the first file and the second
    (``<div id="two">``) shows the second file.  Each diff line produces one
    row in both tables so the rows stay aligned: a line that exists on one
    side only gets a placeholder row (``+`` or ``-``) on the other side.

    *result* is an iterable of diff lines whose first character selects the
    row type: ``-`` (only in the first file), ``+`` (only in the second
    file), ``?`` (hint line, skipped) and anything else (present in both).
    The text shown is taken from the unfiltered source lines.

    Parameters:
        result: diff lines, e.g. the output of ``difflib.Differ().compare``;
            it is consumed in a single pass, so a generator is fine.
        file1Name, file2Name: titles of the two tables.
        sourceLines1, sourceLines2: the original lines of each file.  When
            omitted, the module-level ``lines1`` / ``lines2`` are used.
        out: object with a ``write`` method; defaults to ``sys.stdout``.

    Changes from the previous version: the document is now terminated with
    ``</body></html>`` (the closing text was built but never printed), the
    two tables no longer share one ``id``, the bounds check for common lines
    covers both files, the stray debug string is no longer written into the
    page, and all inserted text is HTML-escaped.
    """
    if sourceLines1 is None:
        sourceLines1 = lines1
    if sourceLines2 is None:
        sourceLines2 = lines2
    if out is None:
        out = sys.stdout

    def escape(text):
        # '&' must be replaced first so the entities added below survive.
        return (text.replace('&', '&amp;')
                    .replace('<', '&lt;')
                    .replace('>', '&gt;'))

    def tableHead(tableId, title):
        # Opening of one table up to and including its header row.
        return (
            '<table class="diff" id="%s"\n'
            '       cellspacing="0" cellpadding="0" rules="groups">\n'
            '    <colgroup></colgroup> <colgroup></colgroup> <colgroup></colgroup>\n'
            '    <colgroup></colgroup> <colgroup></colgroup> <colgroup></colgroup>\n'
            '    <thead><tr><th class="diff_next"><br /></th>'
            '<th colspan="2" class="diff_header">%s</th></tr></thead>\n'
        ) % (tableId, escape(title))

    def row(number, cell):
        # One table row: line number (or blank) followed by the cell text.
        return (
            '<tr><td class="diff_next"></td>'
            '<td class="diff_header">%s</td>'
            '<td nowrap="nowrap">%s</td></tr>\n'
        ) % (number, cell)

    # Joined with '\n' rather than %-formatted: the CSS contains literal '%'.
    pageHeader = '\n'.join([
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
        '    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
        '<html>',
        '<head>',
        '    <meta http-equiv="Content-Type"',
        '        content="text/html; charset=ISO-8859-1" />',
        '    <title></title>',
        '    <style type="text/css">',
        '        table.diff {font-family:Courier; border:medium;}',
        '        .diff_header {background-color:#e0e0e0}',
        '        td.diff_header {text-align:right}',
        '        .diff_next {background-color:#c0c0c0}',
        '        .diff_add {background-color:#aaffaa}',
        '        .diff_chg {background-color:#ffff77}',
        '        .diff_sub {background-color:#ffaaaa}',
        '        #one',
        '            {border:1px solid red;overflow:hidden;',
        '            float: left;',
        '            width: 48%;overflow-x: scroll;',
        '            }',
        '        #two',
        '            {border:1px solid red;overflow:hidden;',
        '            float: left;',
        '            width: 48%;overflow-x: scroll;',
        '            }',
        '        .c1 {width: 200px;}',
        '        #wrapper',
        '            {',
        '            float: left;',
        '            float/**/: none;',
        '            }',
        '        /* easy clearing */',
        '        #wrapper:after',
        '            {',
        "            content: '.';",
        '            display: block;',
        '            height: 0;',
        '            clear: both;',
        '            visibility: hidden;',
        '            }',
        '        #wrapper',
        '            {',
        '            display: inline-block;',
        '            }',
        '        /*\\*/',
        '        #wrapper',
        '            {',
        '            display: block;',
        '            }',
        '        /* end easy clearing */',
        '    </style>',
        '</head>',
        '<body>',
        '',
    ])

    # Build the rows of both tables in one pass over the diff; the left
    # table has to be written completely before the right one starts.
    leftRows = []
    rightRows = []
    index1 = 0
    index2 = 0
    for line in result:
        if line.startswith('-'):
            removed = '<span class="diff_sub">%s</span>' % escape(sourceLines1[index1])
            leftRows.append(row(index1 + 1, removed))
            rightRows.append(row('', '- '))
            index1 += 1
        elif line.startswith('+'):
            added = '<span class="diff_add">%s</span>' % escape(sourceLines2[index2])
            leftRows.append(row('  ', '+  '))
            rightRows.append(row(index2 + 1, added))
            index2 += 1
        elif line.startswith('?'):
            # Intraline hint lines carry no source line of their own.
            continue
        else:
            # A common line consumes one source line on each side, so both
            # indices have to be valid before either list is read.
            if index1 >= len(sourceLines1) or index2 >= len(sourceLines2):
                sys.stderr.write('log diff: diff output does not match the '
                                 'source lines; stopping early\n')
                break
            leftRows.append(row(index1 + 1, escape(sourceLines1[index1])))
            rightRows.append(row(index2 + 1, escape(sourceLines2[index2])))
            index1 += 1
            index2 += 1

    out.write(pageHeader)
    out.write('<div id="wrapper"><div id="one">\n')
    out.write(tableHead('difflib_chg_to0__top', file1Name))
    out.write(''.join(leftRows))
    out.write('</table></div><div id="two">\n')
    out.write(tableHead('difflib_chg_to1__top', file2Name))
    out.write(''.join(rightRows))
    out.write('</table></div></div>\n')
    out.write('</body>\n</html>\n')





def printResults (result):

    index1 = 0

    index2 = 0

    for line in result:

        print (index1, index2),

        if line.startswith ('-'):

            print '- ' + lines1[index1],

            index1 += 1

        elif line.startswith ('+'):

            print '+ ' + lines2[index2],

            index2 += 1

        elif line.startswith ('?'):

            pass

        else:

            if index1 >= len (lines1):

                break

            else:

                print '  ' + lines1[index1],

                index1 += 1

                index2 += 1



def printResults2 (result):

    for line in result:

        print line,





# Script entry point: diff the two log files named on the command line and
# write the HTML report to stdout.  Guarded so the module can be imported
# without running the diff; lines1/lines2 remain module-level names because
# the print* functions read them.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.stderr.write("usage: %s LOG_FILE_1 LOG_FILE_2\n" % sys.argv[0])
        sys.exit(2)

    # Keep the line endings (splitlines(True)) so the original text can be
    # reproduced verbatim; "with" closes each file once it has been read.
    with open(sys.argv[1]) as file1:
        lines1 = file1.read().splitlines(True)
    with open(sys.argv[2]) as file2:
        lines2 = file2.read().splitlines(True)

    # Compare the lines with digits and hex digits stripped, so values that
    # change on every run (such as time stamps) do not show up as changes.
    filteredLines1 = [filter(line) for line in lines1]
    filteredLines2 = [filter(line) for line in lines2]

    d = difflib.Differ()
    results = d.compare(filteredLines1, filteredLines2)

    printResultsHtml2(results, sys.argv[1], sys.argv[2])