Commits

Richard Shea  committed f57a4ae

Initial commit. The code has hard-coded paths to directories that exist only on the dev machine.

  • Participants

Comments (0)

Files changed (1)

File findBadAddresses.py

+'''
+f=file(fp)
+fp="./1306429999.V811I65cc573M345725.diezel"
+msg = email_message_from_string(f)
+print msg.as_string()
+print msg.get_payload()[0]
+'''
+import re
+def stripLineFeeds(string):
+    string = string.replace("\r","")
+    string = string.replace("\n","")
+    string = string.strip()
+    return string
+
+def stripFromVerboseMessage(sIn, compile_objVerboseMsg):
+    match_obj = compile_objVerboseMsg.search(sIn)
+    try:
+        MAILTOADDRESS = match_obj.group('MAILTOADDRESS')
+    except AttributeError: 
+        MAILTOADDRESS = sIn
+
+    return MAILTOADDRESS
+
+def processFile(fileName, p, compile_obj, spamWriter, compile_objVerboseMsg):
+    f=file(p)
+    msg = email.message_from_string(f.read())
+    sErrText = msg.get_payload()[0]
+    # method 1: using a compile object
+    '''
+    print sErrText
+    print type(sErrText)
+    import pprint       
+    pprint.pprint(dir(sErrText))
+    '''
+    blnGoodErrMsg = False
+    blnGoodEmail = False
+    # Retrieve group(s) by name
+    match_obj = compile_obj.search(sErrText.as_string())
+    try:
+        fullErrorMessage = match_obj.group('fullErrorMessage')
+    #except AttributeError as (errno, strerror):
+    except AttributeError:
+        print "Error 1" 
+    except:
+        print "Unexpected error:", sys.exc_info()[0]
+        raise
+    else:
+        fullErrorMessage = stripLineFeeds(fullErrorMessage)
+        blnGoodErrMsg = True 
+
+    
+    try:
+        emailAddress = match_obj.group('emailAddress')
+    #except AttributeError as (errno, strerror):
+    except AttributeError:
+        print "Error 1" 
+    except:
+        print "Unexpected error:", sys.exc_info()[0]
+        raise
+    else:
+        emailAddress = stripFromVerboseMessage(emailAddress, compile_objVerboseMsg)
+        emailAddress = stripLineFeeds(emailAddress)
+        blnGoodEmail = True
+
+    if blnGoodEmail == True and blnGoodErrMsg:
+        print "%s -> %s" % (emailAddress, fullErrorMessage)
+        spamWriter.writerow([fileName, emailAddress, fullErrorMessage])
+    else:
+        print "+" * 60
+        print sErrText.as_string()
+        print "+" * 60
+
+
+def processFilePlain(p, compile_obj):
+    f=file(p)
+    msg = email.message_from_string(f.read())
+    sErrText = msg.get_payload()[0]
+    return sErrText 
+
+def buildIgnoreList():
+    lst = []
+    lst.append('1306107016.V811I65cc3caM321663.diezel')
+    lst.append('1306107022.V811I65cc3e2M13969.diezel')
+    lst.append('1306107023.V811I65cc3e4M209786.diezel')
+    lst.append('1306107024.V811I65cc3e5M217596.diezel')
+    lst.append('1306139420.V811I65cc4e2M320634.diezel')
+    lst.append('1306139432.V811I65cc4efM494423.diezel')
+    lst.append('1306144832.V811I65cc4f6M753347.diezel')
+    lst.append('1306148434.V811I65cc518M29274.diezel')
+    lst.append('1306159236.V811I65cc556M88068.diezel')
+    return lst
+import os
+import email
+import csv
+lstFilesToIgnore = buildIgnoreList()
+
+path = 'C:/usr/rshea/mytemp/20110609/NZLPProblemEmails/'
+rawstr = r"""^(?P<fullErrorMessage>[<](?P<emailAddress>.+)[>].*)"""
+
+rawstrVerboseMsg = r"""<head>.*</head><body>.+mailto.*?>(?P<MAILTOADDRESS>.*?)<.*</body>"""
+listing = os.listdir(path)
+compile_obj = re.compile(rawstr,  re.MULTILINE| re.DOTALL)
+compile_objVerboseMsg = re.compile(rawstrVerboseMsg,  re.IGNORECASE|re.MULTILINE| re.DOTALL)
+
+spamWriter = csv.writer(open('NZLP-bademailaddresses.csv', 'wb'), dialect='excel')
+
+fileCnt = 1  
+for infileName in listing:
+    fileCnt += 1 
+    if infileName in lstFilesToIgnore:
+        pass
+    else:
+        infileFullPath = "%s/%s" % (path, infileName)
+        processFile(infileName, infileFullPath,compile_obj, spamWriter, compile_objVerboseMsg)
+