Email to HTML Script - Python Snipplr Social Repository

Revision: 63531

at May 15, 2013 03:59 by drydenlong

Initial Code

import email, getpass, imaplib, os, re, csv, zipfile, glob, threading

# Folder that receives the saved attachments.
detach_dir = 'directory'

# Credentials used to sign in to the mailbox.
user = "username"
pwd = "password"

# Open an SSL-protected session with the IMAP server and authenticate.
m = imaplib.IMAP4_SSL("imap server")
m.login(user, pwd)

# Work on the inbox; any other mailbox name can be selected here instead,
# and m.list() enumerates the mailboxes that are available.
m.select("INBOX")

# "ALL" matches every message; any IMAP search rule may be used to filter
# (check http://www.example-code.com/csharp/imap-search-critera.asp).
resp, found = m.search(None, "ALL")

# The first element of the search data holds the whitespace-separated mail ids.
items = found[0].split()

# ---------------------------------------------------------------------------
# Per-mail processing: extract the %message%, $token$ and ^tag^ markers from
# the plain-text body, save every attachment into ``detach_dir``, render the
# HTML template as ``<token>.html`` and finally move everything in the folder
# (except existing .zip files) into ``<token>.zip``.
# ---------------------------------------------------------------------------

# Marker patterns, compiled once instead of once per mail part.
_MESSAGE_RE = re.compile(r'\%(.+?)\%', re.DOTALL)
_TOKEN_RE = re.compile(r'\$(.+?)\$', re.DOTALL)
_TAG_RE = re.compile(r'\^(.+?)\^', re.DOTALL)


def _first_match(pattern, text):
    """Return the first group captured by *pattern* in *text*, stripped, or ''."""
    found = pattern.search(text)
    return found.group(1).strip() if found else ''


def _decode_text(part):
    """Return the transfer-decoded text of a non-multipart mail *part*.

    ``get_payload(decode=True)`` undoes quoted-printable/base64 encoding, so
    soft line breaks no longer need to be stripped by hand.  On Python 3 the
    result is bytes and is decoded with the part's declared charset
    (falling back to UTF-8); on Python 2 it is already a ``str``.
    """
    payload = part.get_payload(decode=True) or b''
    if not isinstance(payload, str):
        charset = part.get_content_charset() or 'utf-8'
        try:
            payload = payload.decode(charset, 'replace')
        except LookupError:
            # Unknown charset name in the header: fall back to UTF-8.
            payload = payload.decode('utf-8', 'replace')
    return payload


def _first_image(directory, prefix):
    """Return the bare name of the first file in *directory* whose name
    starts with *prefix*, or '' when there is none."""
    matches = sorted(glob.glob(os.path.join(directory, prefix + '*')))
    return os.path.basename(matches[0]) if matches else ''


def _build_html(image1, image2, image3, message):
    """Fill the fixed e-mail blast template with the image names and message.

    NOTE: *message* comes straight from the e-mail body and is inserted
    unescaped; escape it first if the senders are not trusted.
    """
    return ('''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
                    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
                    <html xmlns="http://www.w3.org/1999/xhtml"><head>
                    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
                    <title>Untitled Document</title></head><body>
                    <table width="554" border="0" cellspacing="0" cellpadding="0" align="center"><tr><td>
                    <img src="'''+image1+'''" width="554" height="186" /></td></tr><tr><td>
                    <p style="font-family:Arial, Helvetica, sans-serif; font-size:11pt; line-height:14pt;">
                    <br />Dear [Fld:FirstName],<br /><br />'''+message+'''<br /><br /><a href="PLACEHOLDER">
                    <img src="'''+image2+'''" width="248" height="38" alt="Opt-in for men\'s health tips now" /></a>
                    <br /><br /><br /><img src="'''+image3+'''" width="167" height="62" align="right" /><br />
                    <p style="font-family:Arial, Helvetica, sans-serif; font-size:10pt;"></td></tr></table>
                    </body></html>''')


# Resolve the output folder once.  The working directory is never changed, so
# a relative ``detach_dir`` keeps pointing at the same folder for every mail.
output_dir = os.path.abspath(detach_dir)

# imaplib returns the raw mail as bytes on Python 3 and as str on Python 2;
# pick the parser that matches.
_parse_mail = getattr(email, 'message_from_bytes', email.message_from_string)

for emailid in items:
    # "(RFC822)" fetches the complete message (headers and body).
    resp, data = m.fetch(emailid, "(RFC822)")
    email_body = data[0][1]
    mail = _parse_mail(email_body)

    # Only multipart mails can carry attachments; skip everything else.
    if mail.get_content_maintype() != 'multipart':
        continue

    # Reset per mail so that values never leak in from the previous message.
    message = token = tag = ''
    # Numbers the attachments of this mail that arrive without a filename.
    counter = 1

    # walk() yields every node of the MIME tree, containers included.
    for part in mail.walk():
        if part.get_content_type() == 'text/plain':
            content = _decode_text(part)
            # Keep an earlier value when a later text part (e.g. an attached
            # .txt file) does not contain the marker.
            message = _first_match(_MESSAGE_RE, content) or message
            # The token becomes a file name, so drop any directory component.
            token = os.path.basename(_first_match(_TOKEN_RE, content)) or token
            tag = _first_match(_TAG_RE, content) or tag
            print(message)
            print(token)
            print(tag)

        # Multipart nodes are just containers, so skip them.
        if part.get_content_maintype() == 'multipart':
            continue

        # Parts without a Content-Disposition header are not attachments.
        if part.get('Content-Disposition') is None:
            continue

        # The filename is sender-controlled: keep only its last component so
        # it cannot escape the output folder.
        filename = os.path.basename(part.get_filename() or '')
        if not filename:
            filename = 'part-%03d.%s' % (counter, 'bin')
            counter += 1

        att_path = os.path.join(output_dir, filename)

        # Never overwrite a file that is already there.
        if not os.path.isfile(att_path):
            with open(att_path, 'wb') as fp:
                fp.write(part.get_payload(decode=True))

    # Without a token there is no name for the HTML file or the archive.
    if not token:
        print('Skipping mail without a $token$ marker')
        continue

    # Render the template once per mail, after all attachments are on disk.
    htmlFile = token + '.html'
    htmlCode = _build_html(
        _first_image(output_dir, 'upload-photo1'),
        _first_image(output_dir, 'upload-photo2'),
        _first_image(output_dir, 'upload-photo3'),
        message,
    )
    with open(os.path.join(output_dir, htmlFile), 'w') as htmlData:
        htmlData.write(htmlCode)
    print(htmlFile + ' Complete')

    # Collect every regular file that is not already an archive.
    pending = [
        name for name in os.listdir(output_dir)
        if not name.endswith('.zip')
        and os.path.isfile(os.path.join(output_dir, name))
    ]

    # Open the archive once and store the files under their bare names.
    with zipfile.ZipFile(os.path.join(output_dir, token + '.zip'), mode='a') as archive:
        for name in pending:
            archive.write(os.path.join(output_dir, name), name)

    # Remove the originals only after the archive has been closed successfully.
    for name in pending:
        os.unlink(os.path.join(output_dir, name))


#  This script will access a set email account, parse the text and attachments of each email, create HTML markup
#  and zip the files together. This script assumes a set template for the HTML. I will most likely have to change
#  this in order to incorporate multiple templates. The HTML markup for each template will be sent in the email
#  and be parsed in the same fashion as the `token` and `tag` variables above.
#
#  What still needs to be done:
#  1) Archive email after being zipped so that duplicates are not created
#  2) Email .zip file to requestor (person who ordered)

Initial URL

Initial Description

A script to parse emails and return HTML suited for email blast programs

Initial Title

Email to HTML Script

Initial Tags

python

Initial Language

Python

Choose a language for easy browsing: