# This file is part of ReportTool
# ReportTool (Felicity) is copyright 2004-8 Steve Butterfill.
#
# ReportTool is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# ReportTool is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ReportTool.  If not, see http://www.gnu.org/licenses/.
#
# If you want to use ReportTool under a different licence, email
# s.butterfill@warwick.ac.uk.
# -------------------
# fileit.py
# (c) Steve Butterfill 2007
# Use subject to licence.  See licence.txt
#
# Downloads messages from an email account using imap and
# creates notes according to who they're to/from.
# Uses uid to keep track of which messages have been extracted.
# INFO: 'uid' is a unique and hopefully invariant id for each email
# message in an account.

from datetime import datetime
import string, re
import email, imaplib
import logging

try:
    import html2text  # inc. w. felicity, GPL licence
except ImportError:
    # _get_body already falls back to the unconverted payload whenever
    # html2text.strip fails, so a missing module degrades the same way.
    html2text = None

log = logging.getLogger(__name__)


class Note(object):
    """These are the objects created from emails."""

    def __init__(self, staff_id, student_id, sender, recepient, subject,
                 body, date=datetime.now):
        """Store the fields of a note.

        date may be a datetime or a zero-argument callable returning one.
        The default is the callable datetime.now, which is called here so
        that every note records its own creation time.
        """
        self.staff_id = staff_id
        self.student_id = student_id
        self.sender = sender
        self.recepient = recepient
        self.subject = subject
        # Call the factory; storing the function itself was a bug.
        self.date = date() if callable(date) else date
        self.body = body

    @staticmethod
    def create(match, sender, recepient, subject, body):
        """Return a new note from the parameters.

        match is a dictionary containing 'staff_id' and 'student_id'.
        """
        return Note(staff_id=match['staff_id'],
                    student_id=match['student_id'],
                    sender=sender,
                    recepient=recepient,
                    subject=subject,
                    body=body)


class FileIt(object):
    """Retrieves email messages from a server and returns them as Note
    objects.

    Usage:
        f = FileIt(params...)
        results = f.go()
        create_records(results['notes'])
        display_errors(results['errors'])

    Tip: use the make_dict function of this module to create dictionaries.
    This avoids all kinds of problems involving duplicate names or
    duplicate email addresses.
    """

    def __init__(self, user_name, password, host, port, old_uids,
                 student_dicts, staff_dicts):
        """old_uids specifies uids to ignore.

        student_dicts is a list of dictionaries used for matching students:
        the keys are the strings to look for (whole words, ignoring case)
        and the values are student ids.  staff_dicts is the same for staff.
        Dictionaries are tried in the order given.
        """
        self.user_name = user_name
        self.password = password
        self.host = host
        self.port = port
        self.old_uids = old_uids
        # replace keys of dictionaries with compiled regex
        self.student_dicts = [self._replace_dict(d) for d in student_dicts]
        self.staff_dicts = [self._replace_dict(d) for d in staff_dicts]

    @staticmethod
    def _replace_dict(dic):
        """Return a new dict where the keys are compiled regular expressions.

        Each key is lower-cased and escaped so it is matched literally
        (e.g. the dots in an email address), as a whole word only.
        """
        compiled = {}
        for key, value in dic.items():
            pattern = re.compile(r"\b%s\b" % re.escape(key.lower()))
            compiled[pattern] = value
        return compiled

    def go(self):
        """Retrieve new messages and create Note objects.

        Returns a dictionary containing notes and errors:
            notes: keys are uids, values are new notes from the email
                message with that uid
            errors: keys are uids, values are strings describing the error
                associated with the message.

        If the connection fails, the dictionary returned by _connect is
        returned instead (success=False plus error_msg).
        """
        serverd = self._connect()
        if not serverd['success']:
            return serverd  # return value is error dict
        imap_server = serverd['imap_server']
        notes = {}
        errors = {}
        try:
            for uid in serverd['uids']:
                msgd = self._get_msg(imap_server=imap_server, uid=uid)
                if not msgd['success']:
                    # failed to get message with this uid
                    errors[uid] = ("failed to open this message uid=%s "
                                   "on server %s" % (uid, self.host))
                    continue
                # msg was successfully read
                noted = self._process_msg(msgd['msg'])
                if not noted['success']:
                    errors[uid] = ("failed to match this message uid=%s"
                                   % (uid, ))
                else:
                    notes[uid] = noted['note']
        finally:
            # always release the connection, even if processing blew up
            self._disconnect(imap_server)
        return dict(notes=notes, errors=errors)

    @staticmethod
    def _disconnect(imap_server):
        """Log out of the server, ignoring (but logging) any failure."""
        try:
            imap_server.logout()
        except Exception as e:
            log.debug("Fileit--error logging out of mail server: %s"
                      % str(e))

    def _connect(self):
        """Connect to the email server.

        Returns a dict with success=True, imap_server and uids (the uids
        of messages not listed in old_uids), or success=False and
        error_msg if connecting or searching fails.
        """
        imap_server = None
        try:
            imap_server = imaplib.IMAP4_SSL(self.host, self.port)
            imap_server.login(self.user_name, self.password)
            imap_server.select()
            typ, data = imap_server.uid("search", "ALL")
            if typ != 'OK':
                raise imaplib.IMAP4.error(
                    "search returned %s: %s" % (typ, data))
        except Exception as e:
            if imap_server is not None:
                # do not leak a half-open connection
                self._disconnect(imap_server)
            error_msg = ("Error attempting to connect to email server %s. "
                         "Details: %s." % (self.host, str(e)))
            return dict(success=False, error_msg=error_msg)
        raw_uids = data[0].split() if data and data[0] else []
        uids = []
        for uid in raw_uids:
            if not isinstance(uid, str):
                # newer imaplib returns bytes; keep uids as plain strings
                uid = uid.decode("ascii")
            # exclude any old uids
            if uid not in self.old_uids:
                uids.append(uid)
        return dict(success=True, imap_server=imap_server, uids=uids)

    def _get_msg(self, imap_server, uid):
        """Get the message with the specified uid from the opened server.

        Returns a dictionary with success=True and the msg object, or
        success=False and error_msg if the message could not be read.
        """
        # attempt to get the message
        try:
            typ, data = imap_server.uid("fetch", uid, '(RFC822)')
            if typ != 'OK' or not data or data[0] is None:
                raise ValueError("server returned no message data")
            raw = data[0][1]
        except Exception as e:
            msg = "could not read %s due to an error: %s" % (uid, str(e))
            log.warning("Fileit--reading mail--" + msg)
            return dict(success=False, error_msg=msg)
        if not isinstance(raw, str) and hasattr(email, "message_from_bytes"):
            msg = email.message_from_bytes(raw)
        else:
            msg = email.message_from_string(raw)
        return dict(success=True, msg=msg)

    def _process_msg(self, msg):
        """Process the email message and create a Note.

        Returns a dictionary with the Note object created if success.
        """
        # get parts of message
        sender = msg["From"]
        recepient = msg["To"]
        subject = msg["Subject"]
        body = self._get_body(msg)
        # attempt to match against db
        match = self._search_msg(sender, recepient, subject)
        if not match['success']:
            return dict(success=False, error_msg="No match found.")
        note = Note.create(match, sender, recepient, subject, body)
        return dict(success=True, note=note)

    def _get_body(self, msg):
        """Extract the body from an email message, discarding all but the
        text and html parts.

        The result starts with a line giving the message's Date header;
        text parts are separated by a dashed line.
        """
        header = "date of email message: " + str(msg['Date']) + "\n\n"
        _splitter = "\n-------------------------------\n"
        parts = []
        for item in msg.walk():
            if item.get_content_maintype() != "text":
                continue
            payload = item.get_payload()
            # convert html to text by removing all tags
            try:
                text = html2text.strip(payload)
            except Exception as e:
                log.debug("Error in html2text.strip. %s" % str(e))
                text = payload
            parts.append(text)
        # joining puts a break between parts only, never after the last
        return header + _splitter.join(parts)

    def _search_msg(self, sender, recepient, subject):
        """Search a message to see if it can be matched against the
        dictionaries provided.

        A message from a student matches on the sender alone (staff_id is
        None).  A message from staff must also name a student in the
        recepient or, failing that, in the subject line.
        """
        # this is what we return if nothing found
        no_match_dict = dict(
            success=False,
            error_msg="no matches found, sender=%s, recepient=%s, subject=%s"
                      % (sender, recepient, subject))
        # is the sender a student?
        student_id = self._do_search(sender, self.student_dicts)
        if student_id is not None:
            return dict(success=True, staff_id=None, student_id=student_id)
        # is the sender a staff?
        staff_id = self._do_search(sender, self.staff_dicts)
        if staff_id is None:
            return no_match_dict
        # staff_id found
        no_match_dict['staff_id'] = staff_id
        # recepient a student?
        student_id = self._do_search(recepient, self.student_dicts)
        if student_id is None:
            # try searching the subject line
            student_id = self._do_search(subject, self.student_dicts)
        if student_id is None:
            return no_match_dict
        return dict(success=True, staff_id=staff_id, student_id=student_id)

    @staticmethod
    def _do_search(text_to_search, dicts):
        """Helper method.  Search whether any keys in the dicts match
        text_to_search.  Ignores case.

        Uses dictionaries in order.  If it finds a unique match in one
        dict, it does not search further.  If keys within one dictionary
        match different values, returns None.  Several keys matching the
        same value count as a single match.  Returns None when nothing
        matches or text_to_search is None.
        """
        if text_to_search is None:
            return None
        text_to_search = text_to_search.lower()
        for d in dicts:  # search each dict
            found = None
            for key, value in d.items():  # keys are compiled regex
                if not key.search(text_to_search):
                    continue
                if found is None:
                    found = value
                elif found != value:
                    return None  # conflicting matches
            if found is not None:
                return found  # skip checking in further dicts
        return None


def make_dict(attr_name, students=None, staff=None):
    """Make staff and student dicts for the specified attr_name.

    students and staff are lists of objects having the attribute attr_name
    and an id attribute.  In each dict, the keys are the lower-cased
    attribute values and the values are the ids of the objects.

    The tricky bit is that if a staff and student share an attribute
    (e.g. lastname) then we don't want to use that attribute in
    identifying emails.  (Otherwise if a staff and student were both
    called 'Jones', the student might send emails that appeared to be
    from the staff.)  Any value that occurs more than once, ignoring
    case, is therefore left out of both dicts.  Objects whose attribute
    is None are skipped.

    Returns staff_dict alone if students is empty, student_dict alone if
    staff is empty, and the tuple (student_dict, staff_dict) otherwise.
    """
    if students is None:
        students = []
    if staff is None:
        staff = []

    # first check for disallowed values
    seen = set()        # holds values found
    disallowed = set()  # holds values found more than once
    for items in (students, staff):
        for item in items:
            value = getattr(item, attr_name)
            if value is None:
                continue
            value = str(value).lower()
            if value in seen:
                disallowed.add(value)
            else:
                seen.add(value)

    # then create the dictionaries
    student_dict = {}
    staff_dict = {}
    for items, target in ((students, student_dict), (staff, staff_dict)):
        for item in items:
            value = getattr(item, attr_name)
            if value is None:
                continue
            # compare in lower case, the form in which values were recorded
            value = str(value).lower()
            if value in disallowed:
                continue
            target[value] = item.id

    # finally, return the dictionaries
    if len(students) == 0:
        return staff_dict
    if len(staff) == 0:
        return student_dict
    return student_dict, staff_dict