diff --git a/feedbackloop/main.py b/feedbackloop/main.py
index 76de6456e41bdaaeb0b21d36d097efe7cd4d4c97..d016df201a9de1c3a7a488d4dbb31b4a48da2125 100644
--- a/feedbackloop/main.py
+++ b/feedbackloop/main.py
@@ -7,7 +7,7 @@ from .app import app, db
 # Import all views by side effect.
 from . import views
 from .model import FeedbackEntry
-from .parse import scan_imap
+from .parse import MailScanner
 
 
 def create_app(config=None):
@@ -28,12 +28,15 @@ manager = Manager(create_app)
 def ingest(unseen, limit):
     """Read and ingest new messages from the IMAP mailbox."""
     with app.app_context():
-        for entry in scan_imap(
+        with MailScanner(
                 app.config['IMAP_SERVER'],
                 app.config['IMAP_USERNAME'],
                 app.config['IMAP_PASSWORD'],
-                unseen, limit):
-            db.session.add(entry)
-        db.session.commit()
+                app.config.get('IMAP_REMOVE_MESSAGES', True)) as scanner:
+            for entry in scanner.scan(unseen, limit):
+                db.session.add(entry)
+            # Commit inside the scanner block: messages are removed from
+            # the mailbox only if the commit has succeeded.
+            db.session.commit()
 
 
diff --git a/feedbackloop/parse.py b/feedbackloop/parse.py
index 097ccd5258c6b76ed18dc6ba8d321aca6020d249..aa35ab1eb70799cd1588ae577cd9d6697750c6b1 100644
--- a/feedbackloop/parse.py
+++ b/feedbackloop/parse.py
@@ -55,103 +55,146 @@ def _hdr(hdr_list, name):
     return v
 
 
-def _scan_mailbox(server, username, password, unseen=False, limit=None):
-    conn = imaplib.IMAP4_SSL(server)
-    conn.socket().settimeout(30)
-    i = 0
-    try:
-        conn.login(username, password)
-        conn.select('INBOX')
-        res, data = conn.uid('search', None, '(UNSEEN)' if unseen else 'ALL')
+
+class MailScanner():
+    """Read messages from a remote IMAP folder and remove them.
+
+    Operates as a context manager, messages scanned are only removed
+    if the inner block executes successfully.
+    """
+
+    def __init__(self, server, username, password, remove=True):
+        self._server = server
+        self._username = username
+        self._password = password
+        self._remove = remove
+        self._to_delete = []
+        self._conn = None
+
+    def __enter__(self):
+        """Connect, log in and select the INBOX."""
+        self._conn = imaplib.IMAP4_SSL(self._server)
+        self._conn.socket().settimeout(30)
+        try:
+            self._conn.login(self._username, self._password)
+            self._conn.select('INBOX')
+        except BaseException:
+            # __exit__ is not called when __enter__ raises.
+            self._disconnect()
+            raise
+        return self
+
+    def __exit__(self, exc_type, value, traceback):
+        """Remove the scanned messages on success, then disconnect."""
+        try:
+            if self._remove and not exc_type:
+                for uid in self._to_delete:
+                    self._conn.uid('store', uid.encode('utf-8'), '+FLAGS', '\\Deleted')
+                self._conn.expunge()
+        finally:
+            # Disconnect even if flagging or expunging failed.
+            self._disconnect()
+
+    def _disconnect(self):
+        """Close the mailbox and log out, ignoring any error."""
+        for teardown in (self._conn.close, self._conn.logout):
+            try:
+                teardown()
+            except Exception:
+                pass
+
+    def _scan_mailbox(self, unseen=False, limit=None):
+        """Yield (uid, raw message) pairs, at most `limit` if set."""
+        i = 0
+        res, data = self._conn.uid('search', None, '(UNSEEN)' if unseen else 'ALL')
         if res != 'OK':
             raise IMAPError(res)
 
         for uid in data[0].decode('utf-8').split():
-            res, response = conn.uid('fetch', uid, '(RFC822)')
+            res, response = self._conn.uid('fetch', uid, '(RFC822)')
             if res != 'OK':
                 raise IMAPError(res)
-            yield response[0][1]
+            yield uid, response[0][1]
             i += 1
-            if limit and i > limit:
+            if limit and i >= limit:
                 break
-    finally:
-        try:
-            conn.close()
-        except:
-            pass
-
-
-def scan_imap(server, user, password, unseen, limit=None):
-    for raw_msg in _scan_mailbox(server, user, password, unseen, limit):
-        try:
-            # Parse the ARF message body.
-            msg = ARFMessage(raw_msg.decode('utf-8'))
-            report_sender, _ = _normalize_addr(
-                _hdr(msg.get_message_headers(), 'From'))
-
-            # Try different ways of finding the original sender. We
-            # look at a few possible things, in order of increasing
-            # priority:
-            #
-            # - the From of the original message
-            # - the From reported in the feedback-report attachment
-            # - the authenticated sender from the Received headers
-            #
-            # We also attempt to detect if the sender is a mailing
-            # list by looking for a "List-Id" header, or a "-bounces"
-            # sender.
-            hdrs = msg.get_original_message_headers()
-            is_list = False
-            timestamp = datetime.now()
-
-            if hdrs:
-                original_from = _hdr(hdrs, 'From')
-                timestamp = datetime.fromtimestamp(
-                    time.mktime(parsedate(_hdr(hdrs, 'Date'))))
-
-            fr = msg.get_feedback_report()
-            if fr:
-                print(f'feedback report: {fr.get_feedback_type()} - {fr.get_original_mail_from()}')
-                original_from = fr.get_original_mail_from()
-                arrival_date = fr.get_arrival_date()
-                if arrival_date:
+
+    def scan(self, unseen, limit=None):
+        """Yield a FeedbackEntry for each message that parses."""
+        for uid, raw_msg in self._scan_mailbox(unseen, limit):
+            try:
+
+                # Parse the ARF message body.
+                msg = ARFMessage(raw_msg.decode('utf-8'))
+                report_sender, _ = _normalize_addr(
+                    _hdr(msg.get_message_headers(), 'From'))
+
+                # Try different ways of finding the original sender. We
+                # look at a few possible things, in order of increasing
+                # priority:
+                #
+                # - the From of the original message
+                # - the From reported in the feedback-report attachment
+                # - the authenticated sender from the Received headers
+                #
+                # We also attempt to detect if the sender is a mailing
+                # list by looking for a "List-Id" header, or a "-bounces"
+                # sender.
+                hdrs = msg.get_original_message_headers()
+                is_list = False
+                timestamp = datetime.now()
+
+                if hdrs:
+                    original_from = _hdr(hdrs, 'From')
                     timestamp = datetime.fromtimestamp(
-                        time.mktime(parsedate(arrival_date)))
-
-            if hdrs:
-                auth_sender = _find_authenticated_sender(hdrs)
-                if auth_sender:
-                    print(f'found authenticated sender: {auth_sender}')
-                    original_from = auth_sender
-
-            if hdrs:
-                list_id = _hdr(hdrs, 'List-Id')
-                if list_id:
-                    original_from = _list_name_from_id(list_id)
-                    is_list = True
-
-            if not original_from:
-                print('unable to extract original sender')
-                continue
-
-            original_from, maybe_list = _normalize_addr(original_from)
-            if maybe_list and not is_list:
-                is_list = maybe_list
-
-            entry = FeedbackEntry(
-                sender=original_from,
-                reporter=report_sender,
-                is_list=is_list,
-                timestamp=timestamp,
-            )
-            if not is_list:
-                entry.message = raw_msg
-            print(f'original from: {original_from}, list={is_list}')
-            yield entry
-
-        except Exception as e:
-            print(f'error: {e}')
-            #print(raw_msg)
+                        time.mktime(parsedate(_hdr(hdrs, 'Date'))))
+
+                fr = msg.get_feedback_report()
+                if fr:
+                    print(f'feedback report: {fr.get_feedback_type()} - {fr.get_original_mail_from()}')
+                    original_from = fr.get_original_mail_from()
+                    arrival_date = fr.get_arrival_date()
+                    if arrival_date:
+                        timestamp = datetime.fromtimestamp(
+                            time.mktime(parsedate(arrival_date)))
+
+                if hdrs:
+                    auth_sender = _find_authenticated_sender(hdrs)
+                    if auth_sender:
+                        print(f'found authenticated sender: {auth_sender}')
+                        original_from = auth_sender
+
+                if hdrs:
+                    list_id = _hdr(hdrs, 'List-Id')
+                    if list_id:
+                        original_from = _list_name_from_id(list_id)
+                        is_list = True
+
+                if not original_from:
+                    print('unable to extract original sender')
+                    continue
+
+                original_from, maybe_list = _normalize_addr(original_from)
+                if maybe_list and not is_list:
+                    is_list = maybe_list
+
+                entry = FeedbackEntry(
+                    sender=original_from,
+                    reporter=report_sender,
+                    is_list=is_list,
+                    timestamp=timestamp,
+                )
+                if not is_list:
+                    entry.message = raw_msg
+                print(f'original from: {original_from}, list={is_list}')
+                yield entry
+                # Resumed only once the consumer asks for the next entry,
+                # i.e. after it has finished handling this one.
+                self._to_delete.append(uid)
+
+            except Exception as e:
+                print(f'error: {e}')
+                #print(raw_msg)
 
 
 def main():
@@ -162,8 +205,9 @@ def main():
     parser.add_argument('--unseen', action='store_true')
     args = parser.parse_args()
 
-    for entry in scan_imap(args.server, args.user, args.password, args.unseen, 20):
-        print(entry)
+    with MailScanner(args.server, args.user, args.password, False) as scanner:
+        for entry in scanner.scan(args.unseen, 20):
+            print(entry)
 
 
 if __name__ == '__main__':
diff --git a/feedbackloop/views.py b/feedbackloop/views.py
index c9e8f5dfe1f0404f8530a429e3fb6e17db402394..e15fd0838f419ac49332e417cf4d726d69db3828 100644
--- a/feedbackloop/views.py
+++ b/feedbackloop/views.py
@@ -1,10 +1,10 @@
 from flask import render_template, abort
-from sqlalchemy import func
+from sqlalchemy import func, text
 from .app import app, db
 from .model import FeedbackEntry
 
 
-@app.route('/report/:report_id')
+@app.route('/report/<report_id>')
 def show_report(report_id):
     report = FeedbackEntry.query.get(report_id)
     if not report:
@@ -12,10 +12,11 @@ def show_report(report_id):
     return render_template('report.html', report=report)
 
 
-@app.route('/by_sender/:sender')
+@app.route('/by_sender/<sender>')
 def by_sender(sender):
     reports = FeedbackEntry.query.filter(
-        FeedbackEntry.sender == sender).order_by('timestamp desc')
+        FeedbackEntry.sender == sender).order_by(
+            FeedbackEntry.timestamp.desc())
     if not reports:
         abort(404)
     return render_template('sender.html',
@@ -26,15 +27,19 @@ def by_sender(sender):
 @app.route('/')
 def index():
     top_user_senders = db.session.query(
-        func.count(FeedbackEntry.id)).filter(
-            FeedbackEntry.is_list is False).group_by(
-                FeedbackEntry.sender).limit(20)
+        FeedbackEntry.sender,
+        func.count(FeedbackEntry.id).label('count')).filter(
+            FeedbackEntry.is_list == False).group_by(
+                FeedbackEntry.sender).order_by(text('count DESC')).limit(20)
     top_list_senders = db.session.query(
-        func.count(FeedbackEntry.id)).filter(
-            FeedbackEntry.is_list is True).group_by(
-                FeedbackEntry.sender).limit(20)
+        FeedbackEntry.sender,
+        func.count(FeedbackEntry.id).label('count')).filter(
+            FeedbackEntry.is_list == True).group_by(
+                FeedbackEntry.sender).order_by(text('count DESC')).limit(20)
     top_reporters = db.session.query(
-        func.count(FeedbackEntry.id)).group_by(FeedbackEntry.reporter)
+        FeedbackEntry.reporter,
+        func.count(FeedbackEntry.id).label('count')).group_by(
+            FeedbackEntry.reporter).order_by(text('count DESC'))
 
     return render_template('index.html',
                            top_user_senders=top_user_senders,