Skip to content
Snippets Groups Projects
Commit 23e20a51 authored by ale's avatar ale
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
*.pyc
*.egg-info
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
# Shared Flask application object. The settings below are the built-in
# defaults: an in-memory SQLite database, with SQLAlchemy modification
# tracking switched off.
app = Flask(__name__)
app.config.update(
    SQLALCHEMY_DATABASE_URI='sqlite:///:memory:',
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
)

# Flask-SQLAlchemy handle bound to the application above.
db = SQLAlchemy(app)
# arf.py - Abstract classes for representing Abuse Reporting Format messages
# Copyright (C) 2017 Dan Nielsen <dnielsen@fastmail.fm>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
""" arf.py - Abstract class for representing Abuse Reporting Format (ARF)
messages as defined in RFC5965.
http://www.faqs.org/rfcs/rfc5965.html
"""
import json
from email.parser import Parser
from email.message import Message
class ARFMessage(object):
    """Parsed Abuse Reporting Format (ARF) message.

    Wraps the ``email.message.Message`` obtained by parsing the text of a
    report and exposes its pieces: the top-level headers, the descriptive
    part, the ``feedback-report`` part and the embedded original message
    (the ``rfc822`` part).
    """

    def __init__(self, arf_source):
        """Parse *arf_source*, the complete text of the ARF message."""
        self._message = Parser().parsestr(arf_source)

    def _get_part(self, subtype):
        """Return the first MIME part whose content subtype is *subtype*.

        The whole message tree is walked (including the top-level message
        itself). Returns None when no part matches.
        """
        for part in self._message.walk():
            if part.get_content_subtype() == subtype:
                return part
        return None

    def _embedded_message(self, subtype):
        """Return the message nested in the first *subtype* part, or None.

        None is returned both when there is no such part and when the part
        does not carry a nested message as its payload.
        """
        part = self._get_part(subtype)
        # Compare against None explicitly: the truth value of a Message is
        # its header count, not whether a part was found.
        if part is None:
            return None
        payload = part.get_payload()
        if not isinstance(payload, list) or not payload:
            return None
        return payload[0]

    def _header_to_camelcase(self, field):
        """Turn a header name such as ``Feedback-Type`` into ``FeedbackType``.

        Every dash-separated word is passed through ``str.capitalize``, so
        ``Source-IP`` becomes ``SourceIp``.
        """
        return ''.join(word.capitalize() for word in field.split('-'))

    def _clean_field_value(self, field_value):
        """Unfold a header value: drop newlines and turn tabs into spaces."""
        return field_value.replace('\n', '').replace('\t', ' ')

    def _serialize_headers(self, source_headers):
        """Convert ``(name, value)`` header pairs to a JSON-friendly dict.

        Keys are camel-cased header names. A header that occurs once maps to
        a string; a header that occurs several times maps to a list of its
        values in order of appearance. ``None`` (no headers available) is
        treated as an empty sequence and yields ``{}``.
        """
        target_headers = {}
        for field, value in source_headers or ():
            clean_value = self._clean_field_value(value)
            json_field = self._header_to_camelcase(field)
            if json_field not in target_headers:
                target_headers[json_field] = clean_value
            elif isinstance(target_headers[json_field], list):
                target_headers[json_field].append(clean_value)
            else:
                # Second occurrence: promote the single value to a list.
                target_headers[json_field] = [target_headers[json_field],
                                              clean_value]
        return target_headers

    def get_message_headers(self):
        """ Returns ARF message headers """
        return self._message.items()

    def get_descriptive_payload(self):
        """ Returns the descriptive or 'friendly' part of the message

        This is the payload of the first sub-part, so the message is
        expected to be multipart.
        """
        return self._message.get_payload()[0].get_payload()

    def get_feedback_report(self):
        """ Returns the message/feedback-report part as a FeedbackReport

        Returns None when the message has no feedback-report part.
        """
        report = self._embedded_message('feedback-report')
        if report is None:
            return None
        # Re-parse the fields so the result is a FeedbackReport instance.
        return Parser(FeedbackReport).parsestr(report.as_string())

    def get_original_message_headers(self):
        """ Returns headers from the original message

        The result is a list of ``(name, value)`` pairs, or None when the
        message has no rfc822 part.
        """
        original = self._embedded_message('rfc822')
        if original is None:
            return None
        return original.items()

    def get_original_message_payload(self):
        """ Returns the content of the original message

        The original message is returned flattened to a string, or None
        when the message has no rfc822 part.
        """
        original = self._embedded_message('rfc822')
        if original is None:
            return None
        return original.as_string()

    def serialize_message_headers_to_json(self):
        """ Returns the ARF message headers as a JSON string """
        return json.dumps(self._serialize_headers(self.get_message_headers()))

    def serialize_original_message_headers_to_json(self):
        """ Returns the original message headers as a JSON string

        Yields ``'{}'`` when there is no original message.
        """
        return json.dumps(self._serialize_headers(
            self.get_original_message_headers()))

    def serialize_report(self):
        """Return the report as a dict of three serialized header sets.

        The keys are ``MessageHeaders``, ``OriginalMessageHeaders`` and
        ``FeedbackReport``. The last two are filled on a best-effort basis:
        any failure while extracting them results in an empty dict.
        """
        target = {}
        target['MessageHeaders'] = self._serialize_headers(
            self.get_message_headers())
        try:
            target['OriginalMessageHeaders'] = self._serialize_headers(
                self.get_original_message_headers())
        except Exception:
            target['OriginalMessageHeaders'] = {}
        try:
            report = self.get_feedback_report()
            fields = report.items() if report is not None else None
            target['FeedbackReport'] = self._serialize_headers(fields)
        except Exception:
            target['FeedbackReport'] = {}
        return target
class FeedbackReport(Message):
    """Message subclass with one accessor per feedback-report field.

    The accessors cover the required and optional fields that RFC5965
    defines for the feedback-report MIME type. Each one returns the value
    of the corresponding field, or None when the field is absent.
    """

    def get_feedback_type(self):
        """Return the ``Feedback-Type`` field."""
        return self['Feedback-Type']

    def get_user_agent(self):
        """Return the ``User-Agent`` field."""
        return self['User-Agent']

    def get_version(self):
        """Return the ``Version`` field."""
        return self['Version']

    def get_original_envelope_id(self):
        """Return the ``Original-Envelope-Id`` field."""
        return self['Original-Envelope-Id']

    def get_original_mail_from(self):
        """Return the ``Original-Mail-From`` field."""
        return self['Original-Mail-From']

    def get_arrival_date(self):
        """Return the ``Arrival-Date`` field."""
        return self['Arrival-Date']

    def get_reporting_mta(self):
        """Return the ``Reporting-MTA`` field."""
        return self['Reporting-MTA']

    def get_source_ip(self):
        """Return the ``Source-IP`` field."""
        return self['Source-IP']

    def get_incidents(self):
        """Return the ``Incidents`` field."""
        return self['Incidents']

    def get_authentication_results(self):
        """Return the ``Authentication-Results`` field."""
        return self['Authentication-Results']

    def get_original_rcpt_to(self):
        """Return the ``Original-Rcpt-To`` field."""
        return self['Original-Rcpt-To']

    def get_reported_domain(self):
        """Return the ``Reported-Domain`` field."""
        return self['Reported-Domain']

    def get_reported_uri(self):
        """Return the ``Reported-URI`` field."""
        return self['Reported-URI']
def load_arf(source_file):
    """Read the file at *source_file* and return it parsed as an ARFMessage."""
    with open(source_file, 'r') as file_handle:
        arf_source = file_handle.read()
    return ARFMessage(arf_source)
from cheroot import wsgi
from datetime import datetime, timedelta
from flask_script import Manager
from .app import app, db
# Import all views by side effect.
from . import views
from .model import FeedbackEntry
from .parse import scan_imap
def create_app(config=None):
    """Configure the shared application, create the tables and return it.

    Settings are first read from the file named by the ``APP_CONFIG``
    environment variable (a missing variable is silently ignored), then
    overridden by the entries of *config*, if any.
    """
    app.config.from_envvar('APP_CONFIG', silent=True)
    overrides = config or {}
    app.config.update(overrides)
    db.create_all()
    return app


# Command-line manager; create_app is handed over as the application factory.
manager = Manager(create_app)
@manager.option('--unseen', action='store_true')
@manager.option('--limit', type=int, default=0)
def ingest(unseen, limit):
    """Read and ingest new messages from the IMAP mailbox."""
    with app.app_context():
        # Mailbox coordinates come from the application configuration.
        settings = app.config
        entries = scan_imap(
            settings['IMAP_SERVER'],
            settings['IMAP_USERNAME'],
            settings['IMAP_PASSWORD'],
            unseen, limit)
        for entry in entries:
            db.session.add(entry)
        # One commit for the whole batch.
        db.session.commit()
# type=int is required: without it a value given on the command line reaches
# the function as a string and timedelta() rejects it with a TypeError.
@manager.option('--days', type=int, default=30)
def expire(days):
    """Expire old entries from the database."""
    # Entries with a timestamp older than this moment are removed.
    cutoff = datetime.now() - timedelta(days=days)
    with app.app_context():
        FeedbackEntry.query.filter(
            FeedbackEntry.timestamp < cutoff
        ).delete()
        db.session.commit()
@manager.option('--addr', default='0.0.0.0')
@manager.option('--port', type=int, default=3030)
def server(addr, port):
    # Serve the application with cheroot's WSGI server on addr:port;
    # start() is called on the server and the function ends when it returns.
    listen_on = (addr, port)
    http_server = wsgi.Server(listen_on, app)
    http_server.start()
def main():
    """Entry point: run the command-line manager."""
    manager.run()


# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
from .app import db
class FeedbackEntry(db.Model):
    """A single feedback-loop report, stored in the ``feedback`` table."""

    __tablename__ = 'feedback'

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Address the report is attributed to (original sender or list); indexed.
    sender = db.Column(db.Text, index=True)
    # Address taken from the From header of the report itself.
    reporter = db.Column(db.Text)
    # True when the sender was recognized as a mailing list.
    is_list = db.Column(db.Boolean)
    # Copy of the report message; the scanner only sets it for non-list senders.
    message = db.Column(db.Text)
    # Time associated with the report; expiry compares against this column.
    timestamp = db.Column(db.DateTime)
#!/usr/bin/python3
import argparse
import imaplib
import re
import time
from datetime import datetime
from email.utils import parsedate
from feedbackloop.arf import ARFMessage
from .model import FeedbackEntry
class IMAPError(Exception):
    """Raised when an IMAP command returns a status other than ``OK``."""
_bounces_rx = re.compile(r'-(bounces|owner)@')
def _normalize_addr(addr):
"""Normalize an address, handling <> syntax.
The returned boolean is true if the address looked like
a mailman bounce address.
"""
m = re.search(r'<([^>]+)>', addr)
if m:
addr = m[1]
if _bounces_rx.search(addr):
return _bounces_rx.sub('', addr), True
return addr, False
def _list_name_from_id(list_id):
"""Parse a (mailman) List-Id header and return the list address."""
m = re.search(r'<(.+)\.(autistici\.org|inventati\.org|boum\.org|naga\.it)>', list_id)
if m:
return f'{m[1]}@{m[2]}'
return f'list-id:{list_id}'
def _find_authenticated_sender(hdr_list):
for name, value in hdr_list:
if name != 'Received':
continue
m = re.search(r'\(Authenticated sender: ([^)]+)\)', value)
if m:
return m[1]
def _hdr(hdr_list, name):
for k, v in hdr_list:
if k == name:
return v
def _scan_mailbox(server, username, password, unseen=False, limit=None):
    """Yield the raw content of the messages found in an IMAP INBOX.

    Connects to *server* over SSL, logs in with *username* / *password*,
    selects ``INBOX`` and fetches each matching message in full.

    Args:
        unseen: when true only messages matching ``UNSEEN`` are searched
            for, otherwise all messages.
        limit: maximum number of messages to yield; a false value (None
            or 0) means no limit.

    Yields:
        The bytes of each fetched message.

    Raises:
        IMAPError: if the search or a fetch returns a status other than OK.
    """
    conn = imaplib.IMAP4_SSL(server)
    try:
        conn.socket().settimeout(30)
        conn.login(username, password)
        conn.select('INBOX')
        res, data = conn.uid('search', None, '(UNSEEN)' if unseen else 'ALL')
        if res != 'OK':
            raise IMAPError(res)
        fetched = 0
        for uid in data[0].decode('utf-8').split():
            # Check the limit before fetching, so that exactly `limit`
            # messages are retrieved and yielded.
            if limit and fetched >= limit:
                break
            res, response = conn.uid('fetch', uid, '(RFC822)')
            if res != 'OK':
                raise IMAPError(res)
            if not response or not isinstance(response[0], tuple):
                # No message data came back for this UID; skip it.
                continue
            yield response[0][1]
            fetched += 1
    finally:
        # Best-effort teardown: close the mailbox, then log out so the
        # connection is released. Failures here are deliberately ignored so
        # they cannot mask an error raised by the scan itself.
        for cleanup in (conn.close, conn.logout):
            try:
                cleanup()
            except (imaplib.IMAP4.error, OSError):
                pass
def _parse_timestamp(date_value, default):
    """Return *date_value* (a date header string) as a datetime, or *default*.

    *default* is returned when the value is missing or cannot be parsed, so
    that a bad date never causes a whole report to be discarded.
    """
    parsed = parsedate(date_value) if date_value else None
    if parsed is None:
        return default
    try:
        # NOTE(review): parsedate() drops the UTC offset and mktime() reads
        # the fields as local time; kept as is to match existing timestamps.
        return datetime.fromtimestamp(time.mktime(parsed))
    except (OverflowError, ValueError, OSError):
        return default


def scan_imap(server, user, password, unseen, limit=None):
    """Scan an IMAP mailbox for ARF reports and yield FeedbackEntry objects.

    Every message returned by the mailbox scan is parsed as an ARF message
    and turned into an (unsaved) FeedbackEntry. Messages that cannot be
    processed, or for which no original sender can be determined, are
    reported on standard output and skipped.

    Args:
        server, user, password: IMAP connection parameters.
        unseen: only look at unseen messages when true.
        limit: maximum number of messages to read (false value: no limit).

    Yields:
        One FeedbackEntry per usable report. The message text is attached
        to the entry only when the sender is not a mailing list.
    """
    for raw_msg in _scan_mailbox(server, user, password, unseen, limit):
        try:
            # Decode once, tolerating invalid bytes: a report that is not
            # clean UTF-8 should still be ingested, and the entry must hold
            # text (not bytes) since it goes into a text column.
            text = raw_msg.decode('utf-8', errors='replace')

            # Parse the ARF message body.
            msg = ARFMessage(text)
            report_sender, _ = _normalize_addr(
                _hdr(msg.get_message_headers(), 'From'))

            # Try different ways of finding the original sender. We
            # look at a few possible things, in order of increasing
            # priority:
            #
            # - the From of the original message
            # - the From reported in the feedback-report attachment
            # - the authenticated sender from the Received headers
            #
            # We also attempt to detect if the sender is a mailing
            # list by looking for a "List-Id" header, or a "-bounces"
            # sender.
            hdrs = msg.get_original_message_headers()
            # Must start out defined: neither source below may be present.
            original_from = None
            is_list = False
            timestamp = datetime.now()

            if hdrs:
                original_from = _hdr(hdrs, 'From')
                timestamp = _parse_timestamp(_hdr(hdrs, 'Date'), timestamp)

            fr = msg.get_feedback_report()
            if fr is not None:
                print(f'feedback report: {fr.get_feedback_type()} - {fr.get_original_mail_from()}')
                # Only override when the field is actually present; a
                # report without it must not erase the sender found above.
                reported_from = fr.get_original_mail_from()
                if reported_from:
                    original_from = reported_from
                timestamp = _parse_timestamp(fr.get_arrival_date(), timestamp)

            if hdrs:
                auth_sender = _find_authenticated_sender(hdrs)
                if auth_sender:
                    print(f'found authenticated sender: {auth_sender}')
                    original_from = auth_sender

                list_id = _hdr(hdrs, 'List-Id')
                if list_id:
                    original_from = _list_name_from_id(list_id)
                    is_list = True

            if not original_from:
                print('unable to extract original sender')
                continue

            original_from, maybe_list = _normalize_addr(original_from)
            if maybe_list and not is_list:
                is_list = maybe_list

            entry = FeedbackEntry(
                sender=original_from,
                reporter=report_sender,
                is_list=is_list,
                timestamp=timestamp,
            )
            if not is_list:
                entry.message = text
            print(f'original from: {original_from}, list={is_list}')
            yield entry
        except Exception as e:
            # Top-level boundary for one message: report and move on to the
            # next one rather than aborting the whole scan.
            print(f'error: {e}')
def main():
    """Debugging entry point: scan a mailbox and print the entries found.

    Connection parameters are taken from the command line; at most 20
    messages are requested from the scanner.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--user')
    parser.add_argument('--password')
    parser.add_argument('--server', default='mail.autistici.org')
    parser.add_argument('--unseen', action='store_true')
    args = parser.parse_args()

    entries = scan_imap(
        args.server, args.user, args.password, args.unseen, 20)
    for entry in entries:
        print(entry)


if __name__ == '__main__':
    main()
body {
background: white;
}
<!doctype html>
<html lang="en">
<head>
<title>Spam Feedback Loop Report {% block title %}{% endblock %}</title>
<link rel="stylesheet" href="/static/style.css">
</head>
<body>
<div id="container">
{% block content %}{% endblock %}
</div>
</body>
</html>
{% extends "_page.html" %}
{% block content %}
<h1>Spam Feedback Loop Reports</h1>
<h3>Top reported user senders</h3>
<table class="table">
{% for row in top_user_senders %}
<tr>
<td><a href="{{ url_for('by_sender', sender=row[0]) }}">{{ row[0] }}</a></td>
<td>{{ row[1] }}</td>
</tr>
{% endfor %}
</table>
<h3>Top reported list senders</h3>
<table class="table">
{% for row in top_list_senders %}
<tr>
<td><a href="{{ url_for('by_sender', sender=row[0]) }}">{{ row[0] }}</a></td>
<td>{{ row[1] }}</td>
</tr>
{% endfor %}
</table>
<h3>Reporting organizations</h3>
<table class="table">
{% for row in top_reporters %}
<tr>
<td>{{ row[0] }}</td>
<td>{{ row[1] }}</td>
</tr>
{% endfor %}
</table>
{% endblock %}
{% extends "_page.html" %}
{% block content %}
<h1>Spam Feedback Loop Reports</h1>
<h2>Report #{{ report.id }}</h2>
<p>Received at {{ report.timestamp.strftime('%Y-%m-%d %H:%M') }}.</p>
<pre>{{ report.message }}</pre>
{% endblock %}
{% extends "_page.html" %}
{% block content %}
<h1>Spam Feedback Loop Reports</h1>
<h2>Reports for <i>{{ sender }}</i></h2>
<table class="table">
{% for r in reports %}
<tr>
<td>
<a href="{{ url_for('show_report', report_id=r.id) }}">
{{ r.timestamp.strftime('%Y-%m-%d %H:%M') }}
</a>
</td>
<td>
{{ r.reporter }}
</td>
</tr>
{% endfor %}
</table>
{% endblock %}
from flask import render_template, abort
from sqlalchemy import func
from .app import app, db
from .model import FeedbackEntry
# Flask URL variables are written <converter:name>; a ":name" segment is
# matched literally and the view would be called without its argument.
@app.route('/report/<int:report_id>')
def show_report(report_id):
    """Render the page for a single report, or 404 if it does not exist."""
    report = FeedbackEntry.query.get(report_id)
    if report is None:
        abort(404)
    return render_template('report.html', report=report)
# Flask URL variables are written <converter:name>. The "path" converter is
# used so that a sender value containing a slash still matches.
@app.route('/by_sender/<path:sender>')
def by_sender(sender):
    """List the reports received for *sender*, most recent first.

    Responds with 404 when there is no report for that sender.
    """
    # Materialize the result: a Query object is always truthy, so the
    # emptiness check below only works on the list of rows.
    reports = FeedbackEntry.query.filter(
        FeedbackEntry.sender == sender
    ).order_by(FeedbackEntry.timestamp.desc()).all()
    if not reports:
        abort(404)
    return render_template('sender.html',
                           sender=sender,
                           reports=reports)
def _count_by(column, is_list=None, limit=None):
    """Return ``(value, count)`` rows for *column*, most frequent first.

    Args:
        column: the FeedbackEntry column to group the entries by.
        is_list: when not None, only entries whose ``is_list`` flag has this
            value are counted.
        limit: when not None, maximum number of rows returned.
    """
    hits = func.count(FeedbackEntry.id)
    query = db.session.query(column, hits)
    if is_list is not None:
        # Must be a SQL expression: the Python test "column is False" is an
        # identity check that evaluates to a constant before reaching SQL.
        query = query.filter(FeedbackEntry.is_list.is_(is_list))
    query = query.group_by(column).order_by(hits.desc())
    if limit is not None:
        query = query.limit(limit)
    return query.all()


@app.route('/')
def index():
    """Render the overview page.

    The template receives three lists of ``(value, count)`` rows: the 20
    most reported non-list senders, the 20 most reported list senders, and
    all reporters with their number of reports.
    """
    return render_template(
        'index.html',
        top_user_senders=_count_by(
            FeedbackEntry.sender, is_list=False, limit=20),
        top_list_senders=_count_by(
            FeedbackEntry.sender, is_list=True, limit=20),
        top_reporters=_count_by(FeedbackEntry.reporter))
setup.py 0 → 100644
from setuptools import setup, find_packages

# Package metadata and installation settings for the feedbackloop tool.
setup(name='feedbackloop',
      version='1.0',
      description='A/I Spam Feedback Loop Management',
      author='A/I',
      author_email='info@autistici.org',
      url='https://git.autistici.org/ai3/tools/feedback-loop',
      packages=find_packages(),
      # Third-party packages required at run time.
      install_requires=[
          'Flask', 'Flask-Script', 'cheroot', 'sqlalchemy', 'Flask-SQLAlchemy',
      ],
      # Installs a "feedbackloop" command that calls feedbackloop.main:main.
      entry_points={
          'console_scripts': [
              'feedbackloop = feedbackloop.main:main',
          ],
      },
      )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment