Commit 0925d496 authored by ale
Browse files

use Flask to provide WSGI app

Also, upgrade Debian packaging rules to use 'dh'.
parent 1adb6e57
nospam (0.2.4) unstable; urgency=medium
* Use Flask to provide a WSGI app to the gevent WSGI server.
-- Autistici/Inventati <debian@autistici.org> Sat, 24 Sep 2016 07:36:32 +0100
nospam (0.2.3) unstable; urgency=low
* Improvements in the domain filtering / whitelisting code.
......
......@@ -2,14 +2,14 @@ Source: nospam
Section: python
Priority: optional
Maintainer: Autistici/Inventati <debian@autistici.org>
Build-Depends: debhelper (>= 7), cdbs, python-support, python-setuptools, python
Standards-Version: 3.8.4
Build-Depends: debhelper (>= 7), dh-python
Standards-Version: 3.9.6
Package: nospam
Architecture: all
Homepage: http://code.autistici.org/p/nospam
Depends: ${misc:Depends}, ${python:Depends},
python-setuptools, python-protobuf, python-jinja2, python-greenlet, python-yaml,
python-gevent (>= 0.12), build-essential, libevent-dev (>= 1.4), curl, crm114, adduser
python-gevent (>= 1.0), build-essential, libevent-dev (>= 1.4), curl, crm114, adduser
Description: A blogspam-compatible spam filter daemon.
Does not rely on IP reputation but bayesian filtering instead.
#!/usr/bin/make -f
# -*- makefile -*-
include /usr/share/cdbs/1/rules/debhelper.mk
DEB_PYTHON_SYSTEM := pysupport
DEB_PYTHON_DESTDIR := $(CURDIR)/debian/nospam
include /usr/share/cdbs/1/class/python-distutils.mk
export DH_OPTIONS
binary-install/nospam::
%:
dh $@ --with=python2
override_dh_install:
dh_install
test -d debian/nospam/etc/nospam || mkdir -p debian/nospam/etc/nospam
install -o root -g root -m 644 debian/sample.conf debian/nospam/etc/nospam/config.yml
install -o root -g root -m 644 nospam/conf/stopwords.txt \
......@@ -16,6 +17,3 @@ binary-install/nospam::
install -o root -g root -m 755 nospam/conf/update-surbl \
debian/nospam/usr/bin/update-surbl
cleanbuilddir/nospam::
rm -fr build build-stamp configure-stamp build/ MANIFEST nospam.egg-info/
# -*- coding: utf-8 -*-
"""
flaskext.xmlrpc
===============
Adds support for creating XML-RPC APIs to Flask.
:copyright: (c) 2010 by Matthew "LeafStorm" Frazier.
:license: MIT, see LICENSE for more details.
"""
from flask import request, current_app
from SimpleXMLRPCServer import SimpleXMLRPCDispatcher as Dispatcher
import sys
import xmlrpclib
Fault = xmlrpclib.Fault
class XMLRPCHandler(Dispatcher):
    """
    This is the basic XML-RPC handler class. To use it, you create it::

        handler = XMLRPCHandler('api')

    Then, you can register functions with the :meth:`register` method::

        @handler.register
        def spam():
            pass

    :meth:`register` is just an alias for :meth:`register_function`, so you
    can use that too.

    You can also register an instance using the :meth:`register_instance`
    method, and any methods on said instance will be exposed if they do not
    start with an ``_``.

    Then, you connect it to a :class:`~flask.Flask` instance or a Flask
    module with the :meth:`connect` method, like this::

        handler.connect(app, '/')

    :param endpoint_name: The name to use as an endpoint when connected to
                          an app or module. If not specified here, you specify
                          when you call :meth:`connect`.
    :param instance: The instance to register and expose the methods of.
    :param introspection: Whether to register the introspection functions,
                          like :obj:`system.listMethods`. (It will by
                          default.)
    :param multicall: Whether to register the :obj:`system.multicall`
                      function. (It won't by default.)
    """

    def __init__(self, endpoint_name=None, instance=None, introspection=True,
                 multicall=False):
        # Interpreters older than 2.5 get the no-argument dispatcher
        # constructor; newer ones are told to allow ``None`` and use UTF-8.
        if sys.version_info[:2] < (2, 5):
            Dispatcher.__init__(self)
        else:
            Dispatcher.__init__(self, True, 'utf-8')
        self.endpoint_name = endpoint_name
        if introspection:
            self.register_introspection_functions()
        if multicall:
            self.register_multicall_functions()
        if instance:
            self.register_instance(instance)

    def register(self, *args, **kwargs):
        """
        An alias for :meth:`register_function`.
        """
        return self.register_function(*args, **kwargs)

    def register_function(self, function, name=None):
        """
        This will register the given function. There are two ways to use it.

        As a plain old method, with or without a name::

            handler.register_function(spam)
            handler.register_function(spam, 'spam')

        As a decorator, also with or without a name::

            @handler.register_function
            def spam():
                pass

            @handler.register_function('spam')
            def spam():
                pass

        It's shorter and easier to use :meth:`register`, however, as it does
        the exact same thing.

        :param function: The function to register. (In the named decorator
                         form, this is the function's name.)
        :param name: The name to use, except in the named decorator form.
                     If not given, the function's :obj:`__name__` attribute
                     will be used.
        :returns: The registered function, so that the decorated name stays
                  bound to it. In the named decorator form, a decorator
                  that registers and returns the function it is applied to.
        """
        if isinstance(function, basestring):
            # Named decorator form: ``function`` is really the public name.
            return lambda fn: self.register_function(fn, function)
        Dispatcher.register_function(self, function, name)
        # Return the function explicitly: the Python 2 dispatcher returns
        # None, which would rebind a decorated function's name to None.
        return function

    def register_instance(self, instance, allow_dotted_names=False):
        """
        This registers any kind of object. If the requested method hasn't been
        registered by :meth:`register_function`, it will be checked against
        the instance. You can only have one instance at a time, however.

        If :obj:`allow_dotted_names` is True, the name will be split on the
        dots and the object will be traveled down recursively. However, this
        is a **HUGE SECURITY LOOPHOLE**, as while private methods (starting
        with ``_``) will not be exposed, it's still possible that someone
        could get access to your globals and do very bad things. So don't
        do it unless you have a very good reason.

        :param instance: The instance to register.
        :param allow_dotted_names: Whether to resolve dots in method names.
                                   You probably shouldn't.
        """
        # Yes, it's just a wrapper. I know. This way the docs are consistent.
        Dispatcher.register_instance(self, instance, allow_dotted_names)

    def connect(self, app_module, route, endpoint_name=None):
        """
        Connects the handler to an app or module. You have to provide the
        app and the URL route to use. The route can't contain any variable
        parts, because there is no way to get them to the method. ::

            handler.connect(app, '/api')

        :param app_module: The app or module to connect the handler to.
        :param route: The URL route to use for the handler.
        :param endpoint_name: The name to use when connecting the endpoint.
        :raises RuntimeError: If no endpoint name was given here or to the
                              constructor.
        """
        if endpoint_name is None:
            endpoint_name = self.endpoint_name
            if endpoint_name is None:  # still
                raise RuntimeError("No endpoint name given!")
        # Only POST is routed to the handler.
        app_module.add_url_rule(route, endpoint_name, self.handle_request,
                                methods=['POST'])

    def handle_request(self):
        """
        This is the actual request handler that is routed by :meth:`connect`.
        It takes the request data, dispatches the method, and sends it back
        to the client.
        """
        response_data = self._marshaled_dispatch(request.data)
        return current_app.response_class(response_data,
                                          content_type='text/xml')

    def namespace(self, prefix):
        """
        This returns a :class:`XMLRPCNamespace` object, which has
        :meth:`~XMLRPCNamespace.register` and
        :meth:`~XMLRPCNamespace.register_function` methods. These forward
        directly to the :meth:`register_function` method of the parent they
        were created from, but they will prepend the given prefix, plus a dot,
        to the name registered. For example::

            blog = handler.namespace('blog')

            @blog.register
            def new_post(whatever):
                pass

        would make :obj:`new_post` available as :obj:`blog.new_post`.

        :param prefix: The name to prefix the methods with.
        """
        return XMLRPCNamespace(self, prefix)
class XMLRPCNamespace(object):
    """
    This is a simple proxy that can register methods, and passes them on to
    the :class:`XMLRPCHandler` that created it with a given name added as a
    prefix (with a dot). For more nesting, you can create namespaces from
    namespaces with the :meth:`namespace` method.

    :parameter handler: The handler to pass the methods to.
    :parameter prefix: The prefix to give to the assigned methods. A dot will
                       be appended.
    """

    def __init__(self, handler, prefix):
        self.handler = handler
        self.prefix = prefix

    def register_function(self, function, name=None):
        """
        Registers a function. Use is the same as with the
        :meth:`XMLRPCHandler.register_function` method.

        :param function: The function to register. (In the named decorator
                         form, this is the function's name.)
        :param name: The name to use, except in the named decorator form.
                     If not given, the function's :obj:`__name__` attribute
                     will be used.
        :returns: The registered function, so that the decorated name stays
                  bound to it. In the named decorator form, a decorator
                  that registers and returns the function it is applied to.
        """
        if isinstance(function, basestring):
            # Named decorator form: ``function`` is really the public name.
            return lambda fn: self.register_function(fn, function)
        if name is None:
            name = function.__name__
        new_name = self.prefix + '.' + name
        self.handler.register_function(function, new_name)
        # Hand the function back so that decorator use does not rebind the
        # decorated name to None.
        return function

    def register(self, *args, **kwargs):
        """
        An alias for :meth:`register_function`. As with
        :meth:`XMLRPCHandler.register`, it's shorter and easier to type.
        """
        return self.register_function(*args, **kwargs)

    def namespace(self, name):
        """
        Returns another namespace for the same handler, with the given name
        postfixed to the current namespace's prefix. For example, ::

            handler.namespace('foo').namespace('bar')

        gives the same result as::

            handler.namespace('foo.bar')

        :param name: The name to append (after a dot) to this namespace's
                     prefix.
        """
        return XMLRPCNamespace(self.handler, self.prefix + '.' + name)
def dump_method_call(method, *params):
    """
    Marshal a method name and its positional parameters into an XML-RPC
    method call document. Handy when writing tests.

    :param method: The name of the method to call.
    :param params: The parameters to pass to the method.
    :returns: The marshaled XML-RPC request.
    """
    marshaled = xmlrpclib.dumps(params, methodname=method)
    return marshaled
def load_method_response(response):
    """
    This returns the actual value returned from an XML-RPC response. If it's
    a :obj:`Fault` instance, it will return the fault instead of the value.
    This is also useful for testing.

    :param response: The marshaled XML-RPC method response or fault.
    :returns: The first (and only) value of the response, or the
              :obj:`Fault` that the response encodes.
    """
    try:
        # ``loads`` returns ``(params, methodname)``; take the first param.
        return xmlrpclib.loads(response)[0][0]
    except Fault as fault:
        # ``loads`` raises the fault it decodes; return it to the caller
        # as a value instead of letting it propagate.
        return fault
def test_xmlrpc_call(client, rpc_path, method, *params):
    """
    Perform one XML-RPC method call through a Werkzeug :obj:`Client` (for
    instance the one returned by :meth:`flask.Flask.test_client`): the call
    is marshaled, POSTed to the handler, and the reply is unmarshaled.

    :param client: A :obj:`werkzeug.Client`.
    :param rpc_path: The path to the XML-RPC handler.
    :param method: The method to call.
    :param params: The parameters to pass to the method.
    :returns: The response value, or a :obj:`Fault`.
    """
    payload = dump_method_call(method, *params)
    reply = client.post(rpc_path, data=payload, content_type='text/xml')
    return load_method_response(reply.data)


test_xmlrpc_call.__test__ = False  # keeps Nose from collecting this helper
class XMLRPCTester(object):
    """
    Convenience wrapper for making XML-RPC method calls through a Werkzeug
    :obj:`Client`, such as the one returned by
    :meth:`flask.Flask.test_client`. Build it from the :obj:`Client` and the
    path of the responder, then invoke it with a method name and parameters.

    :param client: A :obj:`werkzeug.Client`.
    :param rpc_path: The path to the XML-RPC handler.
    """

    __test__ = False  # keeps Nose from collecting this class

    def __init__(self, client, rpc_path):
        self.client = client
        self.rpc_path = rpc_path

    def call(self, method, *params):
        """
        POST the marshaled method call to the responder path with a content
        type of ``text/xml`` and return the unmarshaled response or fault.

        Calling the instance itself does the same thing, so these two calls
        are equivalent::

            tester.call('hello', 'world')
            tester('hello', 'world')

        :param method: The name of the method to call.
        :param params: The parameters to pass to the method.
        """
        client, path = self.client, self.rpc_path
        return test_xmlrpc_call(client, path, method, *params)

    def __call__(self, method, *params):
        # Calling the tester is shorthand for :meth:`call`.
        return self.call(method, *params)
#!/usr/bin/env python
from nospam import api
from nospam import instrumentation
from flask import Flask, render_template, url_for
from flaskext.xmlrpc import XMLRPCHandler, Fault
# Flask application that serves both the HTML pages and the XML-RPC API.
app = Flask(__name__, )
# Hard-coded settings handed to api.NoSpam below.
config = {'port': 9001,
          'spam_threshold': '2.0',
          'data_dir': './var/lib/nospam',
          'config_dir': './config'}
# Single NoSpam instance shared by the XML-RPC API and the homepage view.
ns = api.NoSpam(config)
# Expose the methods of the BlogSpam-compatible API object over XML-RPC,
# using the empty string as the endpoint name, at /RPC2.
blogspam_api = XMLRPCHandler("", instance=api.BlogSpamCompatibleAPI(ns))
blogspam_api.connect(app, '/RPC2')
# Connecting the handler at '/' as well is currently disabled:
# blogspam_api.connect(app, '/')
@app.route('/docs/<doc>')
def docs(doc):
    """Render the documentation template named by the last URL segment.

    A ``.html`` suffix is appended when the requested name lacks one, and
    the template is looked up under ``docs/``.
    """
    # TODO: grrrrr
    page = doc if doc.endswith('.html') else doc + '.html'
    return render_template('docs/' + page)
@app.route('/vars')
def get_vars():
    """Return whatever ``instrumentation.get_vars()`` produces as the body."""
    exported = instrumentation.get_vars()
    return exported
@app.route('/')
def homepage():
    """Render the index page with the ham/spam counters.

    The spam percentage is 0 until at least one comment has been seen.
    """
    n_ham, n_spam = ns.stats()
    total = n_ham + n_spam
    pct_spam = (100.0 * n_spam / total) if total > 0 else 0
    return render_template('index.html',
                           seen_comments=total,
                           pct_spam=pct_spam,
                           num_ham=n_ham,
                           num_spam=n_spam)
# When executed directly as a script, start the Flask application with its
# default run() settings.
if __name__ == '__main__':
    app.run()
import gevent
from gevent import http
import logging
import operator
import os
import re
import traceback
from jinja2 import FileSystemLoader, Environment
log = logging.getLogger(__name__)

# Templates live in the 'templates' directory next to this module.
_template_dir = os.path.join(os.path.dirname(__file__), 'templates')
_jinja = Environment(loader=FileSystemLoader(_template_dir))

# The HTTPServer API changed with gevent 0.13.  Compare (major, minor) as
# integers: a plain string comparison orders versions lexically, so e.g.
# '0.9' would wrongly sort after '0.13'.
_gevent_version = tuple(
    int(part) for part in re.findall(r'\d+', gevent.__version__)[:2])
_gevent012_compat = (_gevent_version < (0, 13))
def render(req, template, **args):
    """Render a Jinja template and send it as a 200 HTML reply.

    The template is rendered with ``args`` as its context and encoded as
    UTF-8; Content-Type and Content-Length headers are set before replying.
    """
    body = _jinja.get_template(template).render(**args).encode('utf-8')
    headers = (
        ('Content-Type', 'text/html'),
        ('Content-Length', str(len(body))),
    )
    for header_name, header_value in headers:
        req.add_output_header(header_name, header_value)
    req.send_reply(200, 'OK', body)
def abort(req, code, msg):
    """Reply with status ``code`` and a minimal HTML page showing ``msg``.

    ``msg`` is used both as the reason phrase and as the page heading.
    """
    req.add_output_header('Content-Type', 'text/html')
    page = '<h1>%s</h1>' % msg
    req.send_reply(code, msg, page)
class HTTPServer(http.HTTPServer):
    """Simple HTTP server class based on gevent.http.

    Allows registering handlers for specific URLs.  Each URL is compiled
    as a regular expression anchored at both ends, so it has to match the
    whole request URI.
    """

    # Handlers are global process-wide. This implies that only a
    # single HTTPServer will be used of course.
    _handlers = []

    def __init__(self, listener):
        """Create the server for ``listener``.

        With gevent before 0.13 the listener is only stored here and is
        passed on later by :meth:`serve_forever`.
        """
        if _gevent012_compat:
            self._listener = listener
            http.HTTPServer.__init__(self)
        else:
            http.HTTPServer.__init__(self, listener, spawn=100)

    @classmethod
    def register_url(cls, url, handler, method=None):
        """Register ``handler`` for requests whose URI matches ``url``.

        :param url: Regular expression (anchored with ``^`` and ``$``).
        :param handler: Callable invoked with the request object.
        :param method: Request method the handler is restricted to, or
                       None to accept any method.
        """
        cls._handlers.append((re.compile('^%s$' % url), method, handler))

    def handle(self, req):
        """Dispatch ``req`` to the first matching handler.

        An exception raised by the handler is logged and answered with a
        500 reply; a request matching no handler gets a 404 reply.
        """
        for url, method, handler in self._handlers:
            if url.match(req.uri) and (method is None or req.typestr == method):
                try:
                    handler(req)
                except Exception as e:
                    log.error('uncaught exception: %s: %s\n%s' % (
                        req.uri, str(e), traceback.format_exc()))
                    abort(req, 500, 'Internal Error')
                # Only the first matching handler is ever tried.
                return
        abort(req, 404, 'Not Found')

    def serve_forever(self):
        """Sort the registered handlers, then start serving requests."""
        # Sort by method so that we check method-constrained urls first.
        self._handlers.sort(key=operator.itemgetter(1), reverse=True)
        if _gevent012_compat:
            http.HTTPServer.serve_forever(self, self._listener)
        else:
            http.HTTPServer.serve_forever(self)
......@@ -4,7 +4,6 @@ from nospam.dbshelve import get_shelve
from datetime import date
from math import exp
from nospam.plugin_base import BasePlugin
from nospam import http
from nospam import urls
......@@ -21,7 +20,7 @@ class ClassifiedPlugin(BasePlugin):
concurrent = config.get('concurrent', False)
self._dict = get_shelve(os.path.join(data_dir, 'domains_dict'), concurrent)
self.max_new_links = config.get('classified_max_new_links', 100)
http.HTTPServer.register_url('/classified', self._dump_stats)
#http.HTTPServer.register_url('/classified', self._dump_stats)
def testComment(self, comment):
domains = urls.get_domains(comment['_urls'])
......
#!/usr/bin/python
import gevent
from gevent import pywsgi
from gevent import monkey
monkey.patch_all()
import datetime
import logging
import os
from flask import Flask, request, render_template, abort
from flaskext.xmlrpc import XMLRPCHandler, Fault
from nospam import api
from nospam import http
from nospam import xmlrpc
from nospam import instrumentation
log = logging.getLogger(__name__)
# Flask application handed to the gevent WSGI server by make_server().
app = Flask(__name__)
# XML-RPC handler registered under the endpoint name 'blogspam' and
# connected at /RPC2.
rpchandler = XMLRPCHandler('blogspam')
rpchandler.connect(app, '/RPC2')
@rpchandler.register
def testComment(comment):
    """XML-RPC method: forward ``comment`` to ``app.blogspam.testComment``.

    ``app.blogspam`` is assigned by ``make_server``.
    """
    backend = app.blogspam
    return backend.testComment(comment)
@rpchandler.register
def classifyComment(comment):
    """XML-RPC method: forward ``comment`` to ``app.blogspam.classifyComment``.

    ``app.blogspam`` is assigned by ``make_server``.
    """
    backend = app.blogspam
    return backend.classifyComment(comment)
def homepage(bs):
def _homepage(req):
n_ham, n_spam = bs.stats()
seen_comments = n_ham + n_spam
if seen_comments > 0:
pct_spam = 100.0 * n_spam / seen_comments
else:
pct_spam = 0
return http.render(req, 'index.html', seen_comments=seen_comments,
@app.route('/', methods=('GET',))
def homepage():
    """Render the index page with the ham/spam counters of ``app.nospam``.

    The spam percentage stays at 0 until at least one comment was seen.
    """
    n_ham, n_spam = app.nospam.stats()
    seen = n_ham + n_spam
    pct_spam = 0
    if seen > 0:
        pct_spam = 100.0 * n_spam / seen
    context = dict(seen_comments=seen, pct_spam=pct_spam,
                   num_ham=n_ham, num_spam=n_spam)
    return render_template('index.html', **context)
return _homepage
def staticfile(req):
page = req.uri[1:]
assert '/../' not in page
@app.route('/docs/<page>')
def staticfile(page):
if not page.endswith('.html'):
page += '.html'
# Trick: we check for existence of the page with some knowledge of
# the template_dir. Do not use os.path.join (page may start with /).
page_path = '%s/%s' % (http._template_dir, page)
if not os.path.exists(page_path):
return http.abort(req, 404, 'Not Found')
lastmod = datetime.datetime.utcnow() - datetime.timedelta(15)
req.add_output_header('Last-Modified',
lastmod.strftime('%a, %d %b %Y %H:%M:%S +0000'))
req.add_output_header('Cache-Control', 'public')
return http.render(req, page)
try:
return render_template('docs/' + page)
except:
abort(404)
def make_server(config):
ns = api.NoSpam(config)
addr = ('0.0.0.0', int(config.get('port')))
http_server = http.HTTPServer(addr)
blogspam_api = xmlrpc.XMLRPCServer(api.BlogSpamCompatibleAPI(ns))
http_server.register_url('/RPC2', blogspam_api, method='POST')
http_server.register_url('/', blogspam_api, method='POST')
@app.route('/vars')
def vars():
    """Serve the output of ``instrumentation.get_vars()`` as ``text/plain``."""
    # Imported here because the module-level ``from flask import ...`` line
    # does not bring in make_response; without it this view would raise a
    # NameError on every request.
    from flask import make_response
    resp = make_response(instrumentation.get_vars())
    resp.headers['Content-Type'] = 'text/plain'
    return resp
http_server.register_url('/docs/\w*', staticfile)
http_server.register_url('/vars', instrumentation.http_handler)
http_server.register_url('/', homepage(ns))
return http_server
def make_server(config):
    """Build the gevent WSGI server for the Flask application.

    A NoSpam instance and its BlogSpam-compatible API wrapper are attached
    to ``app`` (as ``app.nospam`` and ``app.blogspam``) so the views and
    XML-RPC methods can reach them.  The server listens on all interfaces
    at the port given by ``config['port']``.
    """
    nospam = api.NoSpam(config)
    app.nospam = nospam
    app.blogspam = api.BlogSpamCompatibleAPI(nospam)
    port = int(config.get('port'))
    return pywsgi.WSGIServer(('0.0.0.0', port), app.wsgi_app)
def run_server(server):
......
import os
import shutil
import tempfile
import unittest
from nospam.plugins.bayes import BayesianPlugin
......@@ -11,7 +12,7 @@ class BayesianPluginTest(unittest.TestCase):
self.bayes = BayesianPlugin({'data_dir': self.data_dir})
def tearDown(self):
os.system('rm -fr %s' % self.data_dir)