Commit 0d0b2e50 authored by ale

merge v2 to master

parents a57c02cd 39b5424b
#!/bin/bash
bindir=/usr/share/ai-offlinescan
rootdir=${1:-/home/disastro/backup/ai/ring0}
gawk --version > /dev/null 2>&1 || {
echo "Error: you need GNU awk (gawk) to run this script" 1>&2
exit 1
}
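# Pipeline: list the files changed since the last run (NUL-separated),
# scan them with clamscan in xargs-sized batches, convert the scanner
# output to JSON, and upload the results to the collection endpoint.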
${bindir}/incremental-virus-scan.sh ${rootdir}/* \
    | xargs -0 clamscan --no-summary --stdout --infected \
    | ${bindir}/parse-scan-output.py \
    | ${bindir}/contrib/ai/upload-scan-results.py
#!/bin/bash
#
# Scan a rdiff-backup metadata directory and output the list of files
# that have changed since the last invocation.
#
# The output can be fed to analysis software for incremental scanning.
#
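#
# Example (hypothetical run directory and exclude pattern):
#
#   incremental-virus-scan.sh -r /var/run/offlinescan -x 'backups/postgres' \
#       /home/disastro/backup/ai/ring0/*
#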
usage() {
echo "Usage: $0 [-n] [-r <run_dir>] [-x <exclude_pattern>]... <rdiff_root_directory>..." 1>&2
exit 1
}
run_dir=.
dry_run=${DRY_RUN:-n}
excludes=backups/mysql
while getopts hnr:x: flag; do
    case $flag in
        h)
            usage;;
        n)
            dry_run=y;;
        r)
            run_dir="$OPTARG";;
        x)
            excludes="$OPTARG ${excludes}";;
    esac
done
shift $(( $OPTIND - 1 ))
test $# -lt 1 && usage
incoming_dirs="$@"
# Compute the exclude grep pattern.
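# For example, with excludes="backups/mysql tmp/cache" (the second entry
# being hypothetical), the loop below builds "(backups/mysql|tmp/cache)",
# which is passed to "egrep -v".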
exclude_pattern=""
if [ -n "${excludes}" ]; then
    for e in ${excludes}; do
        exclude_pattern="${exclude_pattern}${exclude_pattern:+|}${e}"
    done
    exclude_pattern="(${exclude_pattern})"
fi
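# Print the absolute path of $1 (the parent directory must exist).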
abspath() {
    echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")"
}
# Scan a rdiff-backup metadata directory.
scan_dir() {
local dir="$1"
local stamp_file="$(abspath $2)"
local last_scanned_at=$(test -e ${stamp_file} && cat ${stamp_file})
(cd ${dir} ; \
find . -maxdepth 1 -type f -name 'mirror_metadata*.diff.gz' -print |
sort |
while read filename ; do
local stamp=$(echo ${filename} | sed -e 's/^.*mirror_metadata\.\(.*\)+.*\.diff\.gz$/\1/')
# Only process files which are newer than the last run.
if [ "${stamp}" \> "${last_scanned_at}" ]; then
# Save the new dataset stamp. This only works because the file list
# is sorted.
if [ ${dry_run} = "n" ]; then
echo "${stamp}" > "${stamp_file}"
fi
# Extract file names from the mirror_metadata.
zgrep "^File" "${filename}" | \
(if [ -n "${exclude_pattern}" ]; then
egrep -v "${exclude_pattern}"
else
cat
fi) | \
cut -d' ' -f 2-
fi
done)
}
for base_dir in ${incoming_dirs} ; do
    if [ ! -d ${base_dir}/rdiff-backup-data ]; then
        echo "Error: ${base_dir} does not seem to contain a rdiff backup" 1>&2
        continue
    fi
    stamp_file=${run_dir}/$(basename ${base_dir}).stamp
    scan_dir ${base_dir}/rdiff-backup-data ${stamp_file} | \
        awk -v base_dir=$base_dir 'BEGIN { ORS="\0" } { print base_dir "/" $0 }'
done
#!/usr/bin/python
#
# Upload the scan results to the a/i servers.
# Use SSO to authenticate to the receiver.
#
"""Run scan, process the results and upload them."""
import cookielib
import json
import optparse
import os
import re
import sys
import time
import urllib2
import logging
from sso.urllib2_handler import SSOProcessor
from offlinescan import clamav
from offlinescan import incremental_scan
user_info_pattern = re.compile(
    r'(?P<host>[^/]+)/home/users/investici\.org/(?P<user>[^/]+)/html-(?P<site>[^/]+)/(?P<path>.*)$')
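# A path such as "web1/home/users/investici.org/jdoe/html-example/index.php"
# (hypothetical) yields host=web1, user=jdoe, site=example, path=index.php.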
log = logging.getLogger(__name__)
@@ -35,25 +39,24 @@ def create_opener():
        SSOProcessor(username=username, password=password))
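# Group the clamscan hits by site owner; paths that do not match
# user_info_pattern are silently dropped.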
def process_scan_results(scan_results):
"""Extract AI-specific site information from the scan results."""
now = int(time.time())
by_user = {}
for entry in scan_results:
m = user_info_pattern.match(entry['path'])
if not m:
continue
entry.update(m.groupdict())
entry['found_at'] = now
by_user.setdefault(entry['user'], []).append(entry)
return by_user
def submit(results, submit_url):
    try:
        req = urllib2.Request(
            submit_url,
            json.dumps(results),
            {'Content-Type': 'application/json'})
        resp = create_opener().open(req)
@@ -63,12 +66,29 @@ def send_results(results):
        log.error('submit error: %s', e)
def main():
    logging.basicConfig(level=logging.INFO)
    parser = optparse.OptionParser()
    parser.add_option(
        '--exclude', dest='excludes', metavar='PATTERN',
        action='append', default=[],
        help='Exclude files matching a pattern (the paths will be relative '
        'to the backup root)')
    parser.add_option(
        '--submit-url', dest='submit_url', metavar='URL',
        default='https://offlinescan.autistici.org/receiver',
        help='URL to submit results to (default: %default)')
    opts, args = parser.parse_args()
    if not args:
        parser.error('Must specify a PATH')
    files = incremental_scan.scan_dirs(args, opts.excludes)
    scan_results = clamav.clamscan(files)
    if scan_results:
        log.info('found new viruses:\n%s', json.dumps(scan_results, indent=4))
        data = process_scan_results(scan_results)
        submit(data, opts.submit_url)
if __name__ == '__main__':
    main()
@@ -10,7 +10,6 @@ import redis
from flask import Flask, request, abort, g
app = Flask(__name__)
app.config.from_pyfile('config.py')
def get_redis_connection():
@@ -33,23 +32,27 @@ def receive():
        abort(400)
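    # Queue all Redis writes on one non-transactional pipeline and send
    # them in a single round trip via execute() below.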
    p = g.redis.pipeline(transaction=False)
    per_user = {}
    for user, results in data.iteritems():
        key = 'virus:%s' % user
        p.ltrim(key, 0, 0)
        for r in results:
            p.lpush(key, r)
        per_user[user] = len(results)
    # Save some incremental stats with per-user infection counts.
    new_ptr = 'virus_stats:%f' % time.time()
    for user, count in per_user.iteritems():
        p.hset(new_ptr, user, count)
    p.set('virus_stats:head', new_ptr)
    p.execute()
    return 'ok'
def create_app():
    # The env variable name is compatible with wsgi-app-installer.
    app.config.from_envvar('APP_CONFIG', silent=True)
    return app
import os
import re
import subprocess
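# Keep each clamscan command line well below the kernel argument-size
# limit; halving SC_ARG_MAX leaves headroom, since the environment also
# counts against that limit.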
max_cmdlen = os.sysconf('SC_ARG_MAX') / 2
devnull = open('/dev/null', 'w')
row_pattern = re.compile(r'^(.*): (.*) FOUND$')
clamscan_cmd = ['clamscan', '--no-summary', '--stdout', '--infected']
def clamscan(files):
"""Run 'clamscan' on the given files.
Returns a list of dictionaries with 'path' and 'virus' keys.
"""
state = {'buf': clamscan_cmd[:], 'len': 0}
results = []
def _flush():
pipe = subprocess.Popen(state['buf'], stderr=devnull)
for line in pipe.stdin:
m = row_pattern.match(line.rstrip('\n'))
if m:
results.append({'path': m.group(1), 'virus': m.group(2)})
pipe.wait()
state['buf'] = clamscan_cmd[:]
state['len'] = 0
# Build argument lists, xargs-style.
for f in files:
state['buf'].append(f)
state['len'] += len(f) + 1
if state['len'] > max_cmdlen:
_flush()
if state['len']:
_flush()
return results
def main():
    import json, sys
    files = [line.rstrip('\n') for line in sys.stdin]
    json.dump(clamscan(files), sys.stdout, indent=2)
if __name__ == '__main__':
    main()
import fnmatch
import gzip
import optparse
import os
def read_stamp(stamp_file):
    try:
        with open(stamp_file) as fd:
            return fd.read().strip()
    except IOError:
        return ''
def match_any(path, patterns):
    for p in patterns:
        if fnmatch.fnmatch(path, p):
            return True
    return False
def scan_mirror_metadata(root_dir, mm_path, excludes, changed):
"""Extract filenames from a mirror_metadata diff file."""
with gzip.GzipFile(mm_path, 'r') as fd:
for line in fd:
if line.startswith('File '):
path = line.rstrip('\n')[5:]
if match_any(path, excludes):
continue
changed.add(os.path.join(root_dir, path))
def scan_dir(root_dir, excludes, changed):
"""Analyze rdiff-backup metadata to look for changed files.
The scan is incremental: the timestamp of the most recent backup
is stored in a state file in the rdiff-backup-data directory.
"""
data_dir = os.path.join(root_dir, 'rdiff-backup-data')
if not os.path.isdir(data_dir):
raise Exception('%s does not seem to contain a rdiff backup' % root_dir)
stamp_file = os.path.join(data_dir, '.offlinescan_stamp')
last_scanned_at = read_stamp(stamp_file)
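    # mirror_metadata file names embed an ISO-8601 timestamp, so comparing
    # the stamps as strings also orders the diffs chronologically (assuming
    # a fixed timezone offset).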
    stamp = ''
    for filename in sorted(os.listdir(data_dir)):
        if (not filename.startswith('mirror_metadata.') or
                not filename.endswith('.diff.gz')):
            continue
        file_stamp = filename.split('.')[1]
        # Only process diffs that are strictly newer than the last run.
        if file_stamp <= last_scanned_at:
            continue
        stamp = file_stamp
        scan_mirror_metadata(
            root_dir, os.path.join(data_dir, filename), excludes, changed)
    if stamp:
        with open(stamp_file, 'w') as fd:
            fd.write('%s\n' % stamp)
def scan_dirs(dirs, excludes=[]):
"""Analyze multiple rdiff-backup dirs."""
changed = set()
for root_dir in dirs:
scan_dir(root_dir, excludes, changed)
return changed
def main():
    parser = optparse.OptionParser(usage='%prog <PATH>...')
    parser.add_option(
        '--exclude', dest='excludes', metavar='PATTERN',
        action='append', default=[],
        help='Exclude files matching a pattern (the paths will be relative '
        'to the backup root)')
    opts, args = parser.parse_args()
    if not args:
        parser.error('Must specify a PATH')
    for path in scan_dirs(args, opts.excludes):
        print path
if __name__ == '__main__':
    main()
#!/usr/bin/python
import json
import optparse
import re
import socket
import sys
import time
ROW_PATTERN = r'^(.*): (.*) FOUND$'
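# ROW_PATTERN matches clamscan report lines such as
# "/some/path: Eicar-Test-Signature FOUND" (hypothetical path).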
DEFAULT_PATTERN = r'(?P<host>[^/]+)/home/users/investici\.org/(?P<user>[^/]+)/(?P<path>.*)$'
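# DEFAULT_PATTERN extracts host, user and site-relative path from a scanned
# file name, e.g. "web1/home/users/investici.org/jdoe/public_html/x.php"
# (hypothetical).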
# Decode paths to unicode, trying UTF-8 first and falling back to ISO-8859-1.
def _tounicode(s):
    try:
        return unicode(s, 'utf-8')
    except UnicodeDecodeError:
        return unicode(s, 'iso-8859-1')
def extract_records(stream):
"""Split path and message (virus) from the input."""
row_pattern = re.compile(ROW_PATTERN)
for line in stream:
line = _tounicode(line.strip())
m = row_pattern.match(line)
if not m:
continue
yield m.group(1), m.group(2)
def parse_records(stream, pattern):
    path_pattern = re.compile(pattern)
    for path, msg in stream:
        m = path_pattern.search(path)
        if m:
            out = m.groupdict()
            out['virus'] = msg
            out['local_path'] = path
            yield out
def main():
    parser = optparse.OptionParser()
    parser.add_option('--regexp', default=DEFAULT_PATTERN,
                      help='File-matching pattern.')
    opts, args = parser.parse_args()
    if args:
        parser.error('Too many arguments!')
    results = list(parse_records(extract_records(sys.stdin), opts.regexp))
    virusinfo = {
        'host': socket.gethostname(),
        'stamp': int(time.time()),
        'results': results,
    }
    print json.dumps(virusinfo, indent=4).encode('utf-8')
if __name__ == '__main__':
    main()
#!/usr/bin/python
from setuptools import setup, find_packages
setup(
    name='offlinescan',
    version='0.1',
    description='Detect security issues by examining backups',
    author='Autistici/Inventati',
    author_email='info@autistici.org',
    url='http://git.autistici.org/ai/ai-offlinescan',
    install_requires=[],
    tests_require=[],
    setup_requires=[],
    zip_safe=True,
    packages=find_packages(),
    package_data={},
    entry_points={
        'console_scripts': [
            'ofs-incremental-scan = offlinescan.incremental_scan:main',
            'ofs-clamscan = offlinescan.clamav:main',
            'ofs-probe = offlinescan.ai.probe:main',
        ],
    },
)