Commit 032896aa authored by ale

rewrite the modular command-line tools in Python

parent 59213c4e
"""Run scan, process the results and upload them."""
import cookielib
import json
import os
import sys
import time
import urllib2
import logging
from sso.urllib2_handler import SSOProcessor
from offlinescan import clamav
from offlinescan import incremental_scan
user_info_pattern = re.compile(
r'(?P<host>[^/]+)/home/users/investici\.org/(?P<user>[^/]+)/html-(?P<site>[^/]+)/(?P<path>.*)$')
log = logging.getLogger(__name__)
def get_credentials():
user = os.getenv('SSO_USER', 'offlinescan')
pwfile = os.path.expanduser(
os.getenv('SSO_PWFILE', '~/.offlinescan.pw'))
with open(pwfile, 'r') as fd:
pw = fd.read().strip()
return user, pw
def create_opener():
username, password = get_credentials()
jar = cookielib.CookieJar()
return urllib2.build_opener(
urllib2.HTTPCookieProcessor(jar),
SSOProcessor(username=username, password=password))
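For reference, the credential lookup above is driven entirely by the environment; a hypothetical setup for a cron job (the values shown are the built-in defaults from get_credentials()):

    # export SSO_USER=offlinescan
    # export SSO_PWFILE=~/.offlinescan.pw
    #
    # The password file holds the password on a single line;
    # surrounding whitespace is stripped when it is read.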
def process_scan_results(scan_results):
    """Extract AI-specific site information from the scan results."""
    now = int(time.time())
    by_user = {}
    for entry in scan_results:
        m = user_info_pattern.match(entry['path'])
        if not m:
            continue
        entry.update(m.groupdict())
        entry['found_at'] = now
        by_user.setdefault(entry['user'], []).append(entry)
    return by_user
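For illustration, a sketch of what process_scan_results() does to a single entry (the host, user and site names are made up). Note that m.groupdict() overwrites the original 'path' value with the site-relative path:

    entry = {'path': 'web1/home/users/investici.org/bob/html-example/evil.php',
             'virus': 'Eicar-Test-Signature'}
    # process_scan_results([entry]) would return roughly:
    # {'bob': [{'host': 'web1', 'user': 'bob', 'site': 'example',
    #           'path': 'evil.php', 'virus': 'Eicar-Test-Signature',
    #           'found_at': <current unix time>}]}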
def submit(results, submit_url):
    try:
        req = urllib2.Request(
            submit_url,
            json.dumps(results),
            {'Content-Type': 'application/json'})
        resp = create_opener().open(req)
        if resp.getcode() != 200:
            log.error('submit error: HTTP status %d', resp.getcode())
    except Exception as e:
        log.error('submit error: %s', e)
def main():
    logging.basicConfig(level=logging.INFO)
    parser = optparse.OptionParser(usage='%prog [<options>] <PATH>...')
    parser.add_option(
        '--exclude', dest='excludes', metavar='PATTERN',
        action='append', default=[],
        help='Exclude files matching a pattern (the paths will be relative '
        'to the backup root)')
    parser.add_option(
        '--submit-url', dest='submit_url', metavar='URL',
        default='https://offlinescan.autistici.org/receiver',
        help='URL to submit results to (default: %default)')
    opts, args = parser.parse_args()
    if not args:
        parser.error('Must specify a PATH')

    files = incremental_scan.scan_dirs(args, opts.excludes)
    scan_results = clamav.clamscan(files)
    if scan_results:
        log.info('found new viruses:\n%s', json.dumps(scan_results, indent=4))
        data = process_scan_results(scan_results)
        submit(data, opts.submit_url)


if __name__ == '__main__':
    main()
offlinescan/clamav.py

import os
import re
import subprocess

# Stay well below the kernel limit on command line length.
max_cmdlen = os.sysconf('SC_ARG_MAX') / 2

devnull = open('/dev/null', 'w')

# Parse clamscan's "<path>: <virus> FOUND" report lines.
row_pattern = re.compile(r'^(.*): (.*) FOUND$')

clamscan_cmd = ['clamscan', '--no-summary', '--stdout', '--infected']
def clamscan(files):
    """Run 'clamscan' on the given files.

    Returns a list of dictionaries with 'path' and 'virus' keys.
    """
    state = {'buf': clamscan_cmd[:], 'len': 0}
    results = []

    def _flush():
        # Run one clamscan batch and parse its report lines.
        pipe = subprocess.Popen(state['buf'], stdout=subprocess.PIPE,
                                stderr=devnull)
        for line in pipe.stdout:
            m = row_pattern.match(line.rstrip('\n'))
            if m:
                results.append({'path': m.group(1), 'virus': m.group(2)})
        pipe.wait()
        state['buf'] = clamscan_cmd[:]
        state['len'] = 0

    # Build argument lists, xargs-style.
    for f in files:
        state['buf'].append(f)
        state['len'] += len(f) + 1
        if state['len'] > max_cmdlen:
            _flush()
    if state['len']:
        _flush()

    return results
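A quick sketch of the parsing step (the path and virus name are made up): with --infected and --no-summary, clamscan prints one report line per hit, which row_pattern splits into its two fields:

    m = row_pattern.match('/backups/web1/evil.php: Eicar-Test-Signature FOUND')
    # m.group(1) == '/backups/web1/evil.php'
    # m.group(2) == 'Eicar-Test-Signature'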
def main():
    import json
    import sys
    files = [line.rstrip('\n') for line in sys.stdin]
    json.dump(clamscan(files), sys.stdout, indent=2)


if __name__ == '__main__':
    main()
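As a standalone filter, main() reads newline-separated file names from stdin and writes the JSON results to stdout, so a hypothetical invocation of the ofs-clamscan entry point (backup path made up) could be:

    # find /backups/web1 -type f | ofs-clamscan > infected.json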
offlinescan/incremental_scan.py

import fnmatch
import gzip
import optparse
import os


def read_stamp(stamp_file):
    try:
        with open(stamp_file) as fd:
            return fd.read().strip()
    except IOError:
        return ''


def match_any(path, patterns):
    for p in patterns:
        if fnmatch.fnmatch(path, p):
            return True
    return False
def scan_mirror_metadata(root_dir, mm_path, excludes, changed):
    """Extract filenames from a mirror_metadata diff file."""
    with gzip.GzipFile(mm_path, 'r') as fd:
        for line in fd:
            if line.startswith('File '):
                path = line.rstrip('\n')[5:]
                if match_any(path, excludes):
                    continue
                changed.add(os.path.join(root_dir, path))
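For reference, a mirror_metadata diff stores one record per changed file, introduced by a 'File ' header line and followed by attribute lines; only the header matters here. A made-up record, roughly in rdiff-backup's format:

    # File home/users/investici.org/bob/html-example/evil.php
    #   Type reg
    #   Size 1304
    # The 5-character 'File ' prefix is stripped, the excludes are
    # matched against the relative path, and root_dir is prepended
    # before the path is added to 'changed'.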
def scan_dir(root_dir, excludes, changed):
    """Analyze rdiff-backup metadata to look for changed files.

    The scan is incremental: the timestamp of the most recent backup
    is stored in a state file in the rdiff-backup-data directory.
    """
    data_dir = os.path.join(root_dir, 'rdiff-backup-data')
    if not os.path.isdir(data_dir):
        raise Exception('%s does not seem to contain an rdiff-backup' % root_dir)
    stamp_file = os.path.join(data_dir, '.offlinescan_stamp')
    last_scanned_at = read_stamp(stamp_file)
    stamp = ''
    for filename in sorted(os.listdir(data_dir)):
        if (not filename.startswith('mirror_metadata.') or
                not filename.endswith('.diff.gz')):
            continue
        stamp = filename.split('.')[1]
        if stamp < last_scanned_at:
            continue
        scan_mirror_metadata(
            root_dir, os.path.join(data_dir, filename), excludes, changed)
    if stamp:
        with open(stamp_file, 'w') as fd:
            fd.write('%s\n' % stamp)
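A sketch of the stamp handling, assuming rdiff-backup's usual increment naming (the timestamp is made up): the second dot-separated component of the filename is an ISO-8601 timestamp, so plain string comparison orders the increments chronologically:

    filename = 'mirror_metadata.2014-11-05T16:42:21+01:00.diff.gz'
    stamp = filename.split('.')[1]
    # stamp == '2014-11-05T16:42:21+01:00'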
def scan_dirs(dirs, excludes=()):
    """Analyze multiple rdiff-backup dirs."""
    changed = set()
    for root_dir in dirs:
        scan_dir(root_dir, excludes, changed)
    return changed
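A hypothetical call, collecting changed paths from two backup roots while ignoring logs (the directory names are made up):

    changed = scan_dirs(['/backups/web1', '/backups/web2'],
                        excludes=['*.log', 'tmp/*'])
    # 'changed' is a set of absolute paths under the two roots.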
def main():
    parser = optparse.OptionParser(usage='%prog <PATH>...')
    parser.add_option(
        '--exclude', dest='excludes', metavar='PATTERN',
        action='append', default=[],
        help='Exclude files matching a pattern (the paths will be relative '
        'to the backup root)')
    opts, args = parser.parse_args()
    if not args:
        parser.error('Must specify a PATH')
    for path in scan_dirs(args, opts.excludes):
        print path


if __name__ == '__main__':
    main()
setup.py

#!/usr/bin/python

from setuptools import setup, find_packages

setup(
    name='offlinescan',
    version='0.1',
    description='Detect security issues by examining backups',
    author='Autistici/Inventati',
    author_email='info@autistici.org',
    url='http://git.autistici.org/ai/ai-offlinescan',
    install_requires=[],
    tests_require=[],
    setup_requires=[],
    zip_safe=True,
    packages=find_packages(),
    package_data={},
    entry_points={
        'console_scripts': [
            'ofs-incremental-scan = offlinescan.incremental_scan:main',
            'ofs-clamscan = offlinescan.clamav:main',
            'ofs-probe = offlinescan.ai.probe:main',
        ],
    },
)
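Once the package is installed, setuptools generates the three console scripts listed in entry_points; a hypothetical session (the backup path is made up):

    # List changed files since the last run, then scan just those:
    #   ofs-incremental-scan /backups/web1 | ofs-clamscan
    # Or run the full scan-process-submit cycle in one step:
    #   ofs-probe /backups/web1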