diff --git a/README.md b/README.md index 4530ac8867143105183d4d14c56c1d073979f4f1..64db07887841135ea658b7e3591180bfcbeb4a68 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,10 @@ gitlab-docker-autodep Automatically rebuild all the downstream dependencies of Docker-based projects on a Gitlab instance. -It scans all repositories containing a Dockerfile looking for FROM -lines and navigates the resulting dependency tree to find all projects -that needs to be rebuilt when their base image (or an upstream image -thereof) changes. +It scans the *master* branch of all repositories containing a +Dockerfile looking for FROM lines and navigates the resulting +dependency tree to find all projects that need to be rebuilt when +their base image (or an upstream image thereof) changes. By default, since it is meant to be used as a trigger as the last step in a CI script, it will not navigate the dependency tree recursively @@ -15,25 +15,38 @@ but only look at the first-level dependencies: this way, further CI pipelines downstream will each trigger their own deps once the image has been rebuilt. +# Installation -## Usage - -Install the tool either in a virtualenv of or system-wide with: +Install the tool either in a virtualenv or system-wide with any of +the standard Python installation mechanisms, for instance (using +*setuptools*): ``` sudo python setup.py install ``` -This will install the *gitlab-docker-autodep* command-line tool. +This will install the *gitlab-docker-autodep* command-line tool in +/usr/local/bin. The tool should work with both Python 2 and Python +3, and it has few dependencies (just the [Gitlab +API](https://python-gitlab.readthedocs.io/en/stable/), and +[Flask](https://flask.pocoo.org/)). + +# Usage + +There are two modes of using this software: either as a one-shot +command-line tool, or as a standalone HTTP server responding to Gitlab +Webhook requests, to integrate Docker dependencies with Gitlab CI. 
-Invoke the tool by pointing it at your Gitlab instance with the -*--url* command-line option, and passing the path of the repository -you want the dependencies of as an argument. This should be the full -repository path including the namespace, not the full URL -(e.g. *group/projectname*). +In both cases, the program is configured via command-line options. + +## Common options + +The tool must be pointed at your Gitlab instance with the *--url* +command-line option. You can pass an authentication token using the *--token* command-line -option. +option. This is usually required in order to trigger CI pipelines: the +token must have sufficient permissions to do so. The tool will only examine Docker images hosted on the Docker registry associated with the Gitlab instance. By default the registry name is @@ -41,6 +54,67 @@ automatically derived from the server URL (adding a *registry* prefix), but it can be changed with the *--registry* command-line option. +## Command-line + +The `rebuild` command will trigger a rebuild of all the dependencies +of a given Docker image, possibly waiting for the CI pipelines to +complete. Pass the qualified Docker image name (as it appears on FROM +lines in Dockerfiles) as a command-line argument. + The tool will print out the project names of the dependencies it -found, and it will also trigger a rebuild if the *--rebuild* option is -specified. +found. The *--recurse* option will traverse the dependency tree +recursively, waiting for CI pipelines to complete so that they are +built in the right order. + +It is possible to limit the scope of the initial dependency scan +(which is an expensive operation) to projects matching a Gitlab search +keyword using the *--match* option. 
+ +## Gitlab CI integration + +In order to automatically rebuild the dependencies of a Docker image +built using Gitlab CI, it is possible to run *gitlab-docker-autodep* +as a webhook: this way, whenever a successful CI pipeline completes, +you can trigger a rebuild of the dependencies. + +To do this, use the *server* command of *gitlab-docker-autodep*, and +specify the address to bind to using the *--addr* and *--port* +options. It is also possible to enforce authentication of the webhook +with a secret token +([X-Gitlab-Token](https://docs.gitlab.com/ee/user/project/integrations/webhooks.html)) +using the *--webhook-auth-token* option. + +When running in this mode, it is assumed that all your Docker-related +projects have webhooks set up to rebuild their dependencies, so +*gitlab-docker-autodep* will only trigger a build of the immediate +dependencies of a project. + +Also note that the server does not have any TLS support: if necessary, +it is best to use a dedicated reverse proxy (Apache, NGINX, etc.). + +For example, assuming the webhook server will be running on the same +machine as Gitlab itself, and that the Gitlab authentication token is +stored in */etc/gitlab_docker_token*: + +``` +gitlab-docker-autodep \ + --url=https://my.gitlab \ + --token=$(< /etc/gitlab_docker_token) \ + server \ + --addr=127.0.0.1 --port=14001 +``` + +You can then configure your project's webhooks with the URL +`http://localhost:14001/webhook`, with the *Trigger* checkbox set only +on *Pipeline events*. 
+
+It can be useful to run the *rebuild* command from a cron job, for
+instance in order to rebuild images on a periodic schedule, and
+assuming all your projects share a common base image:
+
+```
+50 5 * * * root gitlab-docker-autodep
+    --url=https://my.gitlab
+    --token=$(< /etc/gitlab_docker_token)
+    rebuild $MY_BASE_IMAGE
+```
diff --git a/gitlab_docker_autodep/deps.py b/gitlab_docker_autodep/deps.py
new file mode 100644
index 0000000000000000000000000000000000000000..81fc7ac7eabcdae64a5e2bf30500b52f4348178b
--- /dev/null
+++ b/gitlab_docker_autodep/deps.py
@@ -0,0 +1,128 @@
+"""Dependency discovery and rebuild logic for Docker-based Gitlab projects."""
+
+import collections
+import logging
+import time
+
+import gitlab
+
+
+def _parse_dockerfile(df):
+    """Return the base image named by the first FROM line, or None.
+
+    The instruction name is matched case-insensitively, and options
+    (``--platform=...``) as well as the ``AS <stage>`` suffix are
+    skipped, so that only the image reference itself is returned.
+    """
+    for line in df.split('\n'):
+        tokens = line.split()
+        if len(tokens) < 2 or tokens[0].upper() != 'FROM':
+            continue
+        for token in tokens[1:]:
+            if not token.startswith('--'):
+                return token
+    return None
+
+
+def _fetch_dockerfile(gl, project):
+    """Return the Dockerfile on the master branch as text, or None."""
+    try:
+        f = project.files.get(file_path='Dockerfile', ref='master')
+        content = f.decode()
+    except Exception as e:
+        # Best-effort: most failures just mean "no Dockerfile here".
+        # Unlike a bare except, this does not swallow KeyboardInterrupt.
+        logging.debug('could not fetch Dockerfile for %s: %s',
+                      project.path_with_namespace, e)
+        return None
+    if isinstance(content, bytes):
+        content = content.decode('utf-8', 'replace')
+    return content
+
+
+def _remove_image_tag(name):
+    """Strip the tag and/or digest from a Docker image reference.
+
+    Only the last path component is searched for a tag, so that the
+    port of a registry (``registry:5000/group/image``) is left alone.
+    """
+    name = name.split('@', 1)[0]
+    prefix, slash, last = name.rpartition('/')
+    return prefix + slash + last.split(':', 1)[0]
+
+
+def build_dependency_tree(gl, search_pattern=None):
+    """Build the project dependency map based on Dockerfiles.
+
+    Returns a dictionary mapping each base image name (without tag)
+    to the list of projects whose Dockerfile is built FROM it.
+    """
+    deps = {}
+
+    projects = gl.projects.list(all=True, search=search_pattern, as_list=False)
+    for project in projects:
+        df = _fetch_dockerfile(gl, project)
+        if not df:
+            continue
+        base_image = _parse_dockerfile(df)
+        if not base_image:
+            logging.error('ERROR: could not find base image for %s',
+                          project.path_with_namespace)
+            continue
+        deps.setdefault(_remove_image_tag(base_image), []).append(project)
+    return deps
+
+
+def rebuild(project, wait=False):
+    """Trigger a CI pipeline on the master branch of the project.
+
+    If wait is set, poll the pipeline every few seconds until it has
+    finished. Returns the pipeline object.
+    """
+    pipeline = project.pipelines.create({'ref': 'master'})
+    if wait:
+        while pipeline.finished_at is None:
+            time.sleep(3)
+            pipeline.refresh()
+    return pipeline
+
+
+def rebuild_deps(gitlab_url, registry_hostname, gitlab_token,
+                 search_pattern, image_name,
+                 dry_run=False, recurse=False, wait=False):
+    """Rebuild dependencies of the given image.
+
+    With recurse set, the dependencies of each rebuilt project are
+    visited in turn (breadth-first). With wait set, the function
+    returns at the first pipeline that does not succeed.
+    """
+    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
+    if gitlab_token:
+        gl.auth()
+
+    deps = build_dependency_tree(gl, search_pattern)
+
+    # Work on a copy, so that the lists stored in deps are not modified.
+    pending = collections.deque(deps.get(_remove_image_tag(image_name), []))
+    seen = set()
+    while pending:
+        project = pending.popleft()
+        # Guard against dependency loops (an image built FROM itself).
+        if project.path_with_namespace in seen:
+            continue
+        seen.add(project.path_with_namespace)
+
+        print('rebuilding %s' % project.path_with_namespace)
+        if not dry_run:
+            pipeline = rebuild(project, wait)
+            # The outcome is only known if we waited for the pipeline:
+            # one that has just been created is still pending.
+            if wait and pipeline.status != 'success':
+                logging.error('ERROR: build failed for %s',
+                              project.path_with_namespace)
+                return
+
+        if recurse:
+            image_name = '%s/%s' % (
+                registry_hostname, project.path_with_namespace)
+            pending.extend(deps.get(image_name, []))
diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py
index 411b56f1fc017ed3ab7c6fda754a81a5b57d477f..34d40b33e2574aebb52f47d7c0409fa1f5c7cde3 100644
--- a/gitlab_docker_autodep/main.py
+++ b/gitlab_docker_autodep/main.py
@@ -1,123 +1,110 @@
-import gitlab
-import optparse
+import argparse
+import logging
 import os
-import sys
 import time
-import urlparse
 
-
-def _parse_dockerfile(df):
-    for line in df.split('\n'):
-        if line.startswith('FROM '):
-            return line[5:].strip()
-
-
-def _fetch_dockerfile(gl, project):
-    try:
-        f = project.files.get(file_path='Dockerfile', ref='master')
-        return f.decode()
-    except:
-        return None
-
-
-def _remove_image_tag(name):
-    if ':' in name:
-        return name.split(':')[0]
-    return name
-
-
-def build_dependency_tree(gl, search_pattern=None):
-    """Build the project dependency map based on Dockerfiles."""
-    deps = {}
-
-    projects = gl.projects.list(all=True, search=search_pattern, as_list=False)
-    for project in projects:
-        df = _fetch_dockerfile(gl, project)
-        if not df:
-            continue
-        base_image = _parse_dockerfile(df)
-        if not base_image:
-            print >>sys.stderr, 'ERROR: could not find base image for %s' % (
-                project.path_with_namespace,)
-            continue
-        deps.setdefault(_remove_image_tag(base_image), []).append(project)
-    return deps
-
-
-def rebuild(project, wait=False):
-    pipeline = project.pipelines.create({'ref': 'master'})
-    if wait:
-        while pipeline.finished_at is None:
-            pipeline.refresh()
-            time.sleep(3)
-    return pipeline
-
-
-def rebuild_deps(gitlab_url, registry_hostname, gitlab_token,
-                 search_pattern, image_name,
-                 dry_run=False, recurse=False, wait=False):
-    """Rebuild dependencies of the given image."""
-    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
-    if gitlab_token:
-        gl.auth()
-
-    deps = build_dependency_tree(gl, search_pattern)
-
-    stack = deps.get(_remove_image_tag(image_name), [])
-    while stack:
-        project = stack.pop(0)
-
-        print 'rebuilding %s' % project.path_with_namespace
-        if not dry_run:
-            pipeline = rebuild(project, wait)
-            if pipeline.status != 'success':
-                print >>sys.stderr, 'ERROR: build failed for %s' % (
-                    project.path_with_namespace,)
-                return
-
-        if recurse:
-            image_name = '%s/%s' % (
-                registry_hostname, project.path_with_namespace)
-            stack.extend(deps.get(image_name, []))
+try:
+    from urllib.parse import urlsplit  # Python 3
+except ImportError:
+    from urlparse import urlsplit  # Python 2
+
+from .deps import rebuild_deps
+from .server import run_app
 
 
 def main():
-    parser = optparse.OptionParser(usage='%prog [<options>] <image_name>')
-    parser.add_option('--token', help='Authentication token')
-    parser.add_option('--registry',
-                      help='Docker registry hostname (if empty, it will be '
-                      'automatically derived from --url)')
-    parser.add_option('--url', help='Gitlab URL')
-    parser.add_option('-n', '--dry-run', action='store_true', dest='dry_run',
-                      help='Only show what would be done')
-    parser.add_option('--recurse', action='store_true',
-                      help='Include all dependencies recursively '
-                      'and wait for completion of the pipelines')
-    parser.add_option('--match',
-                      help='Search keyword(s) to filter project list')
-    opts, args = parser.parse_args()
-
-    if not opts.url:
+    """Command-line entry point: parse the options and run the command."""
+    parser = argparse.ArgumentParser(
+        description='Rebuild Docker images on a Gitlab instance.')
+    subparsers = parser.add_subparsers(dest='subparser')
+
+    # Common options.
+    parser.add_argument('--token', metavar='TOKEN',
+                        help='Gitlab authentication token')
+    parser.add_argument('--registry', metavar='NAME',
+                        help='Docker registry hostname (if empty, it will be '
+                        'automatically derived from --url)')
+    parser.add_argument('--url', metavar='URL', help='Gitlab URL')
+    parser.add_argument('--debug', action='store_true')
+
+    # Rebuild deps.
+    rebuild_image_parser = subparsers.add_parser(
+        'rebuild',
+        help='rebuild dependencies of an image',
+        description='Rebuild all projects that depend on the specified '
+        'Docker image.')
+    rebuild_image_parser.add_argument(
+        '-n', '--dry-run', action='store_true', dest='dry_run',
+        help='only show what would be done')
+    rebuild_image_parser.add_argument(
+        '--recurse', action='store_true',
+        help='Include all dependencies recursively '
+        'and wait for completion of the pipelines')
+    rebuild_image_parser.add_argument(
+        '--match',
+        help='Search keyword(s) to filter project list')
+    rebuild_image_parser.add_argument(
+        'image_name',
+        help='Docker image name')
+
+    # Server.
+    server_parser = subparsers.add_parser(
+        'server',
+        help='start a HTTP server',
+        description='Start a HTTP server that listens for Gitlab webhooks. '
+        'Configure Gitlab to send Pipeline events for your projects to this '
+        'server to auto-rebuild first-level dependencies.')
+    server_parser.add_argument(
+        '--port', metavar='PORT', type=int, default=5404,
+        dest='bind_port', help='port to listen on')
+    # --host is accepted as an alias of --addr.
+    server_parser.add_argument(
+        '--addr', '--host', metavar='IP', default='127.0.0.1',
+        dest='bind_host', help='address to listen on')
+    server_parser.add_argument(
+        '--webhook-auth-token', metavar='TOKEN',
+        help='Secret X-Gitlab-Token for request authentication')
+
+    args = parser.parse_args()
+    cmd = args.subparser
+
+    # Sub-commands are optional on Python 3: check explicitly.
+    if not cmd:
+        parser.error('Must specify a command')
+    if not args.url:
         parser.error('Must specify --url')
-    if len(args) != 1:
-        parser.error('Bad number of arguments')
+
+    logging.basicConfig(
+        format='%(message)s',
+        level=logging.DEBUG if args.debug else logging.INFO,
+    )
 
     # If --registry is not specified, make an educated guess.
-    registry_hostname = opts.registry
+    registry_hostname = args.registry
     if not registry_hostname:
-        registry_hostname = 'registry.' + urlparse.urlsplit(opts.url).netloc
-        print >>sys.stderr, 'using %s as Docker registry' % (registry_hostname,)
-
-    rebuild_deps(
-        opts.url,
-        registry_hostname,
-        opts.token,
-        opts.match,
-        args[0],
-        opts.dry_run,
-        opts.recurse,
-        opts.recurse,
-    )
+        registry_hostname = 'registry.' + urlsplit(args.url).netloc
+        logging.info('using %s as Docker registry', registry_hostname)
+
+    if cmd == 'rebuild':
+        rebuild_deps(
+            args.url,
+            registry_hostname,
+            args.token,
+            args.match,
+            args.image_name,
+            args.dry_run,
+            args.recurse,
+            args.recurse,
+        )
+    elif cmd == 'server':
+        run_app(
+            args.url,
+            registry_hostname,
+            args.token,
+            args.bind_host,
+            args.bind_port,
+            args.webhook_auth_token,
+        )
 
 
 if __name__ == '__main__':
diff --git a/gitlab_docker_autodep/server.py b/gitlab_docker_autodep/server.py
new file mode 100644
index 0000000000000000000000000000000000000000..4272b60fda5ac26f70119f515bf20a9b915ef755
--- /dev/null
+++ b/gitlab_docker_autodep/server.py
@@ -0,0 +1,163 @@
+"""HTTP server that rebuilds Docker dependencies on Gitlab webhooks."""
+
+import threading
+import time
+
+try:
+    import Queue  # Python 2
+except ImportError:
+    import queue as Queue  # Python 3
+
+import gitlab
+from flask import Flask, abort, make_response, request
+
+from .deps import build_dependency_tree, rebuild
+
+
+app = Flask(__name__)
+
+
+# Maintain a process-wide cache of dependencies, updated periodically
+# in the background. This is protected by a mutex.
+class _DepsCache(object):
+    """Map of image names to dependent projects, refreshed periodically."""
+
+    # Seconds to wait between two scans of the Gitlab instance.
+    update_interval = 1800
+
+    def __init__(self):
+        self._deps_lock = threading.Lock()
+        self._deps_loaded = threading.Event()
+        self._deps = {}
+
+    def wait_until_loaded(self):
+        """Block until the first dependency scan has completed."""
+        self._deps_loaded.wait()
+
+    def get_deps(self, image_name):
+        """Return the list of projects that are built FROM image_name."""
+        with self._deps_lock:
+            return self._deps.get(image_name, [])
+
+    def update_thread(self):
+        """Rescan the project dependencies forever (thread target)."""
+        loaded = False
+        while True:
+            try:
+                if not loaded:
+                    app.logger.info('scanning project dependencies...')
+                new_deps = build_dependency_tree(app.gl)
+                with self._deps_lock:
+                    self._deps = new_deps
+                if not loaded:
+                    app.logger.info('project dependencies loaded')
+                    loaded = True
+                    self._deps_loaded.set()
+            except Exception as e:
+                app.logger.error(
+                    'error updating project dependencies: %s', e)
+            time.sleep(self.update_interval)
+
+
+deps_cache = _DepsCache()
+queue = Queue.Queue()
+
+
+def _process_request(data):
+    """Handle a pipeline event: on success, rebuild the dependencies."""
+    pipeline_status = data['object_attributes']['status']
+    branch = data['object_attributes']['ref']
+    path_with_namespace = data['project']['path_with_namespace']
+
+    action = 'none'
+    if pipeline_status == 'success':
+        # Rebuild the immediate dependencies of this image.
+        image_name = '%s/%s' % (
+            app.config['REGISTRY_HOSTNAME'], path_with_namespace)
+        built_projects = []
+        for project in deps_cache.get_deps(image_name):
+            try:
+                rebuild(project)
+                built_projects.append(project.path_with_namespace)
+            except Exception as e:
+                app.logger.error('error rebuilding project %s: %s',
+                                 project.path_with_namespace, e)
+        action = 'rebuilt %s' % (', '.join(built_projects),)
+
+    app.logger.info('pipeline for %s@%s: %s, action=%s',
+                    path_with_namespace, branch, pipeline_status, action)
+
+
+def worker_thread():
+    """Process the queued webhook requests forever (thread target)."""
+    deps_cache.wait_until_loaded()
+    while True:
+        data = queue.get()
+        try:
+            _process_request(data)
+        except Exception as e:
+            app.logger.error('error processing request: %s', str(e))
+
+
+def run_app(gitlab_url, registry_hostname, gitlab_token,
+            bind_host, bind_port, webhook_token,
+            num_workers=2):
+    """Start the background threads and serve webhooks until stopped."""
+    app.config.update({
+        'REGISTRY_HOSTNAME': registry_hostname,
+        'WEBHOOK_AUTH_TOKEN': webhook_token,
+    })
+
+    app.gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
+    if gitlab_token:
+        app.gl.auth()
+
+    # Start the update thread that will periodically update the
+    # dependency map (an expensive operation).
+    update_t = threading.Thread(
+        target=deps_cache.update_thread,
+        name='Dependency Update Thread')
+    update_t.daemon = True
+    update_t.start()
+
+    # Start the worker threads that will process the requests.
+    for i in range(num_workers):
+        wt = threading.Thread(
+            target=worker_thread,
+            name='Worker %d' % (i+1))
+        wt.daemon = True
+        wt.start()
+
+    # Start the HTTP server to receive webhook requests.
+    app.run(host=bind_host, port=bind_port)
+
+
+@app.route('/')
+def app_index():
+    """Serve a minimal landing page."""
+    return make_response(
+        '<html><body><h1>gitlab-docker-autodep</h1></body></html>', 200)
+
+
+@app.route('/webhook', methods=('POST',))
+def app_webhook():
+    """Authenticate a Gitlab webhook and queue pipeline events."""
+    # Authenticate the request, if configured to do so.
+    if app.config['WEBHOOK_AUTH_TOKEN']:
+        token = request.headers.get('X-Gitlab-Token')
+        if token != app.config['WEBHOOK_AUTH_TOKEN']:
+            abort(401)
+
+    # Ignore non-pipeline events.
+    ev_type = request.headers.get('X-Gitlab-Event')
+    if ev_type != 'Pipeline Hook':
+        app.logger.info('ignored non-pipeline event with type %s', ev_type)
+        return make_response('{}', 200)
+
+    # Send request to worker threads for processing.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        abort(400)
+    queue.put(data)
+
+    return make_response('{}', 200)
diff --git a/setup.py b/setup.py
index d8da492708daa400120552231310433028365cf5..852a17b6b6f6aba253a57fc12cc32729fb6a71fe 100644
--- a/setup.py
+++ b/setup.py
@@ -4,12 +4,12 @@ from setuptools import setup, find_packages
 
 setup(
     name="gitlab-docker-autodep",
-    version="0.1",
+    version="0.2",
     description="Automatically rebuild Docker images",
     author="Autistici/Inventati",
     author_email="info@autistici.org",
     url="https://git.autistici.org/ale/gitlab-docker-autodep",
-    install_requires=['python-gitlab'],
+    install_requires=['python-gitlab', 'Flask'],
     zip_safe=True,
     packages=find_packages(),
     entry_points={