diff --git a/README.md b/README.md
index d654c3b85223ab403e2c15617c0c2a07e6f2915b..4c8b9e28ebf123dfaefaafceb7c838db1287d0db 100644
--- a/README.md
+++ b/README.md
@@ -33,20 +33,23 @@ API](https://python-gitlab.readthedocs.io/en/stable/), and
 
 # Usage
 
-There are two modes of using this software: either as a one-shot
-command-line tool, or as a standalone HTTP server responding to Gitlab
-Webhook requests, to integrate Docker dependencies with Gitlab CI.
+The tool is split into functional components:
 
-In both cases, the program is configured via command-line options.
+* scan Gitlab and generate a dependency map (stored as a JSON file)
+* manually trigger builds using the dependency map
+* run a server that listens for Gitlab notifications and triggers
+  builds
+
+In all cases, the program is configured via command-line options.
 
 ## Common options
 
 The tool must be pointed at your Gitlab instance with the *--url*
 command-line option,
 
-You can pass an authentication token using the *--token* command-line
-option. This is usually required in order to trigger CI pipelines: the
-access token must have the *api* scope.
+You can pass an authentication token using the *--token* or
+*--token-file* command-line options. This is usually required in order
+to trigger CI pipelines: the access token must have the *api* scope.
 
 The tool will only examine Docker images hosted on the Docker registry
 associated with the Gitlab instance. By default the registry name is
@@ -68,7 +71,7 @@ example, it is possible to efficiently limit the scope of the tool to a
 specific namespace:
 
 ```
-gitlab-docker-autodep ... --match myns --filter ^myns/ ...
+gitlab-docker-autodep deps --match myns --filter ^myns/ ...
 ```
 
 Note that, when building the dependency tree:
@@ -123,8 +126,9 @@ stored in */etc/gitlab_docker_token*:
 ```
 gitlab-docker-autodep \
     --url=https://my.gitlab \
-    --token=$(< /etc/gitlab_docker_token) \
+    --token-file=/etc/gitlab_docker_token \
     server \
+    --deps=deps.json \
     --host=127.0.0.1 --port=14001
 ```
 
@@ -132,6 +136,16 @@ You can then configure your project's webhooks with the URL
 `http://localhost:14001/`, with the *Trigger* checkbox set only on
 *Pipeline events*.
 
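+To verify that the server is receiving events, you can also POST a
+minimal *Pipeline events* payload by hand. This is an illustrative
+sketch: real Gitlab payloads carry many more fields, the project path
+below is a placeholder, and if you configured a webhook secret token
+you may also need to send it the way Gitlab does, via the
+*X-Gitlab-Token* header:
+
+```
+curl -X POST http://localhost:14001/ \
+    -H 'Content-Type: application/json' \
+    -d '{"object_kind": "pipeline",
+         "object_attributes": {"status": "success", "ref": "master"},
+         "project": {"path_with_namespace": "myns/baseimage"}}'
+```
+
+A *success* status triggers a rebuild of the first-level dependencies
+of *myns/baseimage* from the dependency map; any other status is only
+logged.
+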
+Then you should generate the *deps.json* dependency map periodically, +for instance with a cron job: + +``` +*/30 * * * * root gitlab-docker-autodep + --url=https://my.gitlab + --token-file=/etc/gitlab_docker_token + deps > deps.json +``` + It can be useful to run the *rebuild* command from a cron job, for instance in order to rebuild images on a periodic schedule, and assuming all your projects share a common base image: @@ -139,6 +153,6 @@ assuming all your projects share a common base image: ``` 50 5 * * * root gitlab-docker-autodep --url=https://my.gitlab - --token=$(< /etc/gitlab_docker_token) + --token-file=/etc/gitlab_docker_token rebuild $MY_BASE_IMAGE ``` diff --git a/gitlab_docker_autodep/deps.py b/gitlab_docker_autodep/docker_deps.py similarity index 51% rename from gitlab_docker_autodep/deps.py rename to gitlab_docker_autodep/docker_deps.py index c5ae360663b1ca229aec9aff88fa92708f9e847f..aabf43090be1c7d87cadac41c27cb2282e9615d0 100644 --- a/gitlab_docker_autodep/deps.py +++ b/gitlab_docker_autodep/docker_deps.py @@ -1,16 +1,14 @@ import gitlab +import json import logging import re -import time +import sys -_from_rx = re.compile(r'^FROM\s+(.*)$') +_from_rx = re.compile(r'^FROM\s+(\S+).*$', re.MULTILINE) def _parse_dockerfile(df): - for line in df.split('\n'): - m = _from_rx.match(line) - if m: - return m.group(1) + return _from_rx.findall(df) def _fetch_dockerfile(gl, project, ref): @@ -35,7 +33,7 @@ def _remove_image_tag(name): return name -def build_dependency_tree(gl, search_pattern=None, filter_pattern=None): +def build_docker_deps(gl, search_pattern=None, filter_pattern=None): """Build the project dependency map based on Dockerfiles. This can be a fairly expensive (long) operation if the list of @@ -60,60 +58,43 @@ def build_dependency_tree(gl, search_pattern=None, filter_pattern=None): projects = gl.projects.list(all=True, search=search_pattern, as_list=False) for project in projects: + project_name = project.path_with_namespace + project_url = project_name if filter_rx is not None and not filter_rx.search(project.path_with_namespace): + continue + if not _has_gitlab_ci(gl, project, 'master'): continue df = _fetch_dockerfile(gl, project, 'master') if not df: continue - base_image = _parse_dockerfile(df.decode('utf-8')) - if not base_image: + base_images = _parse_dockerfile(df.decode('utf-8')) + if not base_images: logging.error('ERROR: could not find base image for %s', project.path_with_namespace) continue - if not _has_gitlab_ci(gl, project, 'master'): - continue - deps.setdefault(_remove_image_tag(base_image), []).append(project) + for img in base_images: + deps.setdefault(_remove_image_tag(img), []).append(project_url) return deps -def rebuild(project, wait=False): - """Trigger a rebuild of a project.""" - pipeline = project.pipelines.create({'ref': 'master'}) - if wait: - while pipeline.finished_at is None: - pipeline.refresh() - time.sleep(3) - return pipeline +def docker_deps_to_project_deps(deps, registry_hostname): + out = {} + for image_name in deps: + if image_name.startswith(registry_hostname): + project_name = image_name[len(registry_hostname)+1:] + out[project_name] = deps[image_name] + return out -def rebuild_deps(gitlab_url, registry_hostname, gitlab_token, - search_pattern, filter_pattern, image_name, - dry_run=False, recurse=False, wait=False): - """Rebuild dependencies of the given image.""" +def dump_deps(gitlab_url, registry_hostname, gitlab_token, + deps_match, deps_filter, project_deps=True): gl = gitlab.Gitlab(gitlab_url, 
private_token=gitlab_token) if gitlab_token: gl.auth() - deps = build_dependency_tree(gl, search_pattern) - - stack = deps.get(_remove_image_tag(image_name), []) - while stack: - project = stack.pop(0) - - logging.info('rebuilding %s', project.path_with_namespace) - if not dry_run: - try: - pipeline = rebuild(project, wait) - if pipeline.status not in ('success', 'pending'): - logging.error('ERROR: build failed for %s (status: %s)', - project.path_with_namespace, pipeline.status) - continue - except gitlab.exceptions.GitlabError as e: - logging.error('ERROR: gitlab error: %s: %s', - project.path_with_namespace, str(e)) - continue - - if recurse: - image_name = '%s/%s' % ( - registry_hostname, project.path_with_namespace) - stack.extend(deps.get(image_name, [])) + deps = build_docker_deps(gl, deps_match, deps_filter) + + if project_deps: + deps = docker_deps_to_project_deps(deps, registry_hostname) + + json.dump(deps, sys.stdout, indent=2) diff --git a/gitlab_docker_autodep/docker_deps_test.py b/gitlab_docker_autodep/docker_deps_test.py new file mode 100644 index 0000000000000000000000000000000000000000..bcffbaab35eaff54ea19e4337077f3a354e94385 --- /dev/null +++ b/gitlab_docker_autodep/docker_deps_test.py @@ -0,0 +1,19 @@ +from .docker_deps import _parse_dockerfile + +import unittest + + +class TestParseDockerfile(unittest.TestCase): + + def test_parse_dockerfile(self): + dockerfile = ''' +FROM baseimage1 AS build +RUN build + +FROM baseimage2 +COPY --from=build bin /usr/bin/bin +RUN fix-perms + +''' + images = _parse_dockerfile(dockerfile) + self.assertEqual(['baseimage1', 'baseimage2'], images) diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py index 5c87ea4b86db49118a7b343e99b5bf557868d24d..041f2cd1b74505c9e72fea286938359840d2f77d 100644 --- a/gitlab_docker_autodep/main.py +++ b/gitlab_docker_autodep/main.py @@ -7,7 +7,8 @@ try: except ImportError: import urllib.parse as urlparse -from .deps import rebuild_deps +from .docker_deps import dump_deps +from .rebuild import rebuild_deps from .server import run_app @@ -21,6 +22,10 @@ def main(): gitlab_opts_group = common_parser.add_argument_group('gitlab options') gitlab_opts_group.add_argument( '--url', metavar='URL', help='Gitlab URL') + gitlab_opts_group.add_argument( + '--token-file', metavar='FILE', + type=argparse.FileType('r'), + help='Load Gitlab authentication token from this file') gitlab_opts_group.add_argument( '--token', metavar='TOKEN', help='Gitlab authentication token') @@ -29,21 +34,35 @@ def main(): help='Docker registry hostname (if empty, it will be ' 'automatically derived from --url)') scope_opts_group = common_parser.add_argument_group('project scope options') - scope_opts_group.add_argument( + common_parser.add_argument('--debug', action='store_true') + + # Compute deps. + deps_parser = subparsers.add_parser( + 'deps', + parents=[common_parser], + help='build dependency map', + description='Generate a map of Docker-derived dependencies between ' + 'projects on a Gitlab instance.') + deps_parser.add_argument( '--match', help='Search query to filter project list on the server side') - scope_opts_group.add_argument( + deps_parser.add_argument( '--filter', - help='Regexp to filter project list on the client side') - common_parser.add_argument('--debug', action='store_true') - - # Rebuild deps. 
+ help='Regexp to filter project list on the right-hand (dependency) side') + deps_parser.add_argument( + '--docker', action='store_true', + help='Output dependencies between Docker images, not Gitlab projects') + + # Trigger rebuilds of reverse deps. rebuild_image_parser = subparsers.add_parser( 'rebuild', parents=[common_parser], - help='rebuild dependencies of an image', + help='rebuild dependencies of a project', description='Rebuild all projects that depend on the specified ' - 'Docker image.') + 'project.') + rebuild_image_parser.add_argument( + '--deps', metavar='FILE', + help='file with project dependencies') rebuild_image_parser.add_argument( '-n', '--dry-run', action='store_true', dest='dry_run', help='only show what would be done') @@ -52,8 +71,8 @@ def main(): help='Include all dependencies recursively ' 'and wait for completion of the pipelines') rebuild_image_parser.add_argument( - 'image_name', - help='Docker image name') + 'project_path', + help='Project name (relative path)') # Server. server_parser = subparsers.add_parser( @@ -63,6 +82,9 @@ def main(): description='Start a HTTP server that listens for Gitlab webhooks. ' 'Configure Gitlab to send Pipeline events for your projects to this ' 'server to auto-rebuild first-level dependencies.') + server_parser.add_argument( + '--deps', metavar='FILE', + help='file with project dependencies') server_parser.add_argument( '--port', metavar='PORT', type=int, default='5404', dest='bind_port', help='port to listen on') @@ -90,25 +112,33 @@ def main(): registry_hostname = 'registry.' + urlparse.urlsplit(args.url).netloc logging.error('using %s as Docker registry', registry_hostname) - if cmd == 'rebuild': - rebuild_deps( + gitlab_token = args.token + if not gitlab_token and args.token_file: + gitlab_token = args.token_file.read().strip().encode('utf-8') + + if cmd == 'deps': + dump_deps( args.url, registry_hostname, - args.token, + gitlab_token, args.match, args.filter, - args.image_name, + not args.docker, + ) + elif cmd == 'rebuild': + rebuild_deps( + args.url, + gitlab_token, + args.deps, + args.project_path, args.dry_run, args.recurse, - args.recurse, ) elif cmd == 'server': run_app( args.url, - registry_hostname, - args.token, - args.match, - args.filter, + gitlab_token, + args.deps, args.bind_host, args.bind_port, args.webhook_auth_token, diff --git a/gitlab_docker_autodep/rebuild.py b/gitlab_docker_autodep/rebuild.py new file mode 100644 index 0000000000000000000000000000000000000000..84ac48ca2bf165ef496a46bcf3912a36ac3b2dbc --- /dev/null +++ b/gitlab_docker_autodep/rebuild.py @@ -0,0 +1,40 @@ +import gitlab +import json +import logging +import time + + +def rebuild(gl, project_path, wait=False): + """Trigger a rebuild of a project.""" + project = gl.projects.get(project_path) + if not project: + return None + + pipeline = project.pipelines.create({'ref': 'master'}) + if wait: + while pipeline.finished_at is None: + pipeline.refresh() + time.sleep(3) + return pipeline + + +def rebuild_deps(gitlab_url, gitlab_token, + project_deps_path, project_path, dry_run, wait_and_recurse): + gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token) + if gitlab_token: + gl.auth() + + with open(project_deps_path) as fd: + project_deps = json.load(fd) + + stack = project_deps.get(project_path, []) + while stack: + path = stack.pop(0) + logging.info('rebuilding %s', path) + if not dry_run: + rebuild(gl, path, wait_and_recurse) + if wait_and_recurse: + stack.extend(project_deps.get(path, [])) + + + diff --git a/gitlab_docker_autodep/server.py 
b/gitlab_docker_autodep/server.py index 3d7b772b37ed8c974c6548681b4134eb3c1dae70..da65c4cbe410b7e50b7391a9c95ac8b942e13373 100644 --- a/gitlab_docker_autodep/server.py +++ b/gitlab_docker_autodep/server.py @@ -1,119 +1,118 @@ import gitlab +import json +import logging +import os import threading import time try: import Queue except ImportError: import queue as Queue -from flask import Flask, request, make_response -from .deps import build_dependency_tree, rebuild +from cheroot import wsgi +from flask import Flask, request, make_response -app = Flask(__name__) +from .rebuild import rebuild -# Maintain a process-wide cache of dependencies, updated periodically -# in the background. This is protected by a mutex. -class _DepsCache(object): +class _ReloadableJSONFile(object): - update_interval = 1800 + check_interval = 60 - _deps_lock = threading.Lock() - _deps_loaded = threading.Event() - _deps = {} + def __init__(self, path): + self.path = path + self.lock = threading.Lock() + self._load() + t = threading.Thread( + target=self._update_thread, + name='File reload thread for %s' % path) + t.setDaemon(True) + t.start() - def wait_until_loaded(self): - self._deps_loaded.wait() + def get_contents(self): + with self.lock: + return self.data - def get_deps(self, image_name): - with self._deps_lock: - return self._deps.get(image_name, []) + def _load(self): + with self.lock: + with open(self.path) as fd: + self.data = json.load(fd) + self.stamp = os.stat(self.path).st_mtime - def update_thread(self, search_pattern, filter_pattern): - loaded = False + def _update_thread(self): while True: + time.sleep(self.check_interval) try: - if not loaded: - app.logger.info('scanning project dependencies...') - new_deps = build_dependency_tree(app.gl, search_pattern, filter_pattern) - with self._deps_lock: - self._deps = new_deps - if not loaded: - app.logger.info('project dependencies loaded') - loaded = True - self._deps_loaded.set() - except Exception as e: - app.logger.error('error updating project dependencies: %s' % str(e)) - time.sleep(self.update_interval) + if os.stat(self.path).st_mtime > self.stamp: + self._load() + except: + pass + -deps_cache = _DepsCache() queue = Queue.Queue() -def _process_request(data): +def _process_request(gl, project_deps, data): pipeline_status = data['object_attributes']['status'] branch = data['object_attributes']['ref'] path_with_namespace = data['project']['path_with_namespace'] action = 'none' if pipeline_status == 'success': - # Rebuild the immediate dependencies of this image. 
-        image_name = '%s/%s' % (app.config['REGISTRY_HOSTNAME'], path_with_namespace)
+        deps = project_deps.get_contents().get(path_with_namespace, [])
+
         built_projects = []
-        for project in deps_cache.get_deps(image_name):
+        for dep_path in deps:
             try:
-                rebuild(project)
-                built_projects.append(project.path_with_namespace)
+                p = rebuild(gl, dep_path)
+                logging.info('started pipeline %s', p)
+                built_projects.append(dep_path)
             except Exception as e:
-                app.logger.error('error rebuilding project %s: %s' % (
-                    project.path_with_namespace, str(e)))
+                logging.error('error rebuilding project %s: %s' % (
+                    dep_path, str(e)))
         action = 'rebuilt %s' % (', '.join(built_projects),)
 
-    app.logger.info('pipeline for %s@%s: %s, action=%s',
-                    path_with_namespace, branch, pipeline_status, action)
+    logging.info('pipeline for %s@%s: %s, action=%s',
+                 path_with_namespace, branch, pipeline_status, action)
 
 
-def worker_thread():
-    deps_cache.wait_until_loaded()
+def worker_thread(gitlab_url, gitlab_token, project_deps):
+    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
+    if gitlab_token:
+        gl.auth()
+
     while True:
         data = queue.get()
         try:
-            _process_request(data)
+            _process_request(gl, project_deps, data)
         except Exception as e:
-            app.logger.error('error processing request: %s', str(e))
+            logging.error('error processing request: %s', str(e))
+
+
+app = Flask(__name__)
 
 
-def run_app(gitlab_url, registry_hostname, gitlab_token,
-            search_pattern, filter_pattern, bind_host, bind_port,
-            webhook_token, num_workers=2):
+def run_app(gitlab_url, gitlab_token,
+            project_deps_path, bind_host, bind_port,
+            webhook_token, num_workers=3):
     app.config.update({
-        'REGISTRY_HOSTNAME': registry_hostname,
         'WEBHOOK_AUTH_TOKEN': webhook_token,
     })
 
-    app.gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
-    if gitlab_token:
-        app.gl.auth()
-
-    # Start the update thread that will periodically update the
-    # dependency map (an expensive operation).
-    update_t = threading.Thread(
-        target=deps_cache.update_thread,
-        args=(search_pattern, filter_pattern),
-        name='Dependency Update Thread')
-    update_t.setDaemon(True)
-    update_t.start()
-
-    # Start the worker threads that will process the requests.
+    project_deps = _ReloadableJSONFile(project_deps_path)
+
+    # Start the worker threads that will process the requests in the
+    # background.
     for i in range(num_workers):
         wt = threading.Thread(
             target=worker_thread,
+            args=(gitlab_url, gitlab_token, project_deps),
             name='Worker %d' % (i+1))
         wt.setDaemon(True)
         wt.start()
 
     # Start the HTTP server to receive webhook requests.
- app.run(host=bind_host, port=bind_port) + server = wsgi.Server((bind_host, bind_port), app) + server.start() @app.route('/', methods=('POST',)) diff --git a/setup.py b/setup.py index 852a17b6b6f6aba253a57fc12cc32729fb6a71fe..c316141daf2642f5c681711f5dd3b7d3a4dbe1aa 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,12 @@ from setuptools import setup, find_packages setup( name="gitlab-docker-autodep", - version="0.2", + version="0.3", description="Automatically rebuild Docker images", author="Autistici/Inventati", author_email="info@autistici.org", url="https://git.autistici.org/ale/gitlab-docker-autodep", - install_requires=['python-gitlab', 'Flask'], + install_requires=['python-gitlab', 'Flask', 'cheroot'], zip_safe=True, packages=find_packages(), entry_points={ diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000000000000000000000000000000000000..e0284e40de13aeaef4a64efa27aa32c57ee59644 --- /dev/null +++ b/tox.ini @@ -0,0 +1,9 @@ +[tox] +envlist = py3 + +[testenv] +deps= + nose +commands= + nosetests -vv [] +