Commit 5c74a911 authored by ale

Refactor into split components

parent a78a90af
@@ -33,20 +33,23 @@ API](https://python-gitlab.readthedocs.io/en/stable/), and
# Usage

The tool is split into functional components:

* scan Gitlab and generate a dependency map (stored as a JSON file)
* manually trigger builds using the dependency map
* run a server that listens for Gitlab notifications and triggers
  builds

In all cases, the program is configured via command-line options.
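
A typical one-shot run chains the first two steps; the namespace and
project names below are placeholders:

```
gitlab-docker-autodep --url=https://my.gitlab deps > deps.json
gitlab-docker-autodep --url=https://my.gitlab rebuild --deps=deps.json myns/base
```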

## Common options

The tool must be pointed at your Gitlab instance with the *--url*
command-line option.

You can pass an authentication token using the *--token* or
*--token-file* command-line options. This is usually required in order
to trigger CI pipelines: the access token must have the *api* scope.

The tool will only examine Docker images hosted on the Docker registry
associated with the Gitlab instance. By default the registry name is
@@ -68,7 +71,7 @@ example, it is possible to efficiently limit the scope of the tool to
a specific namespace:

```
gitlab-docker-autodep deps --match myns --filter ^myns/ ...
```

Note that, when building the dependency tree:
@@ -123,8 +126,9 @@ stored in */etc/gitlab_docker_token*:

```
gitlab-docker-autodep \
    --url=https://my.gitlab \
    --token-file=/etc/gitlab_docker_token \
    server \
    --deps=deps.json \
    --host=127.0.0.1 --port=14001
```
@@ -132,6 +136,16 @@ You can then configure your project's webhooks with the URL
`http://localhost:14001/`, with the *Trigger* checkbox set only
on *Pipeline events*.

Then you should generate the *deps.json* dependency map periodically,
for instance with a cron job:

```
*/30 * * * * root gitlab-docker-autodep \
    --url=https://my.gitlab \
    --token-file=/etc/gitlab_docker_token \
    deps > deps.json
```
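
The generated map is a plain JSON object: each key is the path of a
project whose Docker image is used as a base, and the value lists the
projects whose Dockerfiles build FROM it. A minimal example, with
hypothetical project names:

```
{
  "myns/base": ["myns/app1", "myns/app2"],
  "myns/app1": ["myns/app1-child"]
}
```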

It can be useful to run the *rebuild* command from a cron job, for
instance in order to rebuild images on a periodic schedule, and
assuming all your projects share a common base image:

@@ -139,6 +153,6 @@ assuming all your projects share a common base image:

```
50 5 * * * root gitlab-docker-autodep \
    --url=https://my.gitlab \
    --token-file=/etc/gitlab_docker_token \
    rebuild --deps=deps.json $MY_BASE_IMAGE
```
import gitlab
import json
import logging
import re
import sys

_from_rx = re.compile(r'^FROM\s+(\S+).*$', re.MULTILINE)


def _parse_dockerfile(df):
    return _from_rx.findall(df)


def _fetch_dockerfile(gl, project, ref):

@@ -35,7 +33,7 @@ def _remove_image_tag(name):
    return name


def build_docker_deps(gl, search_pattern=None, filter_pattern=None):
    """Build the project dependency map based on Dockerfiles.

    This can be a fairly expensive (long) operation if the list of

@@ -60,60 +58,43 @@ def build_docker_deps(gl, search_pattern=None, filter_pattern=None):
    projects = gl.projects.list(all=True, search=search_pattern, as_list=False)
    for project in projects:
        project_name = project.path_with_namespace
        project_url = project_name
        if filter_rx is not None and not filter_rx.search(project.path_with_namespace):
            continue
        if not _has_gitlab_ci(gl, project, 'master'):
            continue
        df = _fetch_dockerfile(gl, project, 'master')
        if not df:
            continue
        base_images = _parse_dockerfile(df.decode('utf-8'))
        if not base_images:
            logging.error('ERROR: could not find base image for %s',
                          project.path_with_namespace)
            continue
        for img in base_images:
            deps.setdefault(_remove_image_tag(img), []).append(project_url)
    return deps


def docker_deps_to_project_deps(deps, registry_hostname):
    out = {}
    for image_name in deps:
        if image_name.startswith(registry_hostname):
            project_name = image_name[len(registry_hostname)+1:]
            out[project_name] = deps[image_name]
    return out
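
# Illustrative sketch (not part of the module): how an image-level map
# becomes a project-level map. All names below are hypothetical.
#
#   docker_deps = {
#       'registry.my.gitlab/myns/base': ['myns/app1', 'myns/app2'],
#       'debian': ['myns/base'],  # external base image
#   }
#   docker_deps_to_project_deps(docker_deps, 'registry.my.gitlab')
#   # -> {'myns/base': ['myns/app1', 'myns/app2']}
#   # 'debian' is dropped: it is not hosted on the instance registry.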


def dump_deps(gitlab_url, registry_hostname, gitlab_token,
              deps_match, deps_filter, project_deps=True):
    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
    if gitlab_token:
        gl.auth()
    deps = build_docker_deps(gl, deps_match, deps_filter)
    if project_deps:
        deps = docker_deps_to_project_deps(deps, registry_hostname)
    json.dump(deps, sys.stdout, indent=2)
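
# Example (hypothetical values): this is what the 'deps' subcommand ends
# up calling; it prints the project-level dependency map as JSON on stdout.
#
#   dump_deps('https://my.gitlab', 'registry.my.gitlab', None,
#             None, None, project_deps=True)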
from .docker_deps import _parse_dockerfile
import unittest


class TestParseDockerfile(unittest.TestCase):

    def test_parse_dockerfile(self):
        dockerfile = '''
FROM baseimage1 AS build
RUN build
FROM baseimage2
COPY --from=build bin /usr/bin/bin
RUN fix-perms
'''
        images = _parse_dockerfile(dockerfile)
        self.assertEqual(['baseimage1', 'baseimage2'], images)
@@ -7,7 +7,8 @@ try:
except ImportError:
    import urllib.parse as urlparse

from .docker_deps import dump_deps
from .rebuild import rebuild_deps
from .server import run_app

@@ -21,6 +22,10 @@ def main():
    gitlab_opts_group = common_parser.add_argument_group('gitlab options')
    gitlab_opts_group.add_argument(
        '--url', metavar='URL', help='Gitlab URL')
    gitlab_opts_group.add_argument(
        '--token-file', metavar='FILE',
        type=argparse.FileType('r'),
        help='Load Gitlab authentication token from this file')
    gitlab_opts_group.add_argument(
        '--token', metavar='TOKEN',
        help='Gitlab authentication token')

@@ -29,21 +34,35 @@ def main():
        help='Docker registry hostname (if empty, it will be '
        'automatically derived from --url)')
    scope_opts_group = common_parser.add_argument_group('project scope options')
    common_parser.add_argument('--debug', action='store_true')

    # Compute deps.
    deps_parser = subparsers.add_parser(
        'deps',
        parents=[common_parser],
        help='build dependency map',
        description='Generate a map of Docker-derived dependencies between '
        'projects on a Gitlab instance.')
    deps_parser.add_argument(
        '--match',
        help='Search query to filter project list on the server side')
    deps_parser.add_argument(
        '--filter',
        help='Regexp to filter project list on the right-hand (dependency) side')
    deps_parser.add_argument(
        '--docker', action='store_true',
        help='Output dependencies between Docker images, not Gitlab projects')

    # Trigger rebuilds of reverse deps.
    rebuild_image_parser = subparsers.add_parser(
        'rebuild',
        parents=[common_parser],
        help='rebuild dependencies of a project',
        description='Rebuild all projects that depend on the specified '
        'project.')
    rebuild_image_parser.add_argument(
        '--deps', metavar='FILE',
        help='file with project dependencies')
    rebuild_image_parser.add_argument(
        '-n', '--dry-run', action='store_true', dest='dry_run',
        help='only show what would be done')

@@ -52,8 +71,8 @@ def main():
        help='Include all dependencies recursively '
        'and wait for completion of the pipelines')
    rebuild_image_parser.add_argument(
        'project_path',
        help='Project name (relative path)')

    # Server.
    server_parser = subparsers.add_parser(

@@ -63,6 +82,9 @@ def main():
        description='Start an HTTP server that listens for Gitlab webhooks. '
        'Configure Gitlab to send Pipeline events for your projects to this '
        'server to auto-rebuild first-level dependencies.')
    server_parser.add_argument(
        '--deps', metavar='FILE',
        help='file with project dependencies')
    server_parser.add_argument(
        '--port', metavar='PORT', type=int, default=5404,
        dest='bind_port', help='port to listen on')

@@ -90,25 +112,33 @@ def main():
    registry_hostname = 'registry.' + urlparse.urlsplit(args.url).netloc
    logging.error('using %s as Docker registry', registry_hostname)

    gitlab_token = args.token
    if not gitlab_token and args.token_file:
        gitlab_token = args.token_file.read().strip().encode('utf-8')

    if cmd == 'deps':
        dump_deps(
            args.url,
            registry_hostname,
            gitlab_token,
            args.match,
            args.filter,
            not args.docker,
        )
    elif cmd == 'rebuild':
        rebuild_deps(
            args.url,
            gitlab_token,
            args.deps,
            args.project_path,
            args.dry_run,
            args.recurse,
        )
    elif cmd == 'server':
        run_app(
            args.url,
            gitlab_token,
            args.deps,
            args.bind_host,
            args.bind_port,
            args.webhook_auth_token,

import gitlab
import json
import logging
import time


def rebuild(gl, project_path, wait=False):
    """Trigger a rebuild of a project."""
    project = gl.projects.get(project_path)
    if not project:
        return None
    pipeline = project.pipelines.create({'ref': 'master'})
    if wait:
        while pipeline.finished_at is None:
            pipeline.refresh()
            time.sleep(3)
    return pipeline


def rebuild_deps(gitlab_url, gitlab_token,
                 project_deps_path, project_path, dry_run, wait_and_recurse):
    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
    if gitlab_token:
        gl.auth()
    with open(project_deps_path) as fd:
        project_deps = json.load(fd)
    stack = project_deps.get(project_path, [])
    while stack:
        path = stack.pop(0)
        logging.info('rebuilding %s', path)
        if not dry_run:
            rebuild(gl, path, wait_and_recurse)
        if wait_and_recurse:
            stack.extend(project_deps.get(path, []))
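
# Example (hypothetical values): preview which projects a rebuild of
# 'myns/base' would trigger, walking deps.json recursively but without
# creating any pipelines.
#
#   rebuild_deps('https://my.gitlab', None, 'deps.json', 'myns/base',
#                dry_run=True, wait_and_recurse=True)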

import gitlab
import json
import logging
import os
import threading
import time
try:
    import Queue
except ImportError:
    import queue as Queue

from cheroot import wsgi
from flask import Flask, request, make_response

from .rebuild import rebuild


class _ReloadableJSONFile(object):

    check_interval = 60

    def __init__(self, path):
        self.path = path
        self.lock = threading.Lock()
        self._load()
        t = threading.Thread(
            target=self._update_thread,
            name='File reload thread for %s' % path)
        t.setDaemon(True)
        t.start()

    def get_contents(self):
        with self.lock:
            return self.data

    def _load(self):
        with self.lock:
            with open(self.path) as fd:
                self.data = json.load(fd)
            self.stamp = os.stat(self.path).st_mtime

    def _update_thread(self):
        while True:
            time.sleep(self.check_interval)
            try:
                if os.stat(self.path).st_mtime > self.stamp:
                    self._load()
            except Exception:
                # Ignore transient errors (e.g. the file being replaced);
                # keep serving the previous contents.
                pass
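
# Usage sketch: the server reads the dependency map through this wrapper,
# so a deps.json regenerated externally (e.g. by the cron job from the
# README) is picked up within roughly check_interval seconds.
#
#   project_deps = _ReloadableJSONFile('deps.json')
#   deps = project_deps.get_contents().get('myns/base', [])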

queue = Queue.Queue()


def _process_request(gl, project_deps, data):
    pipeline_status = data['object_attributes']['status']
    branch = data['object_attributes']['ref']
    path_with_namespace = data['project']['path_with_namespace']
    action = 'none'
    if pipeline_status == 'success':
        deps = project_deps.get_contents().get(path_with_namespace, [])
        built_projects = []
        for dep_path in deps:
            try:
                p = rebuild(gl, dep_path)
                logging.info('started pipeline %s', p)
                built_projects.append(dep_path)
            except Exception as e:
                logging.error('error rebuilding project %s: %s' % (
                    dep_path, str(e)))
        action = 'rebuilt %s' % (', '.join(built_projects),)
    logging.info('pipeline for %s@%s: %s, action=%s',
                 path_with_namespace, branch, pipeline_status, action)
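
# For reference, the subset of a Gitlab Pipeline webhook payload that
# _process_request actually reads (real events carry many more fields;
# values here are hypothetical):
#
#   {
#       'object_attributes': {'status': 'success', 'ref': 'master'},
#       'project': {'path_with_namespace': 'myns/base'},
#   }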


def worker_thread(gitlab_url, gitlab_token, project_deps):
    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
    if gitlab_token:
        gl.auth()
    while True:
        data = queue.get()