diff --git a/LICENSE b/LICENSE index 93557b6708baee58f4c32c5e9cb86184575f6a0a..e963df8294069543e782fd72aa71832dca1571ca 100644 --- a/LICENSE +++ b/LICENSE @@ -620,55 +620,3 @@ copy of the Program in return for a fee. END OF TERMS AND CONDITIONS - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - gitlab-docker-autodep - Copyright (C) 2018 ale - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - gitlab-docker-autodep Copyright (C) 2018 ale - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. 
- -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<http://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<http://www.gnu.org/philosophy/why-not-lgpl.html>. diff --git a/README.md b/README.md index 4c8b9e28ebf123dfaefaafceb7c838db1287d0db..0f94cf90e59f63530815c9dfe43e21f6ded1be69 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,23 @@ -gitlab-docker-autodep +gitlab-deps === -Automatically rebuild all the downstream dependencies of Docker-based -projects on a Gitlab instance. +*Gitlab-deps* is a simple build orchestration toolkit: it tracks +dependencies between projects on a Gitlab instance and it can +automatically rebuild dependencies when a project pipeline completes +successfully. -It scans the *master* branch of all repositories containing a -Dockerfile looking for FROM lines and navigates the resulting -dependency tree to find all projects that needs to be rebuilt when -their base image (or an upstream image thereof) changes. 
+It can track dependencies between projects in either of two supported +ways: + +* projects can add a `.gitlab-deps` file to the root of their + repository, containing the fully qualified project URLs of their + dependencies; +* gitlab-deps can scan Dockerfiles (in the repository root) and + automatically infer dependencies based on FROM lines. + +The implementation depends on HTTP hooks triggered by pipeline events: +gitlab-deps runs a small HTTP server to respond to these requests and +trigger new builds. By default, since it is meant to be used as a trigger as the last step in a CI script, it will not navigate the dependency tree recursively @@ -17,105 +27,130 @@ has been rebuilt. # Installation +The tools require Python 3. + Install the tool either in a virtualenv of or system-wide with any of the standard Python installation mechanisms, for instance (using *setuptools*): ``` -sudo python setup.py install +sudo python3 setup.py install ``` -This will install the *gitlab-docker-autodep* command-line tool in -/usr/local/bin. The tool should work with either Python 2 and Python -3, and it has few dependencies (just the [Gitlab -API](https://python-gitlab.readthedocs.io/en/stable/), and -[Flask](https://flask.pocoo.org/)). +This will install the *gitlab-deps* command-line tool in +/usr/local/bin. The tool has few dependencies (just the [Gitlab +API](https://python-gitlab.readthedocs.io/en/stable/), +[Flask](https://flask.pocoo.org/) and +[Cheroot](https://pypi.org/project/cheroot/)). # Usage -The tool is split into functional components: +The toolkit is split into functional components (all wrapped in a +single executable with different sub-commands): -* scan Gitlab and generate a dependency map (stored as a JSON file) +* scan Gitlab and generate a dependency map * manually trigger builds using the dependency map * run a server that listens for Gitlab notifications and trigger builds -In all cases, the program is configured via command-line options. 
+The tools talk to Gitlab using its API, so you're going to need an +admin token in order to create new pipelines. ## Common options The tool must be pointed at your Gitlab instance with the *--url* -command-line option, +command-line option, or alternatively using the `GITLAB_URL` +environment variable. You can pass an authentication token using the *--token* or *--token-file* command-line options. This is usually required in order -to trigger CI pipelines: the access token must have the *api* scope. +to trigger CI pipelines, or to access private projects: the access +token must have at least the *api* scope. Credentials can also be +provided using the `GITLAB_AUTH_TOKEN` or `GITLAB_AUTH_TOKEN_FILE` +environment variables. -The tool will only examine Docker images hosted on the Docker registry -associated with the Gitlab instance. By default the registry name is -automatically derived from the server URL (adding a *registry* -prefix), but it can be changed with the *--registry* command-line -option. +### Listing projects -### Scope +The *list-projects* sub-command can be used to create a list of +projects (and their branches) in the Gitlab instance. It provides some +basic functionality for filtering (using the *--search* option), but +it generates output suitable for *grep*, e.g. to filter a specific +path prefix (Gitlab group): -On larger Gitlab instances, parsing Dockerfiles for all projects can -be an expensive (long) operation. The program offers two options to -manage the scope of the dependency analysis: *--match* and *--filter*. +``` +gitlab-deps list-projects | grep ^path/to/my/group/ +``` -The former, *--match*, allows for filtering the project list on the -server side, using a Gitlab search query. The latter, *--filter*, -applies a regular expression to the project names (including -namespaces) before parsing their dependencies. 
Combining the two, for -example, it is possible to efficiently limit the scope of the tool to -a specific namespace: +or to only select "master" branches: ``` -gitlab-docker-autodep deps --match myns --filter ^myns/ ... +gitlab-deps list-projects | grep ':master$' ``` -Note that, when building the dependency tree: +The output from this command is just a list of project paths (with +namespaces) and branch names, separated by a ':', one per line: -* tags in FROM lines are ignored -* only the *master* branch of repositories is scanned for Dockerfiles +``` +path/to/project1:master +path/to/project1:test-branch +path/to/project2:master +... +``` -This might lead to more rebuilds than strictly necessary. +## Computing dependencies -## Command-line +The *deps* sub-command will scan the projects and their repositories, +and it will produce a list of all the edges in the dependency +graph. It takes a list of project_path:branch specs as input (as +produced by the *list-projects* sub-command), and it will produce a +list of edges as whitespace-separated project:branch pairs, e.g.: -The `rebuild` command will trigger a rebuild of all the dependencies -of a given Docker image, possibly waiting for the CI pipelines to -complete. Pass the qualified Docker image name (as it appears on FROM -lines in Dockerfiles) as a command-line argument. +``` +project:master dependency1:master +project:master dependency2:master +``` + +The output format is once again meant to be processed with standard +UNIX tools such as *awk* and *grep*. -The tool will print out the project names of the dependencies it -found. The *--recurse* option will traverse the dependency tree -recursively, waiting for CI pipelines to complete so that they are -built in the right order. +## Configuring pipeline_events hooks -It is possible to limit the scope of the initial dependency scan -(which is an expensive operation) to projects matching a Gitlab search -keyword using the *--match* option. 
+To work, gitlab-deps needs an HTTP hook for pipeline_events on all +projects that have dependencies. Since setting this up in Gitlab is a +manual and laborious process, the *set-hooks* sub-command is provided +to do this automatically using the API. The intended usage is to run +it on the right-hand side of the dependency edges (i.e. the list of +projects/branches that actually have dependencies): + +``` +gitlab-deps deps | awk '{print $2}' | gitlab-deps set-hooks +``` -## Gitlab CI integration +## One-off rebuilds -In order to automatically rebuild the dependencies of a Docker image -built using Gitlab CI, it is possible to run *gitlab-docker-autodep* -as a webhook: this way, whenever a successful CI pipeline completes, -you can trigger a rebuild of the dependencies. +The *rebuild* sub-command will trigger a rebuild of all the +dependencies of a given project, possibly waiting for the CI pipelines +to complete. Pass a qualified project name and branch as a +command-line argument. The dependency graph (list of edges as produced +by the *deps* sub-command) must also be provided, either as a file or +on standard input. -To do this, use the *server* command of *gitlab-docker-autodep*, and -specify the address to bind to using the *--host* and *--port* -options. It is also possible to enforce authentication of the webhook -with a secret token +The *--recurse* option will traverse the dependency tree recursively, +waiting for CI pipelines to complete so that they are built in the +right order. + +## Running the server + +The gitlab-deps tool has a *server* command to start a simple HTTP +server that receives the pipeline_events webhooks from Gitlab, and +triggers builds for project dependencies. + +The *server* command requires an address to bind to, specified using +the *--host* and *--port* options. 
It is also possible to enforce +authentication of the webhook with a secret token ([X-Gitlab-Token](https://docs.gitlab.com/ee/user/project/integrations/webhooks.html)) using the *--webhook-auth-token* option. -When running in this mode, it is assumed that all your Docker-related -projects have webhooks set up to rebuild their dependencies, so -*gitlab-docker-autodep* will only trigger a build of the immediate -dependencies of a project. - Also note that the server does not have any TLS support: if necessary, it is best to use a dedicated reverse proxy (Apache, NGINX, etc). @@ -124,35 +159,13 @@ machine as Gitlab itself, and that the Gitlab authentication token is stored in */etc/gitlab_docker_token*: ``` -gitlab-docker-autodep \ +gitlab-deps deps \ +| gitlab-deps server \ --url=https://my.gitlab \ --token-file=/etc/gitlab_docker_token \ - server \ - --deps=deps.json --host=127.0.0.1 --port=14001 ``` -You can then configure your project's webhooks with the URL -`http://localhost:14001/`, with the *Trigger* checkbox set only -on *Pipeline events*. - -Then you should generate the *deps.json* dependency map periodically, -for instance with a cron job: - -``` -*/30 * * * * root gitlab-docker-autodep - --url=https://my.gitlab - --token-file=/etc/gitlab_docker_token - deps > deps.json -``` - -It can be useful to run the *rebuild* command from a cron job, for -instance in order to rebuild images on a periodic schedule, and -assuming all your projects share a common base image: - -``` -50 5 * * * root gitlab-docker-autodep - --url=https://my.gitlab - --token-file=/etc/gitlab_docker_token - rebuild $MY_BASE_IMAGE -``` +If configuring webhooks manually (rather than with *set-hooks*), +create a new webhook with the URL `http://localhost:14001/`, and with +the *Trigger* checkbox set only on *Pipeline events*. 
diff --git a/gitlab_docker_autodep/__init__.py b/gitlab_deps/__init__.py similarity index 100% rename from gitlab_docker_autodep/__init__.py rename to gitlab_deps/__init__.py diff --git a/gitlab_deps/deps.py b/gitlab_deps/deps.py new file mode 100644 index 0000000000000000000000000000000000000000..531a9c218f2ca68fa235133c8ee7f777726b5aa3 --- /dev/null +++ b/gitlab_deps/deps.py @@ -0,0 +1,118 @@ +import re + + +DEFAULT_BRANCH = 'master' + + +def split_project_branch(project_with_branch): + if ':' in project_with_branch: + p, b = project_with_branch.split(':') + return p, b + return project_with_branch, DEFAULT_BRANCH + + +def list_projects(gl, search_pattern): + projects = gl.projects.list( + all=True, + search=search_pattern, + search_namespaces=True, + as_list=False, + simple=True, + ) + for p in projects: + yield p.path_with_namespace + + +def get_branches(gl, project_names): + for path_with_namespace in project_names: + p = gl.projects.get(path_with_namespace) + for b in p.branches.list(): + yield (path_with_namespace, b.name) + + +def has_ci(gl, project_path, branch_name): + p = gl.projects.get(project_path) + try: + p.files.get(file_path='.gitlab-ci.yml', ref=branch_name) + return True + except Exception: + return False + + +_from_rx = re.compile(r'^FROM\s+(\S+).*$', re.MULTILINE) + + +def get_docker_deps(gl, project_path, branch_name): + p = gl.projects.get(project_path) + try: + f = p.files.get(file_path='Dockerfile', ref=branch_name) + return _from_rx.findall(f.decode().decode('utf-8')) + except Exception: + return [] + + +def get_explicit_deps(gl, project_path, branch_name): + p = gl.projects.get(project_path) + try: + f = p.files.get(file_path='.gitlab-deps', ref=branch_name) + return f.decode().decode('utf-8').split('\n') + except Exception: + return [] + + +_docker_image_rx = re.compile(r'^([^/]*)(/([^:]*))?(:(.*))?$') + + +def docker_image_to_project(docker_image, registry_hostname): + m = _docker_image_rx.match(docker_image) + if m and m[1] == 
registry_hostname: + # The branch is the tag, except for 'latest' + if not m[5] or m[5] == 'latest': + branch = DEFAULT_BRANCH + else: + branch = m[5] + return m[3], branch + return None, None + + +_url_rx = re.compile(r'^(https?://[^/]+/)([^:]+)(:.*)?$') + + +def url_to_project(url, gitlab_url): + m = _url_rx.match(url) + if m and m[1] == gitlab_url: + return m[2], m[3] or DEFAULT_BRANCH + + +def not_null(l): + return filter(None, l) + + +def get_deps(gl, gitlab_url, registry_hostname, project_path, branch_name): + deps = [] + deps.extend(not_null( + url_to_project(url, gitlab_url) + for url in get_explicit_deps(gl, project_path, branch_name))) + deps.extend(not_null( + docker_image_to_project(img, registry_hostname) + for img in get_docker_deps(gl, project_path, branch_name))) + return deps + + +def list_deps(gl, gitlab_url, registry_hostname, projects): + for project_path, branch_name in projects: + deps = get_deps(gl, gitlab_url, registry_hostname, + project_path, branch_name) + for dep_path, dep_branch in deps: + print(f'{project_path}:{branch_name} {dep_path}:{dep_branch}') + + +def read_deps(fd): + deps = {} + for line in fd: + src, dst = line.strip().split() + src_project, src_branch = split_project_branch(src) + dst_project, dst_branch = split_project_branch(dst) + deps.setdefault((src_project, src_branch), []).append( + (dst_project, dst_branch)) + return deps diff --git a/gitlab_deps/hooks.py b/gitlab_deps/hooks.py new file mode 100644 index 0000000000000000000000000000000000000000..5f53b2113e83473cc8a4f24da4e75ea9f2421c97 --- /dev/null +++ b/gitlab_deps/hooks.py @@ -0,0 +1,19 @@ +import logging + + +def check_hook(gl, hook_url, webhook_token, project_path, dry_run): + project = gl.projects.get(project_path) + found = False + for h in project.hooks.list(): + if h.url == hook_url and h.pipeline_events: + found = True + break + if found: + return + logging.info('adding pipeline_events hook to %s', project_path) + if not dry_run: + project.hooks.add( + 
url=hook_url, + pipeline_events=True, + token=webhook_token, + ) diff --git a/gitlab_deps/main.py b/gitlab_deps/main.py new file mode 100644 index 0000000000000000000000000000000000000000..6fec413f3b57d880d1fdd05b26d6428351193b63 --- /dev/null +++ b/gitlab_deps/main.py @@ -0,0 +1,269 @@ +import argparse +import gitlab +import logging +import os +import sys +from urllib.parse import urlsplit + +from .deps import get_branches, list_projects, list_deps, \ + split_project_branch, read_deps +from .hooks import check_hook +from .rebuild import rebuild_deps +from .server import run_app + + +def _fmtdesc(s): + return s.strip() + + +def main(): + parser = argparse.ArgumentParser( + description='Manage Gitlab project dependencies and trigger pipelines.') + subparsers = parser.add_subparsers(dest='subparser') + + # Common options. + common_parser = argparse.ArgumentParser(add_help=False) + common_parser.add_argument( + '--debug', action='store_true', + help='increase logging level') + common_parser.add_argument( + '-n', '--dry-run', action='store_true', dest='dry_run', + help='only show what would be done') + gitlab_opts_group = common_parser.add_argument_group('gitlab options') + gitlab_opts_group.add_argument( + '--url', metavar='URL', help='Gitlab URL', + default=os.getenv('GITLAB_URL')) + gitlab_opts_group.add_argument( + '--token-file', metavar='FILE', + type=argparse.FileType('r'), + default=os.getenv('GITLAB_AUTH_TOKEN_FILE'), + help='file containing the Gitlab authentication token') + gitlab_opts_group.add_argument( + '--token', metavar='TOKEN', + default=os.getenv('GITLAB_AUTH_TOKEN'), + help='Gitlab authentication token') + + # List projects. + list_projects_parser = subparsers.add_parser( + 'list-projects', + parents=[common_parser], + help='list projects', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=_fmtdesc(''' +List all projects and their branches on the Gitlab instance. 
+ +The output is a list of project paths with all their branches, separated +by a colon, one per line. Since the Gitlab 'search' API is quite +coarse, you can then filter the output for specific projects or branches +using 'grep', e.g.: + + gitlab-deps list-projects | grep ^path/to/my/group/ + +or + + gitlab-deps list-projects | grep ':master$' + +''')) + list_projects_parser.add_argument( + '--search', + help='search query used to filter project list on the server side') + + # Compute deps. + deps_parser = subparsers.add_parser( + 'deps', + parents=[common_parser], + help='build dependency map', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=_fmtdesc(''' +Generate a map of dependencies between projects on a +Gitlab instance. + +The input must consist of a list of projects along with their +branches, separated by a colon, one per line. If the branch is +unspecified, 'master' is assumed. + +The output consists of pairs of project / dependency (so, these are +'forward' dependencies), for all projects/branches specified in the +input. + +To obtain a list of reverse dependencies, one can simply swap the +columns in the output, e.g.: + + gitlab-deps deps < project.list | awk '{print $2, $1}' + +'''), epilog=_fmtdesc(''' +Input can be read from a file (if passed as an argument), or +from standard input if a filename is omitted or specified as '-'. +''')) + deps_parser.add_argument( + '--registry', metavar='NAME', + default=os.getenv('GITLAB_REGISTRY'), + help='Docker registry hostname (if empty, it will be ' + 'automatically derived from --url)') + deps_parser.add_argument( + 'projects_list', + type=argparse.FileType('r'), + nargs='?', default=sys.stdin) + + # Setup pipeline hooks on the specified projects. 
+ set_hooks_parser = subparsers.add_parser( + 'set-hooks', + parents=[common_parser], + help='set pipeline hooks on projects', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=_fmtdesc(''' +Set a HTTP hook for pipeline_events on the specified projects. + +Takes a list of projects (optional branch specifiers will be ignored) +as input. Pipeline hooks are required by 'gitlab-deps server' to +trigger dependent builds, so a common way to use this command is to +feed it the right-hand side of the 'gitlab-deps deps' output, e.g.: + + gitlab-deps deps < project.list \\ + | awk '{print $2}' \\ + | gitlab-deps set-hooks --hook-url=... + +using --hook-url to point at the URL of 'gitlab-deps server'. + +'''), epilog=_fmtdesc(''' +Input can be read from a file (if passed as an argument), or +from standard input if a filename is omitted or specified as '-'. +''')) + set_hooks_parser.add_argument( + '--hook-url', metavar='URL', + help='URL for the pipeline HTTP hook') + set_hooks_parser.add_argument( + '--webhook-auth-token', metavar='TOKEN', + help='secret X-Gitlab-Token for request authentication') + set_hooks_parser.add_argument( + 'projects_list', + type=argparse.FileType('r'), + nargs='?', default=sys.stdin) + + # Trigger rebuilds of reverse deps. + rebuild_image_parser = subparsers.add_parser( + 'rebuild', + parents=[common_parser], + help='rebuild dependencies of a project', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=_fmtdesc(''' +Rebuild all projects that depend on the specified project. + +Takes a single project path as argument, and triggers a rebuild of its +direct dependencies. Useful for one-off rebuilds. + +If the --recurse option is provided, the tool will wait for completion +of the pipeline and recursively trigger its dependencies too, +navigating the entire dependency tree. 
+ +'''), epilog=_fmtdesc(''' +Project dependencies can be read from a file (if passed as an +argument), or from standard input if a filename is omitted or +specified as '-'. +''')) + rebuild_image_parser.add_argument( + '--recurse', action='store_true', + help='include all dependencies recursively ' + 'and wait for completion of the pipelines') + rebuild_image_parser.add_argument( + 'project_path', + help='project name (relative path, with optional branch)') + rebuild_image_parser.add_argument( + 'dependencies_list', + type=argparse.FileType('r'), + nargs='?', default=sys.stdin) + + # Server. + server_parser = subparsers.add_parser( + 'server', + parents=[common_parser], + help='start the HTTP server', + formatter_class=argparse.RawDescriptionHelpFormatter, + description=_fmtdesc(''' +Start an HTTP server that listens for Gitlab webhooks. + +When the server receives a pipeline event from Gitlab, it will trigger +new builds for the direct dependencies of the project. The server is +meant to be associated with a single Gitlab instance. + +You must provide the server with the list of project dependencies. + +'''), epilog=_fmtdesc(''' +Project dependencies can be read from a file (if passed as an +argument), or from standard input if a filename is omitted or +specified as '-'. 
+''')) + server_parser.add_argument( + '--port', metavar='PORT', type=int, default='5404', + dest='bind_port', help='port to listen on') + server_parser.add_argument( + '--addr', metavar='IP', default='127.0.0.1', + dest='bind_host', help='address to listen on') + server_parser.add_argument( + '--webhook-auth-token', metavar='TOKEN', + help='secret X-Gitlab-Token for request authentication') + server_parser.add_argument( + 'dependencies_list', + type=argparse.FileType('r'), + nargs='?', default=sys.stdin) + + args = parser.parse_args() + cmd = args.subparser + + if not args.url: + parser.error('Must specify --url') + + logging.basicConfig( + format='%(message)s', + level=logging.DEBUG if args.debug else logging.INFO, + ) + + # Connect to the Gitlab API. + gitlab_token = args.token + if not gitlab_token and args.token_file: + gitlab_token = args.token_file.read().strip().encode('utf-8') + + gl = gitlab.Gitlab(args.url, private_token=gitlab_token) + if gitlab_token: + gl.auth() + + # Dispatch to the command executor. + if cmd == 'list-projects': + for p, b in get_branches(gl, list_projects(gl, args.search)): + print(f'{p}:{b}') + + elif cmd == 'deps': + # If --registry is not specified, make an educated guess. + registry_hostname = args.registry + if not registry_hostname: + registry_hostname = 'registry.' + urlsplit(args.url).netloc + logging.warning('guessed %s for the Docker registry hostname', + registry_hostname) + projects = [split_project_branch(x.strip()) + for x in args.projects_list] + list_deps(gl, args.url, registry_hostname, projects) + + elif cmd == 'rebuild': + deps = read_deps(args.dependencies_list) + project_path, branch_name = split_project_branch(args.project_path) + rebuild_deps(gl, deps, project_path, branch_name, args.dry_run, + args.recurse) + + elif cmd == 'set-hooks': + if not args.hook_url: + parser.error('Must specify --hook-url') + # Need a project list on input, ignore branches. 
+ projects = set(y[0] for y in ( + split_project_branch(x.strip()) for x in args.projects_list)) + for project_path in projects: + check_hook(gl, args.hook_url, args.webhook_auth_token, + project_path, args.dry_run) + + elif cmd == 'server': + deps = read_deps(args.dependencies_list) + run_app(gl, deps, args.bind_host, args.bind_port, + args.webhook_auth_token) + + +if __name__ == '__main__': + main() diff --git a/gitlab_deps/rebuild.py b/gitlab_deps/rebuild.py new file mode 100644 index 0000000000000000000000000000000000000000..96ebc69d0b2d3b20fc3e3a5a5acaeb4aac644c12 --- /dev/null +++ b/gitlab_deps/rebuild.py @@ -0,0 +1,29 @@ +import logging +import time + + +def rebuild(gl, project_path, branch_name, wait=False): + """Trigger a rebuild of a project.""" + project = gl.projects.get(project_path) + if not project: + return None + + pipeline = project.pipelines.create({'ref': branch_name}) + logging.info('started pipeline %s', pipeline.web_url) + if wait: + while pipeline.finished_at is None: + pipeline.refresh() + time.sleep(3) + return pipeline + + +def rebuild_deps(gl, project_deps, project_path, branch_name, dry_run, + wait_and_recurse): + stack = project_deps.get((project_path, branch_name), []) + while stack: + path, branch = stack.pop(0) + logging.info('rebuilding %s:%s', path, branch) + if not dry_run: + rebuild(gl, path, branch, wait_and_recurse) + if wait_and_recurse: + stack.extend(project_deps.get((path, branch), [])) diff --git a/gitlab_docker_autodep/server.py b/gitlab_deps/server.py similarity index 52% rename from gitlab_docker_autodep/server.py rename to gitlab_deps/server.py index bbc320621b8d6091c850657528d4b71e66be2e5c..2f5ae406eaf239aba236ca0e2217df1e0251a850 100644 --- a/gitlab_docker_autodep/server.py +++ b/gitlab_deps/server.py @@ -1,9 +1,5 @@ -import gitlab -import json import logging -import os import threading -import time try: import Queue except ImportError: @@ -15,71 +11,33 @@ from flask import Flask, request, make_response, abort from 
.rebuild import rebuild -class _ReloadableJSONFile(object): - - check_interval = 60 - - def __init__(self, path): - self.path = path - self.lock = threading.Lock() - self._load() - t = threading.Thread( - target=self._update_thread, - name='File reload thread for %s' % path) - t.setDaemon(True) - t.start() - - def get_contents(self): - with self.lock: - return self.data - - def _load(self): - with self.lock: - with open(self.path) as fd: - self.data = json.load(fd) - self.stamp = os.stat(self.path).st_mtime - - def _update_thread(self): - while True: - time.sleep(self.check_interval) - try: - if os.stat(self.path).st_mtime > self.stamp: - self._load() - except: - pass - - queue = Queue.Queue() def _process_request(gl, project_deps, data): pipeline_status = data['object_attributes']['status'] - branch = data['object_attributes']['ref'] - path_with_namespace = data['project']['path_with_namespace'] + branch_name = data['object_attributes']['ref'] + project_path = data['project']['path_with_namespace'] action = 'none' if pipeline_status == 'success': - deps = project_deps.get_contents().get(path_with_namespace, []) + deps = project_deps.get((project_path, branch_name), []) built_projects = [] - for dep_path in deps: + for dep_path, dep_branch in deps: try: - p = rebuild(gl, dep_path) - logging.info('started pipeline %s', p) + rebuild(gl, dep_path, dep_branch) + built_projects.append(f'{dep_path}:{dep_branch}') except Exception as e: - logging.error('error rebuilding project %s: %s' % ( - path_with_namespace, str(e))) + logging.error('error rebuilding project %s:%s: %s' % ( + dep_path, dep_branch, str(e))) action = 'rebuilt %s' % (', '.join(built_projects),) logging.info('pipeline for %s@%s: %s, action=%s', - path_with_namespace, branch, pipeline_status, action) + project_path, branch_name, pipeline_status, action) -def worker_thread(gitlab_url, gitlab_token, project_deps): - gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token) - if gitlab_token: - gl.auth() - 
+def worker_thread(gl, project_deps): while True: data = queue.get() try: @@ -91,21 +49,18 @@ def worker_thread(gitlab_url, gitlab_token, project_deps): app = Flask(__name__) -def run_app(gitlab_url, gitlab_token, - project_deps_path, bind_host, bind_port, +def run_app(gl, project_deps, bind_host, bind_port, webhook_token, num_workers=3): app.config.update({ 'WEBHOOK_AUTH_TOKEN': webhook_token, }) - project_deps = _ReloadableJSONFile(project_deps_path) - # Start the worker threads that will process the requests in the # background. for i in range(num_workers): wt = threading.Thread( target=worker_thread, - args=(gitlab_url, gitlab_token, project_deps), + args=(gl, project_deps), name='Worker %d' % (i+1)) wt.setDaemon(True) wt.start() diff --git a/gitlab_docker_autodep/docker_deps.py b/gitlab_docker_autodep/docker_deps.py deleted file mode 100644 index aabf43090be1c7d87cadac41c27cb2282e9615d0..0000000000000000000000000000000000000000 --- a/gitlab_docker_autodep/docker_deps.py +++ /dev/null @@ -1,100 +0,0 @@ -import gitlab -import json -import logging -import re -import sys - - -_from_rx = re.compile(r'^FROM\s+(\S+).*$', re.MULTILINE) - -def _parse_dockerfile(df): - return _from_rx.findall(df) - - -def _fetch_dockerfile(gl, project, ref): - try: - f = project.files.get(file_path='Dockerfile', ref=ref) - return f.decode() - except: - return None - - -def _has_gitlab_ci(gl, project, ref): - try: - project.files.get(file_path='.gitlab-ci.yml', ref=ref) - return True - except: - return False - - -def _remove_image_tag(name): - if ':' in name: - return name.split(':')[0] - return name - - -def build_docker_deps(gl, search_pattern=None, filter_pattern=None): - """Build the project dependency map based on Dockerfiles. - - This can be a fairly expensive (long) operation if the list of - projects is large. The 'search_pattern' argument allows for - filtering on the server side, using Gitlab search query syntax. 
- On the client side, the project list can be filtered with a - regular expression using the 'filter_pattern' argument, which will - be applied to the project's path_with_namespace. - - Returns an {image_name: [projects]}, where 'projects' is the list - of projects that have 'image_name' as their base Docker - image. These are gitlab.Project instances. - - We only examine Dockerfiles in the master branch of repositories. - - """ - deps = {} - - filter_rx = None - if filter_pattern: - filter_rx = re.compile(filter_pattern) - - projects = gl.projects.list(all=True, search=search_pattern, as_list=False) - for project in projects: - project_name = project.path_with_namespace - project_url = project_name - if filter_rx is not None and not filter_rx.search(project.path_with_namespace): - continue - if not _has_gitlab_ci(gl, project, 'master'): - continue - df = _fetch_dockerfile(gl, project, 'master') - if not df: - continue - base_images = _parse_dockerfile(df.decode('utf-8')) - if not base_images: - logging.error('ERROR: could not find base image for %s', - project.path_with_namespace) - continue - for img in base_images: - deps.setdefault(_remove_image_tag(img), []).append(project_url) - return deps - - -def docker_deps_to_project_deps(deps, registry_hostname): - out = {} - for image_name in deps: - if image_name.startswith(registry_hostname): - project_name = image_name[len(registry_hostname)+1:] - out[project_name] = deps[image_name] - return out - - -def dump_deps(gitlab_url, registry_hostname, gitlab_token, - deps_match, deps_filter, project_deps=True): - gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token) - if gitlab_token: - gl.auth() - - deps = build_docker_deps(gl, deps_match, deps_filter) - - if project_deps: - deps = docker_deps_to_project_deps(deps, registry_hostname) - - json.dump(deps, sys.stdout, indent=2) diff --git a/gitlab_docker_autodep/docker_deps_test.py b/gitlab_docker_autodep/docker_deps_test.py deleted file mode 100644 index 
bcffbaab35eaff54ea19e4337077f3a354e94385..0000000000000000000000000000000000000000 --- a/gitlab_docker_autodep/docker_deps_test.py +++ /dev/null @@ -1,19 +0,0 @@ -from .docker_deps import _parse_dockerfile - -import unittest - - -class TestParseDockerfile(unittest.TestCase): - - def test_parse_dockerfile(self): - dockerfile = ''' -FROM baseimage1 AS build -RUN build - -FROM baseimage2 -COPY --from=build bin /usr/bin/bin -RUN fix-perms - -''' - images = _parse_dockerfile(dockerfile) - self.assertEqual(['baseimage1', 'baseimage2'], images) diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py deleted file mode 100644 index 041f2cd1b74505c9e72fea286938359840d2f77d..0000000000000000000000000000000000000000 --- a/gitlab_docker_autodep/main.py +++ /dev/null @@ -1,149 +0,0 @@ -import argparse -import logging -import os -import time -try: - import urlparse -except ImportError: - import urllib.parse as urlparse - -from .docker_deps import dump_deps -from .rebuild import rebuild_deps -from .server import run_app - - -def main(): - parser = argparse.ArgumentParser( - description='Rebuild Docker images on a Gitlab instance.') - subparsers = parser.add_subparsers(dest='subparser') - - # Common options. 
- common_parser = argparse.ArgumentParser(add_help=False) - gitlab_opts_group = common_parser.add_argument_group('gitlab options') - gitlab_opts_group.add_argument( - '--url', metavar='URL', help='Gitlab URL') - gitlab_opts_group.add_argument( - '--token-file', metavar='FILE', - type=argparse.FileType('r'), - help='Load Gitlab authentication token from this file') - gitlab_opts_group.add_argument( - '--token', metavar='TOKEN', - help='Gitlab authentication token') - gitlab_opts_group.add_argument( - '--registry', metavar='NAME', - help='Docker registry hostname (if empty, it will be ' - 'automatically derived from --url)') - scope_opts_group = common_parser.add_argument_group('project scope options') - common_parser.add_argument('--debug', action='store_true') - - # Compute deps. - deps_parser = subparsers.add_parser( - 'deps', - parents=[common_parser], - help='build dependency map', - description='Generate a map of Docker-derived dependencies between ' - 'projects on a Gitlab instance.') - deps_parser.add_argument( - '--match', - help='Search query to filter project list on the server side') - deps_parser.add_argument( - '--filter', - help='Regexp to filter project list on the right-hand (dependency) side') - deps_parser.add_argument( - '--docker', action='store_true', - help='Output dependencies between Docker images, not Gitlab projects') - - # Trigger rebuilds of reverse deps. 
- rebuild_image_parser = subparsers.add_parser( - 'rebuild', - parents=[common_parser], - help='rebuild dependencies of a project', - description='Rebuild all projects that depend on the specified ' - 'project.') - rebuild_image_parser.add_argument( - '--deps', metavar='FILE', - help='file with project dependencies') - rebuild_image_parser.add_argument( - '-n', '--dry-run', action='store_true', dest='dry_run', - help='only show what would be done') - rebuild_image_parser.add_argument( - '--recurse', action='store_true', - help='Include all dependencies recursively ' - 'and wait for completion of the pipelines') - rebuild_image_parser.add_argument( - 'project_path', - help='Project name (relative path)') - - # Server. - server_parser = subparsers.add_parser( - 'server', - parents=[common_parser], - help='start a HTTP server', - description='Start a HTTP server that listens for Gitlab webhooks. ' - 'Configure Gitlab to send Pipeline events for your projects to this ' - 'server to auto-rebuild first-level dependencies.') - server_parser.add_argument( - '--deps', metavar='FILE', - help='file with project dependencies') - server_parser.add_argument( - '--port', metavar='PORT', type=int, default='5404', - dest='bind_port', help='port to listen on') - server_parser.add_argument( - '--addr', metavar='IP', default='127.0.0.1', - dest='bind_host', help='address to listen on') - server_parser.add_argument( - '--webhook-auth-token', metavar='TOKEN', - help='Secret X-Gitlab-Token for request authentication') - - args = parser.parse_args() - cmd = args.subparser - - if not args.url: - parser.error('Must specify --url') - - logging.basicConfig( - format='%(message)s', - level=logging.DEBUG if args.debug else logging.INFO, - ) - - # If --registry is not specified, make an educated guess. - registry_hostname = args.registry - if not registry_hostname: - registry_hostname = 'registry.' 
+ urlparse.urlsplit(args.url).netloc - logging.error('using %s as Docker registry', registry_hostname) - - gitlab_token = args.token - if not gitlab_token and args.token_file: - gitlab_token = args.token_file.read().strip().encode('utf-8') - - if cmd == 'deps': - dump_deps( - args.url, - registry_hostname, - gitlab_token, - args.match, - args.filter, - not args.docker, - ) - elif cmd == 'rebuild': - rebuild_deps( - args.url, - gitlab_token, - args.deps, - args.project_path, - args.dry_run, - args.recurse, - ) - elif cmd == 'server': - run_app( - args.url, - gitlab_token, - args.deps, - args.bind_host, - args.bind_port, - args.webhook_auth_token, - ) - - -if __name__ == '__main__': - main() diff --git a/gitlab_docker_autodep/rebuild.py b/gitlab_docker_autodep/rebuild.py deleted file mode 100644 index 84ac48ca2bf165ef496a46bcf3912a36ac3b2dbc..0000000000000000000000000000000000000000 --- a/gitlab_docker_autodep/rebuild.py +++ /dev/null @@ -1,40 +0,0 @@ -import gitlab -import json -import logging -import time - - -def rebuild(gl, project_path, wait=False): - """Trigger a rebuild of a project.""" - project = gl.projects.get(project_path) - if not project: - return None - - pipeline = project.pipelines.create({'ref': 'master'}) - if wait: - while pipeline.finished_at is None: - pipeline.refresh() - time.sleep(3) - return pipeline - - -def rebuild_deps(gitlab_url, gitlab_token, - project_deps_path, project_path, dry_run, wait_and_recurse): - gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token) - if gitlab_token: - gl.auth() - - with open(project_deps_path) as fd: - project_deps = json.load(fd) - - stack = project_deps.get(project_path, []) - while stack: - path = stack.pop(0) - logging.info('rebuilding %s', path) - if not dry_run: - rebuild(gl, path, wait_and_recurse) - if wait_and_recurse: - stack.extend(project_deps.get(path, [])) - - - diff --git a/setup.py b/setup.py index c316141daf2642f5c681711f5dd3b7d3a4dbe1aa..5f13dd8d09b590f6485115684544c75bf36ebec7 100644 
--- a/setup.py +++ b/setup.py @@ -3,18 +3,18 @@ from setuptools import setup, find_packages setup( - name="gitlab-docker-autodep", - version="0.3", - description="Automatically rebuild Docker images", + name="gitlab-deps", + version="0.9", + description="Manage Gitlab project dependencies and pipelines", author="Autistici/Inventati", author_email="info@autistici.org", - url="https://git.autistici.org/ale/gitlab-docker-autodep", + url="https://git.autistici.org/ale/gitlab-deps", install_requires=['python-gitlab', 'Flask', 'cheroot'], zip_safe=True, packages=find_packages(), entry_points={ "console_scripts": [ - "gitlab-docker-autodep = gitlab_docker_autodep.main:main", + "gitlab-deps = gitlab_deps.main:main", ], }, )