From 69468cbd5b7ec2bd59faab70d10e1bbe619f5a98 Mon Sep 17 00:00:00 2001 From: ale <ale@incal.net> Date: Sun, 21 Oct 2018 10:45:08 +0100 Subject: [PATCH] Refactor dependency builder scope checks Make them common to all commands, and clarify the docs about scoping in general. --- README.md | 26 ++++++++++++++++++- gitlab_docker_autodep/deps.py | 44 ++++++++++++++++++++++++++------- gitlab_docker_autodep/main.py | 36 ++++++++++++++++++--------- gitlab_docker_autodep/server.py | 9 ++++--- 4 files changed, 90 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 64db078..533f6aa 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ command-line option, You can pass an authentication token using the *--token* command-line option. This is usually required in order to trigger CI pipelines: the -token must have sufficient permissions to do so. +access token must have the *api* scope. The tool will only examine Docker images hosted on the Docker registry associated with the Gitlab instance. By default the registry name is @@ -54,6 +54,30 @@ automatically derived from the server URL (adding a *registry* prefix), but it can be changed with the *--registry* command-line option. +### Scope + +On larger Gitlab instances, parsing Dockerfiles for all projects can +be an expensive (long) operation. The program offers two options to +manage the scope of the dependency analysis: *--match* and *--filter*. + +The former, *--match*, allows for filtering the project list on the +server side, using a Gitlab search query. The latter, *--filter*, +applies a regular expression to the project names (including +namespaces) before parsing their dependencies. Combining the two, for +example, it is possible to efficiently limit the scope of the tool to +a specific namespace: + +``` +gitlab-docker-autodep ... --match myns --filter ^myns/ ... +``` + +Note that, when building the dependency tree: + +* tags in FROM lines are ignored +* only the *master* branch of repositories is scanned for Dockerfiles + +This might lead to more rebuilds than strictly necessary. + ## Command-line The `rebuild` command will trigger a rebuild of all the dependencies diff --git a/gitlab_docker_autodep/deps.py b/gitlab_docker_autodep/deps.py index 81fc7ac..12f0f25 100644 --- a/gitlab_docker_autodep/deps.py +++ b/gitlab_docker_autodep/deps.py @@ -1,16 +1,20 @@ import gitlab import logging +import re +_from_rx = re.compile(r'^FROM\s+(.*)$') + def _parse_dockerfile(df): for line in df.split('\n'): - if line.startswith('FROM '): - return line[5:].strip() + m = _from_rx.match(line) + if m: + return m.group(1) -def _fetch_dockerfile(gl, project): +def _fetch_dockerfile(gl, project, ref): try: - f = project.files.get(file_path='Dockerfile', ref='master') + f = project.files.get(file_path='Dockerfile', ref=ref) return f.decode() except: return None @@ -22,13 +26,34 @@ def _remove_image_tag(name): return name -def build_dependency_tree(gl, search_pattern=None): - """Build the project dependency map based on Dockerfiles.""" +def build_dependency_tree(gl, search_pattern=None, filter_pattern=None): + """Build the project dependency map based on Dockerfiles. + + This can be a fairly expensive (long) operation if the list of + projects is large. The 'search_pattern' argument allows for + filtering on the server side, using Gitlab search query syntax. + On the client side, the project list can be filtered with a + regular expression using the 'filter_pattern' argument, which will + be applied to the project's path_with_namespace. + + Returns an {image_name: [projects]}, where 'projects' is the list + of projects that have 'image_name' as their base Docker + image. These are gitlab.Project instances. + + We only examine Dockerfiles in the master branch of repositories. + + """ deps = {} + filter_rx = None + if filter_pattern: + filter_rx = re.compile(filter_pattern) + projects = gl.projects.list(all=True, search=search_pattern, as_list=False) for project in projects: - df = _fetch_dockerfile(gl, project) + if filter_rx is not None and not filter_rx.search(project.path_with_namespace): + continue + df = _fetch_dockerfile(gl, project, 'master') if not df: continue base_image = _parse_dockerfile(df) @@ -41,6 +66,7 @@ def build_dependency_tree(gl, search_pattern=None): def rebuild(project, wait=False): + """Trigger a rebuild of a project.""" pipeline = project.pipelines.create({'ref': 'master'}) if wait: while pipeline.finished_at is None: @@ -50,7 +76,7 @@ def rebuild(project, wait=False): def rebuild_deps(gitlab_url, registry_hostname, gitlab_token, - search_pattern, image_name, + search_pattern, filter_pattern, image_name, dry_run=False, recurse=False, wait=False): """Rebuild dependencies of the given image.""" gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token) @@ -63,7 +89,7 @@ def rebuild_deps(gitlab_url, registry_hostname, gitlab_token, while stack: project = stack.pop(0) - print 'rebuilding %s' % project.path_with_namespace + logging.info('rebuilding %s', project.path_with_namespace) if not dry_run: pipeline = rebuild(project, wait) if pipeline.status != 'success': diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py index 34d40b3..6a22dcb 100644 --- a/gitlab_docker_autodep/main.py +++ b/gitlab_docker_autodep/main.py @@ -14,17 +14,30 @@ def main(): subparsers = parser.add_subparsers(dest='subparser') # Common options. - parser.add_argument('--token', metavar='TOKEN', - help='Gitlab authentication token') - parser.add_argument('--registry', metavar='NAME', - help='Docker registry hostname (if empty, it will be ' - 'automatically derived from --url)') - parser.add_argument('--url', metavar='URL', help='Gitlab URL') - parser.add_argument('--debug', action='store_true') + common_parser = argparse.ArgumentParser(add_help=False) + gitlab_opts_group = common_parser.add_argument_group('gitlab options') + gitlab_opts_group.add_argument( + '--url', metavar='URL', help='Gitlab URL') + gitlab_opts_group.add_argument( + '--token', metavar='TOKEN', + help='Gitlab authentication token') + gitlab_opts_group.add_argument( + '--registry', metavar='NAME', + help='Docker registry hostname (if empty, it will be ' + 'automatically derived from --url)') + scope_opts_group = common_parser.add_argument_group('project scope options') + scope_opts_group.add_argument( + '--match', + help='Search query to filter project list on the server side') + scope_opts_group.add_argument( + '--filter', + help='Regexp to filter project list on the client side') + common_parser.add_argument('--debug', action='store_true') # Rebuild deps. rebuild_image_parser = subparsers.add_parser( 'rebuild', + parents=[common_parser], help='rebuild dependencies of an image', description='Rebuild all projects that depend on the specified ' 'Docker image.') @@ -35,9 +48,6 @@ def main(): '--recurse', action='store_true', help='Include all dependencies recursively ' 'and wait for completion of the pipelines') - rebuild_image_parser.add_argument( - '--match', - help='Search keyword(s) to filter project list') rebuild_image_parser.add_argument( 'image_name', help='Docker image name') @@ -45,6 +55,7 @@ def main(): # Server. server_parser = subparsers.add_parser( 'server', + parents=[common_parser], help='start a HTTP server', description='Start a HTTP server that listens for Gitlab webhooks. ' 'Configure Gitlab to send Pipeline events for your projects to this ' @@ -82,7 +93,8 @@ def main(): registry_hostname, args.token, args.match, - args[0], + args.filter, + args.image_name, args.dry_run, args.recurse, args.recurse, @@ -92,6 +104,8 @@ def main(): args.url, registry_hostname, args.token, + args.match, + args.filter, args.bind_host, args.bind_port, args.webhook_auth_token, diff --git a/gitlab_docker_autodep/server.py b/gitlab_docker_autodep/server.py index 4272b60..1273f88 100644 --- a/gitlab_docker_autodep/server.py +++ b/gitlab_docker_autodep/server.py @@ -26,13 +26,13 @@ class _DepsCache(object): with self._deps_lock: return self._deps.get(image_name, []) - def update_thread(self): + def update_thread(self, search_pattern, filter_pattern): loaded = False while True: try: if not loaded: app.logger.info('scanning project dependencies...') - new_deps = build_dependency_tree(app.gl) + new_deps = build_dependency_tree(app.gl, search_pattern, filter_pattern) with self._deps_lock: self._deps = new_deps if not loaded: @@ -81,8 +81,8 @@ def worker_thread(): def run_app(gitlab_url, registry_hostname, gitlab_token, - bind_host, bind_port, webhook_token, - num_workers=2): + search_pattern, filter_pattern, bind_host, bind_port, + webhook_token, num_workers=2): app.config.update({ 'REGISTRY_HOSTNAME': registry_hostname, 'WEBHOOK_AUTH_TOKEN': webhook_token, @@ -96,6 +96,7 @@ def run_app(gitlab_url, registry_hostname, gitlab_token, # dependency map (an expensive operation). update_t = threading.Thread( target=deps_cache.update_thread, + args=(search_pattern, filter_pattern), name='Dependency Update Thread') update_t.setDaemon(True) update_t.start() -- GitLab