Commit 69468cbd authored by ale

Refactor dependency builder scope checks

Make them common to all commands, and clarify the docs about scoping
in general.
parent d5a3d46e
@@ -46,7 +46,7 @@ command-line option,
You can pass an authentication token using the *--token* command-line
option. This is usually required in order to trigger CI pipelines: the
token must have sufficient permissions to do so.
access token must have the *api* scope.
The tool will only examine Docker images hosted on the Docker registry
associated with the Gitlab instance. By default the registry name is
@@ -54,6 +54,30 @@ automatically derived from the server URL (adding a *registry*
prefix), but it can be changed with the *--registry* command-line
option.
### Scope
On larger Gitlab instances, parsing Dockerfiles for all projects can
be an expensive (long) operation. The program offers two options to
manage the scope of the dependency analysis: *--match* and *--filter*.
The former, *--match*, allows for filtering the project list on the
server side, using a Gitlab search query. The latter, *--filter*,
applies a regular expression to the project names (including
namespaces) before parsing their dependencies. Combining the two makes
it possible, for example, to efficiently limit the scope of the tool to
a specific namespace:
```
gitlab-docker-autodep ... --match myns --filter ^myns/ ...
```
Note that, when building the dependency tree:
* tags in FROM lines are ignored
* only the *master* branch of repositories is scanned for Dockerfiles
This might lead to more rebuilds than strictly necessary.
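For illustration, the tag handling described above amounts to something
like the following (a minimal sketch; `strip_tag` is a hypothetical
stand-in for the tool's internal helper, not its actual code):

```python
# Sketch only: drop a trailing ':tag' from an image name, leaving
# registry host:port prefixes untouched.
def strip_tag(image):
    name, sep, tag = image.rpartition(':')
    if sep and '/' not in tag:
        return name
    return image

assert strip_tag('registry.example.com/myns/base:latest') == 'registry.example.com/myns/base'
assert strip_tag('registry.example.com:5000/myns/base') == 'registry.example.com:5000/myns/base'
```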
## Command-line
The `rebuild` command will trigger a rebuild of all the dependencies
......
import gitlab
import logging
import re
_from_rx = re.compile(r'^FROM\s+(.*)$')
def _parse_dockerfile(df):
for line in df.split('\n'):
if line.startswith('FROM '):
return line[5:].strip()
m = _from_rx.match(line)
if m:
return m.group(1)
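A quick usage sketch of the regex-based parsing above (the sample
Dockerfile content is made up):

```python
# The parser returns the image from the first FROM line it finds, so
# multi-stage builds are attributed to their first base image.
sample = 'FROM registry.example.com/myns/base:latest\nRUN apt-get update\n'
print(_parse_dockerfile(sample))  # -> registry.example.com/myns/base:latest
```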
def _fetch_dockerfile(gl, project):
def _fetch_dockerfile(gl, project, ref):
try:
f = project.files.get(file_path='Dockerfile', ref='master')
f = project.files.get(file_path='Dockerfile', ref=ref)
return f.decode()
except:
return None
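For reference, a hedged sketch of exercising the helper directly with
python-gitlab (server URL, token, and project path are placeholders):

```python
import gitlab

gl = gitlab.Gitlab('https://git.example.com', private_token='TOKEN')  # placeholders
project = gl.projects.get('myns/someproject')  # hypothetical project path
df = _fetch_dockerfile(gl, project, 'master')
print('found a Dockerfile' if df else 'no Dockerfile on the master branch')
```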
@@ -22,13 +26,34 @@ def _remove_image_tag(name):
return name
def build_dependency_tree(gl, search_pattern=None):
"""Build the project dependency map based on Dockerfiles."""
def build_dependency_tree(gl, search_pattern=None, filter_pattern=None):
"""Build the project dependency map based on Dockerfiles.
This can be a fairly expensive (long) operation if the list of
projects is large. The 'search_pattern' argument allows for
filtering on the server side, using Gitlab search query syntax.
On the client side, the project list can be filtered with a
regular expression using the 'filter_pattern' argument, which will
be applied to the project's path_with_namespace.
Returns a dict {image_name: [projects]}, where 'projects' is the list
of projects that have 'image_name' as their base Docker
image. These are gitlab.Project instances.
We only examine Dockerfiles in the master branch of repositories.
"""
deps = {}
filter_rx = None
if filter_pattern:
filter_rx = re.compile(filter_pattern)
projects = gl.projects.list(all=True, search=search_pattern, as_list=False)
for project in projects:
df = _fetch_dockerfile(gl, project)
if filter_rx is not None and not filter_rx.search(project.path_with_namespace):
continue
df = _fetch_dockerfile(gl, project, 'master')
if not df:
continue
base_image = _parse_dockerfile(df)
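As a usage sketch of the scoped call described in the docstring (server
URL, token, and namespace are placeholders):

```python
import gitlab

gl = gitlab.Gitlab('https://git.example.com', private_token='TOKEN')  # placeholders
# Server-side search plus client-side regexp, as documented above.
deps = build_dependency_tree(gl, search_pattern='myns', filter_pattern=r'^myns/')
for image_name, projects in deps.items():
    print(image_name, [p.path_with_namespace for p in projects])
```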
@@ -41,6 +66,7 @@ def build_dependency_tree(gl, search_pattern=None):
def rebuild(project, wait=False):
"""Trigger a rebuild of a project."""
pipeline = project.pipelines.create({'ref': 'master'})
if wait:
while pipeline.finished_at is None:
@@ -50,7 +76,7 @@ def rebuild(project, wait=False):
def rebuild_deps(gitlab_url, registry_hostname, gitlab_token,
search_pattern, image_name,
search_pattern, filter_pattern, image_name,
dry_run=False, recurse=False, wait=False):
"""Rebuild dependencies of the given image."""
gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
@@ -63,7 +89,7 @@ def rebuild_deps(gitlab_url, registry_hostname, gitlab_token,
while stack:
project = stack.pop(0)
print 'rebuilding %s' % project.path_with_namespace
logging.info('rebuilding %s', project.path_with_namespace)
if not dry_run:
pipeline = rebuild(project, wait)
if pipeline.status != 'success':
......
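Putting the pieces together, a hedged sketch of calling rebuild_deps
directly (all values are placeholders; the supported entry point is the
command-line tool):

```python
rebuild_deps(
    gitlab_url='https://git.example.com',      # placeholder
    registry_hostname='registry.example.com',  # placeholder
    gitlab_token='TOKEN',                      # placeholder
    search_pattern='myns',
    filter_pattern=r'^myns/',
    image_name='registry.example.com/myns/base',  # placeholder image name
    dry_run=True,   # only log what would be rebuilt
    recurse=False,
    wait=False)
```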
@@ -14,17 +14,30 @@ def main():
subparsers = parser.add_subparsers(dest='subparser')
# Common options.
parser.add_argument('--token', metavar='TOKEN',
help='Gitlab authentication token')
parser.add_argument('--registry', metavar='NAME',
help='Docker registry hostname (if empty, it will be '
'automatically derived from --url)')
parser.add_argument('--url', metavar='URL', help='Gitlab URL')
parser.add_argument('--debug', action='store_true')
common_parser = argparse.ArgumentParser(add_help=False)
gitlab_opts_group = common_parser.add_argument_group('gitlab options')
gitlab_opts_group.add_argument(
'--url', metavar='URL', help='Gitlab URL')
gitlab_opts_group.add_argument(
'--token', metavar='TOKEN',
help='Gitlab authentication token')
gitlab_opts_group.add_argument(
'--registry', metavar='NAME',
help='Docker registry hostname (if empty, it will be '
'automatically derived from --url)')
scope_opts_group = common_parser.add_argument_group('project scope options')
scope_opts_group.add_argument(
'--match',
help='Search query to filter project list on the server side')
scope_opts_group.add_argument(
'--filter',
help='Regexp to filter project list on the client side')
common_parser.add_argument('--debug', action='store_true')
# Rebuild deps.
rebuild_image_parser = subparsers.add_parser(
'rebuild',
parents=[common_parser],
help='rebuild dependencies of an image',
description='Rebuild all projects that depend on the specified '
'Docker image.')
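The refactor relies on argparse's parents mechanism: the shared options
are declared once on a helper parser created with add_help=False and
inherited by every subcommand. A minimal, self-contained sketch of the
pattern (names are illustrative, not the tool's actual CLI):

```python
import argparse

# Shared options live on a parent parser that each subcommand inherits.
common = argparse.ArgumentParser(add_help=False)
common.add_argument('--match', help='server-side search query')
common.add_argument('--filter', help='client-side regexp filter')

parser = argparse.ArgumentParser(prog='example-tool')
subparsers = parser.add_subparsers(dest='subparser')
subparsers.add_parser('rebuild', parents=[common])
subparsers.add_parser('server', parents=[common])

# With this layout, the shared flags are given after the subcommand name.
args = parser.parse_args(['rebuild', '--match', 'myns', '--filter', '^myns/'])
print(args.match, args.filter)
```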
@@ -35,9 +48,6 @@ def main():
'--recurse', action='store_true',
help='Include all dependencies recursively '
'and wait for completion of the pipelines')
rebuild_image_parser.add_argument(
'--match',
help='Search keyword(s) to filter project list')
rebuild_image_parser.add_argument(
'image_name',
help='Docker image name')
@@ -45,6 +55,7 @@ def main():
# Server.
server_parser = subparsers.add_parser(
'server',
parents=[common_parser],
help='start a HTTP server',
description='Start a HTTP server that listens for Gitlab webhooks. '
'Configure Gitlab to send Pipeline events for your projects to this '
@@ -82,7 +93,8 @@ def main():
registry_hostname,
args.token,
args.match,
args[0],
args.filter,
args.image_name,
args.dry_run,
args.recurse,
args.recurse,
@@ -92,6 +104,8 @@ def main():
args.url,
registry_hostname,
args.token,
args.match,
args.filter,
args.bind_host,
args.bind_port,
args.webhook_auth_token,
......
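With the common options now attached to each subcommand, an invocation
might look like this (a hedged sketch; hostnames, token, and image name
are placeholders):

```
gitlab-docker-autodep rebuild \
    --url https://git.example.com \
    --token $GITLAB_TOKEN \
    --match myns --filter '^myns/' \
    registry.example.com/myns/base
```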
@@ -26,13 +26,13 @@ class _DepsCache(object):
with self._deps_lock:
return self._deps.get(image_name, [])
def update_thread(self):
def update_thread(self, search_pattern, filter_pattern):
loaded = False
while True:
try:
if not loaded:
app.logger.info('scanning project dependencies...')
new_deps = build_dependency_tree(app.gl)
new_deps = build_dependency_tree(app.gl, search_pattern, filter_pattern)
with self._deps_lock:
self._deps = new_deps
if not loaded:
@@ -81,8 +81,8 @@ def worker_thread():
def run_app(gitlab_url, registry_hostname, gitlab_token,
bind_host, bind_port, webhook_token,
num_workers=2):
search_pattern, filter_pattern, bind_host, bind_port,
webhook_token, num_workers=2):
app.config.update({
'REGISTRY_HOSTNAME': registry_hostname,
'WEBHOOK_AUTH_TOKEN': webhook_token,
@@ -96,6 +96,7 @@ def run_app(gitlab_url, registry_hostname, gitlab_token,
# dependency map (an expensive operation).
update_t = threading.Thread(
target=deps_cache.update_thread,
args=(search_pattern, filter_pattern),
name='Dependency Update Thread')
update_t.setDaemon(True)
update_t.start()
......
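The server keeps the dependency map in a cache that a daemon thread
refreshes in the background, as in the hunks above. A stripped-down
sketch of that pattern (the refresh interval and the build_map callable
are stand-ins, not the project's actual code):

```python
import threading
import time


class MapCache(object):
    """Hold an expensive-to-build map behind a lock, refreshed in the background."""

    def __init__(self, build_map, interval=1800):
        self._build_map = build_map  # callable returning the new map
        self._interval = interval    # seconds between refreshes (stand-in value)
        self._lock = threading.Lock()
        self._map = {}

    def get(self, key):
        with self._lock:
            return self._map.get(key, [])

    def update_thread(self):
        while True:
            try:
                new_map = self._build_map()
                with self._lock:
                    self._map = new_map
            except Exception as e:
                print('update error: %s' % e)
            time.sleep(self._interval)


cache = MapCache(lambda: {'base-image': ['proj-a', 'proj-b']})
t = threading.Thread(target=cache.update_thread, name='Dependency Update Thread')
t.setDaemon(True)
t.start()
time.sleep(1)
print(cache.get('base-image'))  # -> ['proj-a', 'proj-b']
```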