Commit 69468cbd authored by ale

Refactor dependency builder scope checks

Make them common to all commands, and clarify the docs about scoping
in general.
parent d5a3d46e
...@@ -46,7 +46,7 @@ command-line option, ...@@ -46,7 +46,7 @@ command-line option,
You can pass an authentication token using the *--token* command-line You can pass an authentication token using the *--token* command-line
option. This is usually required in order to trigger CI pipelines: the option. This is usually required in order to trigger CI pipelines: the
token must have sufficient permissions to do so. access token must have the *api* scope.
The tool will only examine Docker images hosted on the Docker registry The tool will only examine Docker images hosted on the Docker registry
associated with the Gitlab instance. By default the registry name is associated with the Gitlab instance. By default the registry name is
...@@ -54,6 +54,30 @@ automatically derived from the server URL (adding a *registry* ...@@ -54,6 +54,30 @@ automatically derived from the server URL (adding a *registry*
prefix), but it can be changed with the *--registry* command-line prefix), but it can be changed with the *--registry* command-line
option. option.
### Scope
On larger Gitlab instances, parsing Dockerfiles for all projects can
be an expensive (long) operation. The program offers two options to
manage the scope of the dependency analysis: *--match* and *--filter*.
The former, *--match*, allows for filtering the project list on the
server side, using a Gitlab search query. The latter, *--filter*,
applies a regular expression to the project names (including
namespaces) before parsing their dependencies. Combining the two, for
example, it is possible to efficiently limit the scope of the tool to
a specific namespace:
```
gitlab-docker-autodep ... --match myns --filter ^myns/ ...
```
Note that, when building the dependency tree:
* tags in FROM lines are ignored
* only the *master* branch of repositories is scanned for Dockerfiles
This might lead to more rebuilds than strictly necessary.
## Command-line ## Command-line
The `rebuild` command will trigger a rebuild of all the dependencies The `rebuild` command will trigger a rebuild of all the dependencies
......
import gitlab import gitlab
import logging import logging
import re
# Matches a Dockerfile FROM instruction and captures only the image
# reference. Instruction keywords are case-insensitive and may be indented;
# leading "--flag=value" options (such as --platform) are skipped, and a
# trailing "AS <stage>" clause or stray whitespace is left out of the capture.
_from_rx = re.compile(r'^\s*FROM\s+(?:--\S+\s+)*(\S+)', re.IGNORECASE)


def _parse_dockerfile(df):
    """Return the base image referenced by the first FROM line.

    Args:
      df: contents of a Dockerfile, as a string.

    Returns:
      The image reference (possibly including a tag) of the first FROM
      instruction found, or None if the file has no FROM instruction.
      For multi-stage Dockerfiles only the first stage is reported.
    """
    # splitlines() also copes with CRLF line endings, so no '\r' can end
    # up in the returned image name.
    for line in df.splitlines():
        m = _from_rx.match(line)
        if m:
            return m.group(1)
    return None
def _fetch_dockerfile(gl, project, ref):
    """Fetch the contents of the top-level Dockerfile of a project.

    Args:
      gl: Gitlab client object (currently unused by this function).
      project: project object; its 'files' manager is queried for the
        file named 'Dockerfile'.
      ref: name of the branch / ref to read the file from.

    Returns:
      The decoded file contents, or None if the file could not be
      retrieved or decoded for any reason.
    """
    try:
        f = project.files.get(file_path='Dockerfile', ref=ref)
        return f.decode()
    except Exception as e:
        # Deliberately best-effort: a project without a Dockerfile (or one
        # we cannot read) is simply skipped by the caller. Catching
        # Exception rather than using a bare 'except' lets
        # KeyboardInterrupt and SystemExit propagate.
        logging.debug('could not fetch Dockerfile (ref %s): %s', ref, e)
        return None
...@@ -22,13 +26,34 @@ def _remove_image_tag(name): ...@@ -22,13 +26,34 @@ def _remove_image_tag(name):
return name return name
def build_dependency_tree(gl, search_pattern=None): def build_dependency_tree(gl, search_pattern=None, filter_pattern=None):
"""Build the project dependency map based on Dockerfiles.""" """Build the project dependency map based on Dockerfiles.
This can be a fairly expensive (long) operation if the list of
projects is large. The 'search_pattern' argument allows for
filtering on the server side, using Gitlab search query syntax.
On the client side, the project list can be filtered with a
regular expression using the 'filter_pattern' argument, which will
be applied to the project's path_with_namespace.
Returns an {image_name: [projects]} dict, where 'projects' is the list
of projects that have 'image_name' as their base Docker
image. These are gitlab.Project instances.
We only examine Dockerfiles in the master branch of repositories.
"""
deps = {} deps = {}
filter_rx = None
if filter_pattern:
filter_rx = re.compile(filter_pattern)
projects = gl.projects.list(all=True, search=search_pattern, as_list=False) projects = gl.projects.list(all=True, search=search_pattern, as_list=False)
for project in projects: for project in projects:
df = _fetch_dockerfile(gl, project) if filter_rx is not None and not filter_rx.search(project.path_with_namespace):
continue
df = _fetch_dockerfile(gl, project, 'master')
if not df: if not df:
continue continue
base_image = _parse_dockerfile(df) base_image = _parse_dockerfile(df)
...@@ -41,6 +66,7 @@ def build_dependency_tree(gl, search_pattern=None): ...@@ -41,6 +66,7 @@ def build_dependency_tree(gl, search_pattern=None):
def rebuild(project, wait=False): def rebuild(project, wait=False):
"""Trigger a rebuild of a project."""
pipeline = project.pipelines.create({'ref': 'master'}) pipeline = project.pipelines.create({'ref': 'master'})
if wait: if wait:
while pipeline.finished_at is None: while pipeline.finished_at is None:
...@@ -50,7 +76,7 @@ def rebuild(project, wait=False): ...@@ -50,7 +76,7 @@ def rebuild(project, wait=False):
def rebuild_deps(gitlab_url, registry_hostname, gitlab_token, def rebuild_deps(gitlab_url, registry_hostname, gitlab_token,
search_pattern, image_name, search_pattern, filter_pattern, image_name,
dry_run=False, recurse=False, wait=False): dry_run=False, recurse=False, wait=False):
"""Rebuild dependencies of the given image.""" """Rebuild dependencies of the given image."""
gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token) gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
...@@ -63,7 +89,7 @@ def rebuild_deps(gitlab_url, registry_hostname, gitlab_token, ...@@ -63,7 +89,7 @@ def rebuild_deps(gitlab_url, registry_hostname, gitlab_token,
while stack: while stack:
project = stack.pop(0) project = stack.pop(0)
print 'rebuilding %s' % project.path_with_namespace logging.info('rebuilding %s', project.path_with_namespace)
if not dry_run: if not dry_run:
pipeline = rebuild(project, wait) pipeline = rebuild(project, wait)
if pipeline.status != 'success': if pipeline.status != 'success':
......
...@@ -14,17 +14,30 @@ def main(): ...@@ -14,17 +14,30 @@ def main():
subparsers = parser.add_subparsers(dest='subparser') subparsers = parser.add_subparsers(dest='subparser')
# Common options. # Common options.
parser.add_argument('--token', metavar='TOKEN', common_parser = argparse.ArgumentParser(add_help=False)
help='Gitlab authentication token') gitlab_opts_group = common_parser.add_argument_group('gitlab options')
parser.add_argument('--registry', metavar='NAME', gitlab_opts_group.add_argument(
help='Docker registry hostname (if empty, it will be ' '--url', metavar='URL', help='Gitlab URL')
'automatically derived from --url)') gitlab_opts_group.add_argument(
parser.add_argument('--url', metavar='URL', help='Gitlab URL') '--token', metavar='TOKEN',
parser.add_argument('--debug', action='store_true') help='Gitlab authentication token')
gitlab_opts_group.add_argument(
'--registry', metavar='NAME',
help='Docker registry hostname (if empty, it will be '
'automatically derived from --url)')
scope_opts_group = common_parser.add_argument_group('project scope options')
scope_opts_group.add_argument(
'--match',
help='Search query to filter project list on the server side')
scope_opts_group.add_argument(
'--filter',
help='Regexp to filter project list on the client side')
common_parser.add_argument('--debug', action='store_true')
# Rebuild deps. # Rebuild deps.
rebuild_image_parser = subparsers.add_parser( rebuild_image_parser = subparsers.add_parser(
'rebuild', 'rebuild',
parents=[common_parser],
help='rebuild dependencies of an image', help='rebuild dependencies of an image',
description='Rebuild all projects that depend on the specified ' description='Rebuild all projects that depend on the specified '
'Docker image.') 'Docker image.')
...@@ -35,9 +48,6 @@ def main(): ...@@ -35,9 +48,6 @@ def main():
'--recurse', action='store_true', '--recurse', action='store_true',
help='Include all dependencies recursively ' help='Include all dependencies recursively '
'and wait for completion of the pipelines') 'and wait for completion of the pipelines')
rebuild_image_parser.add_argument(
'--match',
help='Search keyword(s) to filter project list')
rebuild_image_parser.add_argument( rebuild_image_parser.add_argument(
'image_name', 'image_name',
help='Docker image name') help='Docker image name')
...@@ -45,6 +55,7 @@ def main(): ...@@ -45,6 +55,7 @@ def main():
# Server. # Server.
server_parser = subparsers.add_parser( server_parser = subparsers.add_parser(
'server', 'server',
parents=[common_parser],
help='start a HTTP server', help='start a HTTP server',
description='Start a HTTP server that listens for Gitlab webhooks. ' description='Start a HTTP server that listens for Gitlab webhooks. '
'Configure Gitlab to send Pipeline events for your projects to this ' 'Configure Gitlab to send Pipeline events for your projects to this '
...@@ -82,7 +93,8 @@ def main(): ...@@ -82,7 +93,8 @@ def main():
registry_hostname, registry_hostname,
args.token, args.token,
args.match, args.match,
args[0], args.filter,
args.image_name,
args.dry_run, args.dry_run,
args.recurse, args.recurse,
args.recurse, args.recurse,
...@@ -92,6 +104,8 @@ def main(): ...@@ -92,6 +104,8 @@ def main():
args.url, args.url,
registry_hostname, registry_hostname,
args.token, args.token,
args.match,
args.filter,
args.bind_host, args.bind_host,
args.bind_port, args.bind_port,
args.webhook_auth_token, args.webhook_auth_token,
......
...@@ -26,13 +26,13 @@ class _DepsCache(object): ...@@ -26,13 +26,13 @@ class _DepsCache(object):
with self._deps_lock: with self._deps_lock:
return self._deps.get(image_name, []) return self._deps.get(image_name, [])
def update_thread(self): def update_thread(self, search_pattern, filter_pattern):
loaded = False loaded = False
while True: while True:
try: try:
if not loaded: if not loaded:
app.logger.info('scanning project dependencies...') app.logger.info('scanning project dependencies...')
new_deps = build_dependency_tree(app.gl) new_deps = build_dependency_tree(app.gl, search_pattern, filter_pattern)
with self._deps_lock: with self._deps_lock:
self._deps = new_deps self._deps = new_deps
if not loaded: if not loaded:
...@@ -81,8 +81,8 @@ def worker_thread(): ...@@ -81,8 +81,8 @@ def worker_thread():
def run_app(gitlab_url, registry_hostname, gitlab_token, def run_app(gitlab_url, registry_hostname, gitlab_token,
bind_host, bind_port, webhook_token, search_pattern, filter_pattern, bind_host, bind_port,
num_workers=2): webhook_token, num_workers=2):
app.config.update({ app.config.update({
'REGISTRY_HOSTNAME': registry_hostname, 'REGISTRY_HOSTNAME': registry_hostname,
'WEBHOOK_AUTH_TOKEN': webhook_token, 'WEBHOOK_AUTH_TOKEN': webhook_token,
...@@ -96,6 +96,7 @@ def run_app(gitlab_url, registry_hostname, gitlab_token, ...@@ -96,6 +96,7 @@ def run_app(gitlab_url, registry_hostname, gitlab_token,
# dependency map (an expensive operation). # dependency map (an expensive operation).
update_t = threading.Thread( update_t = threading.Thread(
target=deps_cache.update_thread, target=deps_cache.update_thread,
args=(search_pattern, filter_pattern),
name='Dependency Update Thread') name='Dependency Update Thread')
update_t.setDaemon(True) update_t.setDaemon(True)
update_t.start() update_t.start()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment