From d4175d13252c59e3f8ee20a6375ea86815670c64 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Wed, 1 Jul 2020 18:02:35 +0100
Subject: [PATCH 01/10] Refactor the tool into smaller composable modules

The idea is that all the filtering capability should be replaced by
"grep" and shell pipes.

This is part one: the "server" command still has to be converted (TODO).
---
 gitlab_docker_autodep/deps.py    | 118 +++++++++++++++++++++++++++++++
 gitlab_docker_autodep/main.py    |  85 ++++++++++++----------
 gitlab_docker_autodep/rebuild.py |  15 +---
 3 files changed, 167 insertions(+), 51 deletions(-)
 create mode 100644 gitlab_docker_autodep/deps.py

diff --git a/gitlab_docker_autodep/deps.py b/gitlab_docker_autodep/deps.py
new file mode 100644
index 0000000..531a9c2
--- /dev/null
+++ b/gitlab_docker_autodep/deps.py
@@ -0,0 +1,118 @@
+import re
+
+
+DEFAULT_BRANCH = 'master'
+
+
+def split_project_branch(project_with_branch):
+    if ':' in project_with_branch:
+        p, b = project_with_branch.split(':')
+        return p, b
+    return project_with_branch, DEFAULT_BRANCH
+
+
+def list_projects(gl, search_pattern):
+    projects = gl.projects.list(
+        all=True,
+        search=search_pattern,
+        search_namespaces=True,
+        as_list=False,
+        simple=True,
+    )
+    for p in projects:
+        yield p.path_with_namespace
+
+
+def get_branches(gl, project_names):
+    for path_with_namespace in project_names:
+        p = gl.projects.get(path_with_namespace)
+        for b in p.branches.list():
+            yield (path_with_namespace, b.name)
+
+
+def has_ci(gl, project_path, branch_name):
+    p = gl.projects.get(project_path)
+    try:
+        p.files.get(file_path='.gitlab-ci.yml', ref=branch_name)
+        return True
+    except Exception:
+        return False
+
+
+_from_rx = re.compile(r'^FROM\s+(\S+).*$', re.MULTILINE)
+
+
+def get_docker_deps(gl, project_path, branch_name):
+    p = gl.projects.get(project_path)
+    try:
+        f = p.files.get(file_path='Dockerfile', ref=branch_name)
+        return _from_rx.findall(f.decode().decode('utf-8'))
+    except Exception:
+        return []
+
+
+def get_explicit_deps(gl, project_path, branch_name):
+    p = gl.projects.get(project_path)
+    try:
+        f = p.files.get(file_path='.gitlab-deps', ref=branch_name)
+        return f.decode().decode('utf-8').split('\n')
+    except Exception:
+        return []
+
+
+_docker_image_rx = re.compile(r'^([^/]*)(/([^:]*))?(:(.*))?$')
+
+
+def docker_image_to_project(docker_image, registry_hostname):
+    m = _docker_image_rx.match(docker_image)
+    if m and m[1] == registry_hostname:
+        # The branch is the tag, except for 'latest'
+        if not m[5] or m[5] == 'latest':
+            branch = DEFAULT_BRANCH
+        else:
+            branch = m[5]
+        return m[3], branch
+    return None, None
+
+
+_url_rx = re.compile(r'^(https?://[^/]+/)([^:]+)(:.*)?$')
+
+
+def url_to_project(url, gitlab_url):
+    m = _url_rx.match(url)
+    if m and m[1] == gitlab_url:
+        return m[2], m[3] or DEFAULT_BRANCH
+
+
+def not_null(l):
+    return filter(None, l)
+
+
+def get_deps(gl, gitlab_url, registry_hostname, project_path, branch_name):
+    deps = []
+    deps.extend(not_null(
+        url_to_project(url, gitlab_url)
+        for url in get_explicit_deps(gl, project_path, branch_name)))
+    deps.extend(not_null(
+        docker_image_to_project(img, registry_hostname)
+        for img in get_docker_deps(gl, project_path, branch_name)))
+    return deps
+
+
+def list_deps(gl, gitlab_url, registry_hostname, projects):
+    for project_path, branch_name in projects:
+        deps = get_deps(gl, gitlab_url, registry_hostname,
+                        project_path, branch_name)
+        for dep_path, dep_branch in deps:
+            print(f'{project_path}:{branch_name} {dep_path}:{dep_branch}')
+
+
+def read_deps(fd):
+    deps = {}
+    for line in fd:
+        src, dst = line.strip().split()
+        src_project, src_branch = split_project_branch(src)
+        dst_project, dst_branch = split_project_branch(dst)
+        deps.setdefault((src_project, src_branch), []).append(
+            (dst_project, dst_branch))
+    return deps
diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py
index 041f2cd..eb36202 100644
--- a/gitlab_docker_autodep/main.py
+++ b/gitlab_docker_autodep/main.py
@@ -1,13 +1,12 @@
 import argparse
+import gitlab
 import logging
 import os
-import time
-try:
-    import urlparse
-except ImportError:
-    import urllib.parse as urlparse
+import sys
+import urllib.parse as urlparse
 
-from .docker_deps import dump_deps
+from .deps import get_branches, list_projects, list_deps, \
+    split_project_branch, read_deps
 from .rebuild import rebuild_deps
 from .server import run_app
 
@@ -19,40 +18,51 @@ def main():
 
     # Common options.
     common_parser = argparse.ArgumentParser(add_help=False)
+    common_parser.add_argument('--debug', action='store_true')
+    common_parser.add_argument(
+        '-n', '--dry-run', action='store_true', dest='dry_run',
+        help='only show what would be done')
     gitlab_opts_group = common_parser.add_argument_group('gitlab options')
     gitlab_opts_group.add_argument(
-        '--url', metavar='URL', help='Gitlab URL')
+        '--url', metavar='URL', help='Gitlab URL',
+        default=os.getenv('GITLAB_URL'))
     gitlab_opts_group.add_argument(
         '--token-file', metavar='FILE',
         type=argparse.FileType('r'),
+        default=os.getenv('GITLAB_AUTH_TOKEN_FILE'),
         help='Load Gitlab authentication token from this file')
     gitlab_opts_group.add_argument(
         '--token', metavar='TOKEN',
+        default=os.getenv('GITLAB_AUTH_TOKEN'),
         help='Gitlab authentication token')
     gitlab_opts_group.add_argument(
         '--registry', metavar='NAME',
+        default=os.getenv('GITLAB_REGISTRY'),
         help='Docker registry hostname (if empty, it will be '
         'automatically derived from --url)')
-    scope_opts_group = common_parser.add_argument_group('project scope options')
-    common_parser.add_argument('--debug', action='store_true')
+
+    # List projects.
+    list_projects_parser = subparsers.add_parser(
+        'list-projects',
+        parents=[common_parser],
+        help='list projects',
+        description='List all projects and their branches on the Gitlab '
+        'instance.')
+    list_projects_parser.add_argument(
+        '--search',
+        help='Search query used to filter project list on the server side')
 
     # Compute deps.
     deps_parser = subparsers.add_parser(
         'deps',
         parents=[common_parser],
         help='build dependency map',
-        description='Generate a map of Docker-derived dependencies between '
-        'projects on a Gitlab instance.')
-    deps_parser.add_argument(
-        '--match',
-        help='Search query to filter project list on the server side')
-    deps_parser.add_argument(
-        '--filter',
-        help='Regexp to filter project list on the right-hand (dependency) side')
+        description='Generate a map of dependencies between projects on a '
+        'Gitlab instance.')
     deps_parser.add_argument(
         '--docker', action='store_true',
         help='Output dependencies between Docker images, not Gitlab projects')
-    
+
     # Trigger rebuilds of reverse deps.
     rebuild_image_parser = subparsers.add_parser(
         'rebuild',
@@ -63,16 +73,13 @@ def main():
     rebuild_image_parser.add_argument(
         '--deps', metavar='FILE',
         help='file with project dependencies')
-    rebuild_image_parser.add_argument(
-        '-n', '--dry-run', action='store_true', dest='dry_run',
-        help='only show what would be done')
     rebuild_image_parser.add_argument(
         '--recurse', action='store_true',
         help='Include all dependencies recursively '
         'and wait for completion of the pipelines')
     rebuild_image_parser.add_argument(
         'project_path',
-        help='Project name (relative path)')
+        help='Project name (relative path, with optional branch)')
 
     # Server.
     server_parser = subparsers.add_parser(
@@ -110,31 +117,31 @@ def main():
     registry_hostname = args.registry
     if not registry_hostname:
         registry_hostname = 'registry.' + urlparse.urlsplit(args.url).netloc
-        logging.error('using %s as Docker registry', registry_hostname)
+        logging.warning('guessed %s for the Docker registry hostname',
+                        registry_hostname)
 
     gitlab_token = args.token
     if not gitlab_token and args.token_file:
         gitlab_token = args.token_file.read().strip().encode('utf-8')
 
+    gl = gitlab.Gitlab(args.url, private_token=gitlab_token)
+    if gitlab_token:
+        gl.auth()
+
+    # Dispatch to the command executor.
+    if cmd == 'list-projects':
+        for p, b in get_branches(gl, list_projects(gl, args.search)):
+            print(f'{p}:{b}')
     if cmd == 'deps':
-        dump_deps(
-            args.url,
-            registry_hostname,
-            gitlab_token,
-            args.match,
-            args.filter,
-            not args.docker,
-        )
+        projects = [split_project_branch(x.strip()) for x in sys.stdin]
+        list_deps(gl, args.url, registry_hostname, projects)
     elif cmd == 'rebuild':
-        rebuild_deps(
-            args.url,
-            gitlab_token,
-            args.deps,
-            args.project_path,
-            args.dry_run,
-            args.recurse,
-        )
+        deps = read_deps(sys.stdin)
+        project_path, branch_name = split_project_branch(args.project_path)
+        rebuild_deps(gl, deps, project_path, branch_name, args.dry_run,
+                     args.recurse)
     elif cmd == 'server':
+        # TODO
         run_app(
             args.url,
             gitlab_token,
diff --git a/gitlab_docker_autodep/rebuild.py b/gitlab_docker_autodep/rebuild.py
index 84ac48c..b7b70a3 100644
--- a/gitlab_docker_autodep/rebuild.py
+++ b/gitlab_docker_autodep/rebuild.py
@@ -1,5 +1,3 @@
-import gitlab
-import json
 import logging
 import time
 
@@ -18,16 +16,9 @@ def rebuild(gl, project_path, wait=False):
     return pipeline
 
 
-def rebuild_deps(gitlab_url, gitlab_token,
-                 project_deps_path, project_path, dry_run, wait_and_recurse):
-    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
-    if gitlab_token:
-        gl.auth()
-
-    with open(project_deps_path) as fd:
-        project_deps = json.load(fd)
-
-    stack = project_deps.get(project_path, [])
+def rebuild_deps(gl, project_deps, project_path, branch_name, dry_run,
+                 wait_and_recurse):
+    stack = project_deps.get((project_path, branch_name), [])
     while stack:
         path = stack.pop(0)
         logging.info('rebuilding %s', path)
-- 
GitLab


From ec4523cd1ba35a1e5e444fa1e899d4fda56076b9 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Wed, 1 Jul 2020 19:11:18 +0100
Subject: [PATCH 02/10] Fix the server to use plain text dependencies as input

---
 gitlab_docker_autodep/main.py    | 11 ++-----
 gitlab_docker_autodep/rebuild.py |  3 --
 gitlab_docker_autodep/server.py  | 51 ++------------------------------
 3 files changed, 6 insertions(+), 59 deletions(-)

diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py
index eb36202..58313d1 100644
--- a/gitlab_docker_autodep/main.py
+++ b/gitlab_docker_autodep/main.py
@@ -141,15 +141,10 @@ def main():
         rebuild_deps(gl, deps, project_path, branch_name, args.dry_run,
                      args.recurse)
     elif cmd == 'server':
+        deps = read_deps(sys.stdin)
         # TODO
-        run_app(
-            args.url,
-            gitlab_token,
-            args.deps,
-            args.bind_host,
-            args.bind_port,
-            args.webhook_auth_token,
-        )
+        run_app(gl, deps, args.bind_host, args.bind_port,
+                args.webhook_auth_token)
 
 
 if __name__ == '__main__':
diff --git a/gitlab_docker_autodep/rebuild.py b/gitlab_docker_autodep/rebuild.py
index b7b70a3..b9cbf4c 100644
--- a/gitlab_docker_autodep/rebuild.py
+++ b/gitlab_docker_autodep/rebuild.py
@@ -26,6 +26,3 @@ def rebuild_deps(gl, project_deps, project_path, branch_name, dry_run,
             rebuild(gl, path, wait_and_recurse)
         if wait_and_recurse:
             stack.extend(project_deps.get(path, []))
-
-            
-        
diff --git a/gitlab_docker_autodep/server.py b/gitlab_docker_autodep/server.py
index bbc3206..ffc0351 100644
--- a/gitlab_docker_autodep/server.py
+++ b/gitlab_docker_autodep/server.py
@@ -1,9 +1,5 @@
-import gitlab
-import json
 import logging
-import os
 import threading
-import time
 try:
     import Queue
 except ImportError:
@@ -15,40 +11,6 @@ from flask import Flask, request, make_response, abort
 from .rebuild import rebuild
 
 
-class _ReloadableJSONFile(object):
-
-    check_interval = 60
-
-    def __init__(self, path):
-        self.path = path
-        self.lock = threading.Lock()
-        self._load()
-        t = threading.Thread(
-            target=self._update_thread,
-            name='File reload thread for %s' % path)
-        t.setDaemon(True)
-        t.start()
-
-    def get_contents(self):
-        with self.lock:
-            return self.data
-
-    def _load(self):
-        with self.lock:
-            with open(self.path) as fd:
-                self.data = json.load(fd)
-            self.stamp = os.stat(self.path).st_mtime
-
-    def _update_thread(self):
-        while True:
-            time.sleep(self.check_interval)
-            try:
-                if os.stat(self.path).st_mtime > self.stamp:
-                    self._load()
-            except:
-                pass
-
-
 queue = Queue.Queue()
 
 
@@ -75,11 +37,7 @@ def _process_request(gl, project_deps, data):
                  path_with_namespace, branch, pipeline_status, action)
 
 
-def worker_thread(gitlab_url, gitlab_token, project_deps):
-    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
-    if gitlab_token:
-        gl.auth()
-
+def worker_thread(gl, project_deps):
     while True:
         data = queue.get()
         try:
@@ -91,21 +49,18 @@ def worker_thread(gitlab_url, gitlab_token, project_deps):
 app = Flask(__name__)
 
 
-def run_app(gitlab_url, gitlab_token,
-            project_deps_path, bind_host, bind_port,
+def run_app(gl, project_deps, bind_host, bind_port,
             webhook_token, num_workers=3):
     app.config.update({
         'WEBHOOK_AUTH_TOKEN': webhook_token,
     })
 
-    project_deps = _ReloadableJSONFile(project_deps_path)
-
     # Start the worker threads that will process the requests in the
     # background.
     for i in range(num_workers):
         wt = threading.Thread(
             target=worker_thread,
-            args=(gitlab_url, gitlab_token, project_deps),
+            args=(gl, project_deps),
             name='Worker %d' % (i+1))
         wt.setDaemon(True)
         wt.start()
-- 
GitLab


From 3ed99637206bbabde3ae703bd270561b03a50dbb Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Wed, 1 Jul 2020 22:03:33 +0100
Subject: [PATCH 03/10] Add a "set-hooks" command to set pipeline hooks on
 projects

---
 gitlab_docker_autodep/hooks.py | 16 ++++++++++++++++
 gitlab_docker_autodep/main.py  | 25 ++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 gitlab_docker_autodep/hooks.py

diff --git a/gitlab_docker_autodep/hooks.py b/gitlab_docker_autodep/hooks.py
new file mode 100644
index 0000000..dc99626
--- /dev/null
+++ b/gitlab_docker_autodep/hooks.py
@@ -0,0 +1,16 @@
+
+
+def check_hook(gl, hook_url, webhook_token, project_path):
+    project = gl.projects.get(project_path)
+    found = False
+    for h in project.hooks.list():
+        if h.url == hook_url and h.pipeline_events:
+            found = True
+            break
+    if found:
+        return
+    project.hooks.add(
+        url=hook_url,
+        pipeline_events=True,
+        token=webhook_token,
+    )
diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py
index 58313d1..58f804f 100644
--- a/gitlab_docker_autodep/main.py
+++ b/gitlab_docker_autodep/main.py
@@ -7,6 +7,7 @@ import urllib.parse as urlparse
 
 from .deps import get_branches, list_projects, list_deps, \
     split_project_branch, read_deps
+from .hooks import check_hook
 from .rebuild import rebuild_deps
 from .server import run_app
 
@@ -63,6 +64,20 @@ def main():
         '--docker', action='store_true',
         help='Output dependencies between Docker images, not Gitlab projects')
 
+    # Setup pipeline hooks on the specified projects.
+    set_hooks_parser = subparsers.add_parser(
+        'set-hooks',
+        parents=[common_parser],
+        help='set pipeline hooks on projects',
+        description='Set the pipeline hooks on the specified projects '
+        '(usually points at our own server)')
+    set_hooks_parser.add_argument(
+        '--hook-url', metavar='URL',
+        help='URL for the pipeline HTTP hook')
+    set_hooks_parser.add_argument(
+        '--webhook-auth-token', metavar='TOKEN',
+        help='Secret X-Gitlab-Token for request authentication')
+
     # Trigger rebuilds of reverse deps.
     rebuild_image_parser = subparsers.add_parser(
         'rebuild',
@@ -140,9 +155,17 @@ def main():
         project_path, branch_name = split_project_branch(args.project_path)
         rebuild_deps(gl, deps, project_path, branch_name, args.dry_run,
                      args.recurse)
+    elif cmd == 'set-hooks':
+        if not args.hook_url:
+            parser.error('Must specify --hook-url')
+        # Need a project list on input, ignore branches.
+        projects = set(y[0] for y in (
+            split_project_branch(x.strip()) for x in sys.stdin))
+        for project_path in projects:
+            check_hook(gl, args.hook_url, args.webhook_auth_token,
+                       project_path)
     elif cmd == 'server':
         deps = read_deps(sys.stdin)
-        # TODO
         run_app(gl, deps, args.bind_host, args.bind_port,
                 args.webhook_auth_token)
 
-- 
GitLab


From d8a0b0455aa731c61432ebaf8cdba3c42ea38426 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Wed, 1 Jul 2020 23:44:00 +0100
Subject: [PATCH 04/10] Change binary name to gitlab-deps

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c316141..2099426 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@ setup(
     packages=find_packages(),
     entry_points={
         "console_scripts": [
-            "gitlab-docker-autodep = gitlab_docker_autodep.main:main",
+            "gitlab-deps = gitlab_docker_autodep.main:main",
             ],
         },
     )
-- 
GitLab


From cadc1cf7fe7b605dfdda21f32069ca3dea7fc685 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Wed, 1 Jul 2020 23:55:58 +0100
Subject: [PATCH 05/10] Better help messages

---
 gitlab_docker_autodep/main.py | 46 +++++++++++++++++++++++++++++------
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py
index 58f804f..9a14950 100644
--- a/gitlab_docker_autodep/main.py
+++ b/gitlab_docker_autodep/main.py
@@ -12,6 +12,10 @@ from .rebuild import rebuild_deps
 from .server import run_app
 
 
+def _fmtdesc(s):
+    return s.strip()
+
+
 def main():
     parser = argparse.ArgumentParser(
         description='Rebuild Docker images on a Gitlab instance.')
@@ -47,8 +51,22 @@ def main():
         'list-projects',
         parents=[common_parser],
         help='list projects',
-        description='List all projects and their branches on the Gitlab '
-        'instance.')
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=_fmtdesc('''
+List all projects and their branches on the Gitlab instance.
+
+The output is a list of project paths with all their branches, separated
+by a colon, one per line. Since the Gitlab 'search' API is quite
+coarse, you can then filter the output for specific projects or branches
+using 'grep', e.g.:
+
+    gitlab-deps list-projects | grep ^path/to/my/group/
+
+or
+
+    gitlab-deps list-projects | grep ':master$'
+
+'''))
     list_projects_parser.add_argument(
         '--search',
         help='Search query used to filter project list on the server side')
@@ -58,11 +76,25 @@ def main():
         'deps',
         parents=[common_parser],
         help='build dependency map',
-        description='Generate a map of dependencies between projects on a '
-        'Gitlab instance.')
-    deps_parser.add_argument(
-        '--docker', action='store_true',
-        help='Output dependencies between Docker images, not Gitlab projects')
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=_fmtdesc('''
+Generate a map of dependencies between projects on a
+Gitlab instance.
+
+The input (on standard input) must consist of a list of projects along
+with their branches, separated by a colon, one per line. If the branch
+is unspecified, 'master' is assumed.
+
+The output consists of pairs of project / dependency (so, these are
+'forward' dependencies), for all projects/branches specified in the
+input.
+
+To obtain a list of reverse dependencies, one can simply swap the
+columns in the output, e.g.:
+
+    gitlab-deps deps < project.list | awk '{print $2, $1}'
+
+'''))
 
     # Setup pipeline hooks on the specified projects.
     set_hooks_parser = subparsers.add_parser(
-- 
GitLab


From 3c7b79cbe1e4800964d760631651ef3fedb18ece Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Thu, 2 Jul 2020 11:12:53 +0100
Subject: [PATCH 06/10] Update server code to support the new dependency maps

Improve help messages for the command too.
---
 gitlab_docker_autodep/hooks.py   |  15 +--
 gitlab_docker_autodep/main.py    | 151 ++++++++++++++++++++++---------
 gitlab_docker_autodep/rebuild.py |  13 +--
 gitlab_docker_autodep/server.py  |  18 ++--
 4 files changed, 132 insertions(+), 65 deletions(-)

diff --git a/gitlab_docker_autodep/hooks.py b/gitlab_docker_autodep/hooks.py
index dc99626..5f53b21 100644
--- a/gitlab_docker_autodep/hooks.py
+++ b/gitlab_docker_autodep/hooks.py
@@ -1,6 +1,7 @@
+import logging
 
 
-def check_hook(gl, hook_url, webhook_token, project_path):
+def check_hook(gl, hook_url, webhook_token, project_path, dry_run):
     project = gl.projects.get(project_path)
     found = False
     for h in project.hooks.list():
@@ -9,8 +10,10 @@ def check_hook(gl, hook_url, webhook_token, project_path):
             break
     if found:
         return
-    project.hooks.add(
-        url=hook_url,
-        pipeline_events=True,
-        token=webhook_token,
-    )
+    logging.info('adding pipeline_events hook to %s', project_path)
+    if not dry_run:
+        project.hooks.add(
+            url=hook_url,
+            pipeline_events=True,
+            token=webhook_token,
+        )
diff --git a/gitlab_docker_autodep/main.py b/gitlab_docker_autodep/main.py
index 9a14950..6fec413 100644
--- a/gitlab_docker_autodep/main.py
+++ b/gitlab_docker_autodep/main.py
@@ -3,7 +3,7 @@ import gitlab
 import logging
 import os
 import sys
-import urllib.parse as urlparse
+from urllib.parse import urlsplit
 
 from .deps import get_branches, list_projects, list_deps, \
     split_project_branch, read_deps
@@ -18,12 +18,14 @@ def _fmtdesc(s):
 
 def main():
     parser = argparse.ArgumentParser(
-        description='Rebuild Docker images on a Gitlab instance.')
+        description='Manage Gitlab project dependencies and trigger pipelines.')
     subparsers = parser.add_subparsers(dest='subparser')
 
     # Common options.
     common_parser = argparse.ArgumentParser(add_help=False)
-    common_parser.add_argument('--debug', action='store_true')
+    common_parser.add_argument(
+        '--debug', action='store_true',
+        help='increase logging level')
     common_parser.add_argument(
         '-n', '--dry-run', action='store_true', dest='dry_run',
         help='only show what would be done')
@@ -35,16 +37,11 @@ def main():
         '--token-file', metavar='FILE',
         type=argparse.FileType('r'),
         default=os.getenv('GITLAB_AUTH_TOKEN_FILE'),
-        help='Load Gitlab authentication token from this file')
+        help='file containing the Gitlab authentication token')
     gitlab_opts_group.add_argument(
         '--token', metavar='TOKEN',
         default=os.getenv('GITLAB_AUTH_TOKEN'),
         help='Gitlab authentication token')
-    gitlab_opts_group.add_argument(
-        '--registry', metavar='NAME',
-        default=os.getenv('GITLAB_REGISTRY'),
-        help='Docker registry hostname (if empty, it will be '
-        'automatically derived from --url)')
 
     # List projects.
     list_projects_parser = subparsers.add_parser(
@@ -69,7 +66,7 @@ or
 '''))
     list_projects_parser.add_argument(
         '--search',
-        help='Search query used to filter project list on the server side')
+        help='search query used to filter project list on the server side')
 
     # Compute deps.
     deps_parser = subparsers.add_parser(
@@ -81,9 +78,9 @@ or
 Generate a map of dependencies between projects on a
 Gitlab instance.
 
-The input (on standard input) must consist of a list of projects along
-with their branches, separated by a colon, one per line. If the branch
-is unspecified, 'master' is assumed.
+The input must consist of a list of projects along with their
+branches, separated by a colon, one per line. If the branch is
+unspecified, 'master' is assumed.
 
 The output consists of pairs of project / dependency (so, these are
 'forward' dependencies), for all projects/branches specified in the
@@ -94,51 +91,108 @@ columns in the output, e.g.:
 
     gitlab-deps deps < project.list | awk '{print $2, $1}'
 
+'''), epilog=_fmtdesc('''
+Input can be read from a file (if passed as an argument), or
+from standard input if a filename is omitted or specified as '-'.
 '''))
+    deps_parser.add_argument(
+        '--registry', metavar='NAME',
+        default=os.getenv('GITLAB_REGISTRY'),
+        help='Docker registry hostname (if empty, it will be '
+        'automatically derived from --url)')
+    deps_parser.add_argument(
+        'projects_list',
+        type=argparse.FileType('r'),
+        nargs='?', default=sys.stdin)
 
     # Setup pipeline hooks on the specified projects.
     set_hooks_parser = subparsers.add_parser(
         'set-hooks',
         parents=[common_parser],
         help='set pipeline hooks on projects',
-        description='Set the pipeline hooks on the specified projects '
-        '(usually points at our own server)')
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=_fmtdesc('''
+Set a HTTP hook for pipeline_events on the specified projects.
+
+Takes a list of projects (optional branch specifiers will be ignored)
+as input. Pipeline hooks are required by 'gitlab-deps server' to
+trigger dependent builds, so a common way to use this command is to
+feed it the right-hand side of the 'gitlab-deps deps' output, e.g.:
+
+    gitlab-deps deps < project.list \\
+        | awk '{print $2}' \\
+        | gitlab-deps set-hooks --hook-url=...
+
+using --hook-url to point at the URL of 'gitlab-deps server'.
+
+'''), epilog=_fmtdesc('''
+Input can be read from a file (if passed as an argument), or
+from standard input if a filename is omitted or specified as '-'.
+'''))
     set_hooks_parser.add_argument(
         '--hook-url', metavar='URL',
         help='URL for the pipeline HTTP hook')
     set_hooks_parser.add_argument(
         '--webhook-auth-token', metavar='TOKEN',
-        help='Secret X-Gitlab-Token for request authentication')
+        help='secret X-Gitlab-Token for request authentication')
+    set_hooks_parser.add_argument(
+        'projects_list',
+        type=argparse.FileType('r'),
+        nargs='?', default=sys.stdin)
 
     # Trigger rebuilds of reverse deps.
     rebuild_image_parser = subparsers.add_parser(
         'rebuild',
         parents=[common_parser],
         help='rebuild dependencies of a project',
-        description='Rebuild all projects that depend on the specified '
-        'project.')
-    rebuild_image_parser.add_argument(
-        '--deps', metavar='FILE',
-        help='file with project dependencies')
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=_fmtdesc('''
+Rebuild all projects that depend on the specified project.
+
+Takes a single project path as argument, and triggers a rebuild of its
+direct dependencies. Useful for one-off rebuilds.
+
+If the --recurse option is provided, the tool will wait for completion
+of the pipeline and recursively trigger its dependencies too,
+navigating the entire dependency tree.
+
+'''), epilog=_fmtdesc('''
+Project dependencies can be read from a file (if passed as an
+argument), or from standard input if a filename is omitted or
+specified as '-'.
+'''))
     rebuild_image_parser.add_argument(
         '--recurse', action='store_true',
-        help='Include all dependencies recursively '
+        help='include all dependencies recursively '
         'and wait for completion of the pipelines')
     rebuild_image_parser.add_argument(
         'project_path',
-        help='Project name (relative path, with optional branch)')
+        help='project name (relative path, with optional branch)')
+    rebuild_image_parser.add_argument(
+        'dependencies_list',
+        type=argparse.FileType('r'),
+        nargs='?', default=sys.stdin)
 
     # Server.
     server_parser = subparsers.add_parser(
         'server',
         parents=[common_parser],
-        help='start a HTTP server',
-        description='Start a HTTP server that listens for Gitlab webhooks. '
-        'Configure Gitlab to send Pipeline events for your projects to this '
-        'server to auto-rebuild first-level dependencies.')
-    server_parser.add_argument(
-        '--deps', metavar='FILE',
-        help='file with project dependencies')
+        help='start the HTTP server',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=_fmtdesc('''
+Start an HTTP server that listens for Gitlab webhooks.
+
+When the server receives a pipeline event from Gitlab, it will trigger
+new builds for the direct dependencies of the project. The server is
+meant to be associated with a single Gitlab instance.
+
+You must provide the server with the list of project dependencies.
+
+'''), epilog=_fmtdesc('''
+Project dependencies can be read from a file (if passed as an
+argument), or from standard input if a filename is omitted or
+specified as '-'.
+'''))
     server_parser.add_argument(
         '--port', metavar='PORT', type=int, default='5404',
         dest='bind_port', help='port to listen on')
@@ -147,7 +201,11 @@ columns in the output, e.g.:
         dest='bind_host', help='address to listen on')
     server_parser.add_argument(
         '--webhook-auth-token', metavar='TOKEN',
-        help='Secret X-Gitlab-Token for request authentication')
+        help='secret X-Gitlab-Token for request authentication')
+    server_parser.add_argument(
+        'dependencies_list',
+        type=argparse.FileType('r'),
+        nargs='?', default=sys.stdin)
 
     args = parser.parse_args()
     cmd = args.subparser
@@ -160,13 +218,7 @@ columns in the output, e.g.:
         level=logging.DEBUG if args.debug else logging.INFO,
     )
 
-    # If --registry is not specified, make an educated guess.
-    registry_hostname = args.registry
-    if not registry_hostname:
-        registry_hostname = 'registry.' + urlparse.urlsplit(args.url).netloc
-        logging.warning('guessed %s for the Docker registry hostname',
-                        registry_hostname)
-
+    # Connect to the Gitlab API.
     gitlab_token = args.token
     if not gitlab_token and args.token_file:
         gitlab_token = args.token_file.read().strip().encode('utf-8')
@@ -179,25 +231,36 @@ columns in the output, e.g.:
     if cmd == 'list-projects':
         for p, b in get_branches(gl, list_projects(gl, args.search)):
             print(f'{p}:{b}')
-    if cmd == 'deps':
-        projects = [split_project_branch(x.strip()) for x in sys.stdin]
+
+    elif cmd == 'deps':
+        # If --registry is not specified, guess it from --url and warn.
+        registry_hostname = args.registry
+        if not registry_hostname:
+            registry_hostname = 'registry.' + urlsplit(args.url).netloc
+            logging.warning('guessed %s for the Docker registry hostname',
+                            registry_hostname)
+        projects = [split_project_branch(x.strip())
+                    for x in args.projects_list]
+        list_deps(gl, args.url, registry_hostname, projects)
+
     elif cmd == 'rebuild':
-        deps = read_deps(sys.stdin)
+        deps = read_deps(args.dependencies_list)
         project_path, branch_name = split_project_branch(args.project_path)
         rebuild_deps(gl, deps, project_path, branch_name, args.dry_run,
                      args.recurse)
+
     elif cmd == 'set-hooks':
         if not args.hook_url:
             parser.error('Must specify --hook-url')
         # Need a project list on input, ignore branches.
         projects = set(y[0] for y in (
-            split_project_branch(x.strip()) for x in sys.stdin))
+            split_project_branch(x.strip()) for x in args.projects_list))
         for project_path in projects:
             check_hook(gl, args.hook_url, args.webhook_auth_token,
-                       project_path)
+                       project_path, args.dry_run)
+
     elif cmd == 'server':
-        deps = read_deps(sys.stdin)
+        deps = read_deps(args.dependencies_list)
         run_app(gl, deps, args.bind_host, args.bind_port,
                 args.webhook_auth_token)
 
diff --git a/gitlab_docker_autodep/rebuild.py b/gitlab_docker_autodep/rebuild.py
index b9cbf4c..96ebc69 100644
--- a/gitlab_docker_autodep/rebuild.py
+++ b/gitlab_docker_autodep/rebuild.py
@@ -2,13 +2,14 @@ import logging
 import time
 
 
-def rebuild(gl, project_path, wait=False):
+def rebuild(gl, project_path, branch_name, wait=False):
     """Trigger a rebuild of a project."""
     project = gl.projects.get(project_path)
     if not project:
         return None
 
-    pipeline = project.pipelines.create({'ref': 'master'})
+    pipeline = project.pipelines.create({'ref': branch_name})
+    logging.info('started pipeline %s', pipeline.web_url)
     if wait:
         while pipeline.finished_at is None:
             pipeline.refresh()
@@ -20,9 +21,9 @@ def rebuild_deps(gl, project_deps, project_path, branch_name, dry_run,
                  wait_and_recurse):
     stack = project_deps.get((project_path, branch_name), [])
     while stack:
-        path = stack.pop(0)
-        logging.info('rebuilding %s', path)
+        path, branch = stack.pop(0)
+        logging.info('rebuilding %s:%s', path, branch)
         if not dry_run:
-            rebuild(gl, path, wait_and_recurse)
+            rebuild(gl, path, branch, wait_and_recurse)
         if wait_and_recurse:
-            stack.extend(project_deps.get(path, []))
+            stack.extend(project_deps.get((path, branch), []))
diff --git a/gitlab_docker_autodep/server.py b/gitlab_docker_autodep/server.py
index ffc0351..2f5ae40 100644
--- a/gitlab_docker_autodep/server.py
+++ b/gitlab_docker_autodep/server.py
@@ -16,25 +16,25 @@ queue = Queue.Queue()
 
 def _process_request(gl, project_deps, data):
     pipeline_status = data['object_attributes']['status']
-    branch = data['object_attributes']['ref']
-    path_with_namespace = data['project']['path_with_namespace']
+    branch_name = data['object_attributes']['ref']
+    project_path = data['project']['path_with_namespace']
 
     action = 'none'
     if pipeline_status == 'success':
-        deps = project_deps.get_contents().get(path_with_namespace, [])
+        deps = project_deps.get((project_path, branch_name), [])
 
         built_projects = []
-        for dep_path in deps:
+        for dep_path, dep_branch in deps:
             try:
-                p = rebuild(gl, dep_path)
-                logging.info('started pipeline %s', p)
+                rebuild(gl, dep_path, dep_branch)
+                built_projects.append(f'{dep_path}:{dep_branch}')
             except Exception as e:
-                logging.error('error rebuilding project %s: %s' % (
-                    path_with_namespace, str(e)))
+                logging.error('error rebuilding project %s:%s: %s' % (
+                    dep_path, dep_branch, str(e)))
         action = 'rebuilt %s' % (', '.join(built_projects),)
 
     logging.info('pipeline for %s@%s: %s, action=%s',
-                 path_with_namespace, branch, pipeline_status, action)
+                 project_path, branch_name, pipeline_status, action)
 
 
 def worker_thread(gl, project_deps):
-- 
GitLab


From 14f2094e3fd094bfa14f2237a4624cb5fc090397 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Thu, 2 Jul 2020 11:15:23 +0100
Subject: [PATCH 07/10] Rename package to gitlab_deps

---
 {gitlab_docker_autodep => gitlab_deps}/__init__.py         | 0
 {gitlab_docker_autodep => gitlab_deps}/deps.py             | 0
 {gitlab_docker_autodep => gitlab_deps}/docker_deps.py      | 0
 {gitlab_docker_autodep => gitlab_deps}/docker_deps_test.py | 0
 {gitlab_docker_autodep => gitlab_deps}/hooks.py            | 0
 {gitlab_docker_autodep => gitlab_deps}/main.py             | 0
 {gitlab_docker_autodep => gitlab_deps}/rebuild.py          | 0
 {gitlab_docker_autodep => gitlab_deps}/server.py           | 0
 8 files changed, 0 insertions(+), 0 deletions(-)
 rename {gitlab_docker_autodep => gitlab_deps}/__init__.py (100%)
 rename {gitlab_docker_autodep => gitlab_deps}/deps.py (100%)
 rename {gitlab_docker_autodep => gitlab_deps}/docker_deps.py (100%)
 rename {gitlab_docker_autodep => gitlab_deps}/docker_deps_test.py (100%)
 rename {gitlab_docker_autodep => gitlab_deps}/hooks.py (100%)
 rename {gitlab_docker_autodep => gitlab_deps}/main.py (100%)
 rename {gitlab_docker_autodep => gitlab_deps}/rebuild.py (100%)
 rename {gitlab_docker_autodep => gitlab_deps}/server.py (100%)

diff --git a/gitlab_docker_autodep/__init__.py b/gitlab_deps/__init__.py
similarity index 100%
rename from gitlab_docker_autodep/__init__.py
rename to gitlab_deps/__init__.py
diff --git a/gitlab_docker_autodep/deps.py b/gitlab_deps/deps.py
similarity index 100%
rename from gitlab_docker_autodep/deps.py
rename to gitlab_deps/deps.py
diff --git a/gitlab_docker_autodep/docker_deps.py b/gitlab_deps/docker_deps.py
similarity index 100%
rename from gitlab_docker_autodep/docker_deps.py
rename to gitlab_deps/docker_deps.py
diff --git a/gitlab_docker_autodep/docker_deps_test.py b/gitlab_deps/docker_deps_test.py
similarity index 100%
rename from gitlab_docker_autodep/docker_deps_test.py
rename to gitlab_deps/docker_deps_test.py
diff --git a/gitlab_docker_autodep/hooks.py b/gitlab_deps/hooks.py
similarity index 100%
rename from gitlab_docker_autodep/hooks.py
rename to gitlab_deps/hooks.py
diff --git a/gitlab_docker_autodep/main.py b/gitlab_deps/main.py
similarity index 100%
rename from gitlab_docker_autodep/main.py
rename to gitlab_deps/main.py
diff --git a/gitlab_docker_autodep/rebuild.py b/gitlab_deps/rebuild.py
similarity index 100%
rename from gitlab_docker_autodep/rebuild.py
rename to gitlab_deps/rebuild.py
diff --git a/gitlab_docker_autodep/server.py b/gitlab_deps/server.py
similarity index 100%
rename from gitlab_docker_autodep/server.py
rename to gitlab_deps/server.py
-- 
GitLab


From e27619fcd137fdbbc901bc00f221607cb3e7a8ad Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Thu, 2 Jul 2020 11:15:57 +0100
Subject: [PATCH 08/10] Rename package to gitlab_deps

---
 LICENSE  | 52 ----------------------------------------------------
 setup.py | 10 +++++-----
 2 files changed, 5 insertions(+), 57 deletions(-)

diff --git a/LICENSE b/LICENSE
index 93557b6..e963df8 100644
--- a/LICENSE
+++ b/LICENSE
@@ -620,55 +620,3 @@ copy of the Program in return for a fee.
 
                      END OF TERMS AND CONDITIONS
 
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    gitlab-docker-autodep
-    Copyright (C) 2018  ale
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    gitlab-docker-autodep  Copyright (C) 2018  ale
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/setup.py b/setup.py
index 2099426..5f13dd8 100644
--- a/setup.py
+++ b/setup.py
@@ -3,18 +3,18 @@
 from setuptools import setup, find_packages
 
 setup(
-    name="gitlab-docker-autodep",
-    version="0.3",
-    description="Automatically rebuild Docker images",
+    name="gitlab-deps",
+    version="0.9",
+    description="Manage Gitlab project dependencies and pipelines",
     author="Autistici/Inventati",
     author_email="info@autistici.org",
-    url="https://git.autistici.org/ale/gitlab-docker-autodep",
+    url="https://git.autistici.org/ale/gitlab-deps",
     install_requires=['python-gitlab', 'Flask', 'cheroot'],
     zip_safe=True,
     packages=find_packages(),
     entry_points={
         "console_scripts": [
-            "gitlab-deps = gitlab_docker_autodep.main:main",
+            "gitlab-deps = gitlab_deps.main:main",
             ],
         },
     )
-- 
GitLab


From efd3b720006dbf487e6c4fc561396f35ab29b2e8 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Thu, 2 Jul 2020 11:16:40 +0100
Subject: [PATCH 09/10] Remove obsolete docker_deps.py

---
 gitlab_deps/docker_deps.py      | 100 --------------------------------
 gitlab_deps/docker_deps_test.py |  19 ------
 2 files changed, 119 deletions(-)
 delete mode 100644 gitlab_deps/docker_deps.py
 delete mode 100644 gitlab_deps/docker_deps_test.py

diff --git a/gitlab_deps/docker_deps.py b/gitlab_deps/docker_deps.py
deleted file mode 100644
index aabf430..0000000
--- a/gitlab_deps/docker_deps.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import gitlab
-import json
-import logging
-import re
-import sys
-
-
-_from_rx = re.compile(r'^FROM\s+(\S+).*$', re.MULTILINE)
-
-def _parse_dockerfile(df):
-    return _from_rx.findall(df)
-
-
-def _fetch_dockerfile(gl, project, ref):
-    try:
-        f = project.files.get(file_path='Dockerfile', ref=ref)
-        return f.decode()
-    except:
-        return None
-
-
-def _has_gitlab_ci(gl, project, ref):
-    try:
-        project.files.get(file_path='.gitlab-ci.yml', ref=ref)
-        return True
-    except:
-        return False
-
-
-def _remove_image_tag(name):
-    if ':' in name:
-        return name.split(':')[0]
-    return name
-
-
-def build_docker_deps(gl, search_pattern=None, filter_pattern=None):
-    """Build the project dependency map based on Dockerfiles.
-
-    This can be a fairly expensive (long) operation if the list of
-    projects is large. The 'search_pattern' argument allows for
-    filtering on the server side, using Gitlab search query syntax.
-    On the client side, the project list can be filtered with a
-    regular expression using the 'filter_pattern' argument, which will
-    be applied to the project's path_with_namespace.
-
-    Returns an {image_name: [projects]}, where 'projects' is the list
-    of projects that have 'image_name' as their base Docker
-    image. These are gitlab.Project instances.
-
-    We only examine Dockerfiles in the master branch of repositories.
-
-    """
-    deps = {}
-
-    filter_rx = None
-    if filter_pattern:
-        filter_rx = re.compile(filter_pattern)
-
-    projects = gl.projects.list(all=True, search=search_pattern, as_list=False)
-    for project in projects:
-        project_name = project.path_with_namespace
-        project_url = project_name
-        if filter_rx is not None and not filter_rx.search(project.path_with_namespace):
-            continue 
-        if not _has_gitlab_ci(gl, project, 'master'):
-            continue
-        df = _fetch_dockerfile(gl, project, 'master')
-        if not df:
-            continue
-        base_images = _parse_dockerfile(df.decode('utf-8'))
-        if not base_images:
-            logging.error('ERROR: could not find base image for %s',
-                          project.path_with_namespace)
-            continue
-        for img in base_images:
-            deps.setdefault(_remove_image_tag(img), []).append(project_url)
-    return deps
-
-
-def docker_deps_to_project_deps(deps, registry_hostname):
-    out = {}
-    for image_name in deps:
-        if image_name.startswith(registry_hostname):
-            project_name = image_name[len(registry_hostname)+1:]
-            out[project_name] = deps[image_name]
-    return out
-
-
-def dump_deps(gitlab_url, registry_hostname, gitlab_token,
-              deps_match, deps_filter, project_deps=True):
-    gl = gitlab.Gitlab(gitlab_url, private_token=gitlab_token)
-    if gitlab_token:
-        gl.auth()
-
-    deps = build_docker_deps(gl, deps_match, deps_filter)
-
-    if project_deps:
-        deps = docker_deps_to_project_deps(deps, registry_hostname)
-
-    json.dump(deps, sys.stdout, indent=2)
diff --git a/gitlab_deps/docker_deps_test.py b/gitlab_deps/docker_deps_test.py
deleted file mode 100644
index bcffbaa..0000000
--- a/gitlab_deps/docker_deps_test.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from .docker_deps import _parse_dockerfile
-
-import unittest
-
-
-class TestParseDockerfile(unittest.TestCase):
-
-    def test_parse_dockerfile(self):
-        dockerfile = '''
-FROM baseimage1 AS build
-RUN build
-
-FROM baseimage2
-COPY --from=build bin /usr/bin/bin
-RUN fix-perms
-
-'''
-        images = _parse_dockerfile(dockerfile)
-        self.assertEqual(['baseimage1', 'baseimage2'], images)
-- 
GitLab


From 4bdd388c123fb250ebccf89a4250ff31dce569a1 Mon Sep 17 00:00:00 2001
From: ale <ale@incal.net>
Date: Thu, 2 Jul 2020 12:26:00 +0100
Subject: [PATCH 10/10] Update README

---
 README.md | 195 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 104 insertions(+), 91 deletions(-)

diff --git a/README.md b/README.md
index 4c8b9e2..0f94cf9 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,23 @@
-gitlab-docker-autodep
+gitlab-deps
 ===
 
-Automatically rebuild all the downstream dependencies of Docker-based
-projects on a Gitlab instance.
+*Gitlab-deps* is a simple build orchestration toolkit: it tracks
+dependencies between projects on a Gitlab instance and it can
+automatically rebuild dependencies when a project pipeline completes
+successfully.
 
-It scans the *master* branch of all repositories containing a
-Dockerfile looking for FROM lines and navigates the resulting
-dependency tree to find all projects that needs to be rebuilt when
-their base image (or an upstream image thereof) changes.
+It can track dependencies between projects in either of two supported
+ways:
+
+* projects can add a `.gitlab-deps` file to the root of their
+  repository, containing the fully qualified project URLs of their
+  dependencies;
+* gitlab-deps can scan Dockerfiles (in the repository root) and
+  automatically infer dependencies based on FROM lines.
+
+The implementation depends on HTTP hooks triggered by pipeline events:
+gitlab-deps runs a small HTTP server to respond to these requests and
+trigger new builds.
 
 By default, since it is meant to be used as a trigger as the last step
 in a CI script, it will not navigate the dependency tree recursively
@@ -17,105 +27,130 @@ has been rebuilt.
 
 # Installation
 
+The tools require Python 3.
+
 Install the tool either in a virtualenv of or system-wide with any of
 the standard Python installation mechanisms, for instance (using
 *setuptools*):
 
 ```
-sudo python setup.py install
+sudo python3 setup.py install
 ```
 
-This will install the *gitlab-docker-autodep* command-line tool in
-/usr/local/bin. The tool should work with either Python 2 and Python
-3, and it has few dependencies (just the [Gitlab
-API](https://python-gitlab.readthedocs.io/en/stable/), and
-[Flask](https://flask.pocoo.org/)).
+This will install the *gitlab-deps* command-line tool in
+/usr/local/bin. The tool has few dependencies (just the [Gitlab
+API](https://python-gitlab.readthedocs.io/en/stable/),
+[Flask](https://flask.pocoo.org/) and
+[Cheroot](https://pypi.org/project/cheroot/)).
 
 # Usage
 
-The tool is split into functional components:
+The toolkit is split into functional components (all wrapped in a
+single executable with different sub-commands):
 
-* scan Gitlab and generate a dependency map (stored as a JSON file)
+* scan Gitlab and generate a dependency map
 * manually trigger builds using the dependency map
 * run a server that listens for Gitlab notifications and trigger
   builds
 
-In all cases, the program is configured via command-line options.
+The tools talk to Gitlab using its API, so you're going to need an
+admin token in order to create new pipelines.
 
 ## Common options
 
 The tool must be pointed at your Gitlab instance with the *--url*
-command-line option, 
+command-line option, or alternatively using the `GITLAB_URL`
+environment variable.
 
 You can pass an authentication token using the *--token* or
 *--token-file* command-line options. This is usually required in order
-to trigger CI pipelines: the access token must have the *api* scope.
+to trigger CI pipelines, or to access private projects: the access
+token must have at least the *api* scope. Credentials can also be
+provided using the `GITLAB_AUTH_TOKEN` or `GITLAB_AUTH_TOKEN_FILE`
+environment variables.
 
-The tool will only examine Docker images hosted on the Docker registry
-associated with the Gitlab instance. By default the registry name is
-automatically derived from the server URL (adding a *registry*
-prefix), but it can be changed with the *--registry* command-line
-option.
+### Listing projects
 
-### Scope
+The *list-projects* sub-command can be used to create a list of
+projects (and their branches) in the Gitlab instance. It provides some
+basic functionality for filtering (using the *--search* option), but
+it generates output suitable for *grep*, e.g. to filter a specific
+path prefix (Gitlab group):
 
-On larger Gitlab instances, parsing Dockerfiles for all projects can
-be an expensive (long) operation. The program offers two options to
-manage the scope of the dependency analysis: *--match* and *--filter*.
+```
+gitlab-deps list-projects | grep ^path/to/my/group/
+```
 
-The former, *--match*, allows for filtering the project list on the
-server side, using a Gitlab search query. The latter, *--filter*,
-applies a regular expression to the project names (including
-namespaces) before parsing their dependencies. Combining the two, for
-example, it is possible to efficiently limit the scope of the tool to
-a specific namespace:
+or to only select "master" branches:
 
 ```
-gitlab-docker-autodep deps --match myns --filter ^myns/ ...
+gitlab-deps list-projects | grep ':master$'
 ```
 
-Note that, when building the dependency tree:
+The output from this command is just a list of project paths (with
+namespaces) and branch names, separated by a ':', one per line:
 
-* tags in FROM lines are ignored
-* only the *master* branch of repositories is scanned for Dockerfiles
+```
+path/to/project1:master
+path/to/project1:test-branch
+path/to/project2:master
+...
+```
 
-This might lead to more rebuilds than strictly necessary.
+## Computing dependencies
 
-## Command-line
+The *deps* sub-command will scan the projects and their repositories,
+and it will produce a list of all the edges in the dependency
+graph. It takes a list of project_path:branch specs as input (as
+produced by the *list-projects* sub-command), and it will produce a
+list of edges as whitespace-separated project:branch pairs, e.g.:
 
-The `rebuild` command will trigger a rebuild of all the dependencies
-of a given Docker image, possibly waiting for the CI pipelines to
-complete. Pass the qualified Docker image name (as it appears on FROM
-lines in Dockerfiles) as a command-line argument.
+```
+project:master dependency1:master
+project:master dependency2:master
+```
+
+The output format is once again meant to be processed with standard
+UNIX tools such as *awk* and *grep*.
 
-The tool will print out the project names of the dependencies it
-found. The *--recurse* option will traverse the dependency tree
-recursively, waiting for CI pipelines to complete so that they are
-built in the right order.
+## Configuring pipeline_events hooks
 
-It is possible to limit the scope of the initial dependency scan
-(which is an expensive operation) to projects matching a Gitlab search
-keyword using the *--match* option.
+To work, gitlab-deps needs an HTTP hook for pipeline_events on all
+projects that have dependencies. Since setting this up in Gitlab is a
+manual and laborious process, the *set-hooks* sub-command is provided
+to do this automatically using the API. The intended usage is to run
+it on the right-hand side of the dependency edges (i.e. the list of
+projects/branches that actually have dependencies):
+
+```
+gitlab-deps deps | awk '{print $2}' | gitlab-deps set-hooks
+```
 
-## Gitlab CI integration
+## One-off rebuilds
 
-In order to automatically rebuild the dependencies of a Docker image
-built using Gitlab CI, it is possible to run *gitlab-docker-autodep*
-as a webhook: this way, whenever a successful CI pipeline completes,
-you can trigger a rebuild of the dependencies.
+The *rebuild* sub-command will trigger a rebuild of all the
+dependencies of a given project, possibly waiting for the CI pipelines
+to complete. Pass a qualified project name and branch as a
+command-line argument. The dependency graph (list of edges as produced
+by the *deps* sub-command) must also be provided, either as a file or
+on standard input.
 
-To do this, use the *server* command of *gitlab-docker-autodep*, and
-specify the address to bind to using the *--host* and *--port*
-options. It is also possible to enforce authentication of the webhook
-with a secret token
+The *--recurse* option will traverse the dependency tree recursively,
+waiting for CI pipelines to complete so that they are built in the
+right order.
+
+## Running the server
+
+The gitlab-deps tool has a *server* command to start a simple HTTP
+server that receives the pipeline_events webhooks from Gitlab, and
+triggers builds for project dependencies.
+
+The *server* command requires an address to bind to, specified using
+the *--host* and *--port* options. It is also possible to enforce
+authentication of the webhook with a secret token
 ([X-Gitlab-Token](https://docs.gitlab.com/ee/user/project/integrations/webhooks.html))
 using the *--webhook-auth-token* option.
 
-When running in this mode, it is assumed that all your Docker-related
-projects have webhooks set up to rebuild their dependencies, so
-*gitlab-docker-autodep* will only trigger a build of the immediate
-dependencies of a project.
-
 Also note that the server does not have any TLS support: if necessary,
 it is best to use a dedicated reverse proxy (Apache, NGINX, etc).
 
@@ -124,35 +159,13 @@ machine as Gitlab itself, and that the Gitlab authentication token is
 stored in */etc/gitlab_docker_token*:
 
 ```
-gitlab-docker-autodep \
+gitlab-deps deps \
+| gitlab-deps server \
     --url=https://my.gitlab \
     --token-file=/etc/gitlab_docker_token \
-    server \
-    --deps=deps.json
     --host=127.0.0.1 --port=14001
 ```
 
-You can then configure your project's webhooks with the URL
-`http://localhost:14001/`, with the *Trigger* checkbox set only
-on *Pipeline events*.
-
-Then you should generate the *deps.json* dependency map periodically,
-for instance with a cron job:
-
-```
-*/30 * * * * root gitlab-docker-autodep
-                  --url=https://my.gitlab 
-                  --token-file=/etc/gitlab_docker_token
-                  deps > deps.json
-```
-
-It can be useful to run the *rebuild* command from a cron job, for
-instance in order to rebuild images on a periodic schedule, and
-assuming all your projects share a common base image:
-
-```
-50 5 * * * root gitlab-docker-autodep 
-                --url=https://my.gitlab
-                --token-file=/etc/gitlab_docker_token
-                rebuild $MY_BASE_IMAGE
-```
+If configuring webhooks manually (rather than with *set-hooks*),
+create a new webhook with the URL `http://localhost:14001/`, and with
+the *Trigger* checkbox set only on *Pipeline events*.
-- 
GitLab