stellargraph/stellargraph

View on GitHub
scripts/ci/validate_ci_workflow.py

Summary

Maintainability
A
0 mins
Test Coverage
# -*- coding: utf-8 -*-
#
# Copyright 2020 Data61, CSIRO
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import yaml
import sys
import glob
import os
from collections import Counter

# Comment text searched for in the raw workflow file; the line it sits on is
# the line that notebook-list errors are reported against.
NOTEBOOK_MARKER = "# MARKER: list of all notebooks"
# Key under 'jobs' whose 'strategy.matrix.notebook' list is validated.
NOTEBOOK_JOB = "notebooks"

# Comment text searched for in the raw workflow file; the line it sits on is
# the line that 'needs'-list errors are reported against.
ALL_JOBS_PASSED_MARKER = "# MARKER: list of all jobs"
# Key under 'jobs' whose 'needs' list must name every other job.
ALL_JOBS_PASSED_JOB = "all-jobs-passed"

# Two directory levels above the directory containing this script.
ROOT_DIR = os.path.join(os.path.dirname(__file__), "../..")
# Workflow file to validate; a relative path, opened after main() has changed
# the working directory to ROOT_DIR.
WORKFLOW = ".github/workflows/ci.yml"


def error(message, line):
    """Report a failure as a GitHub Actions error annotation and abort.

    Prints an ``::error`` workflow command that attaches ``message`` to
    ``line`` of the ``WORKFLOW`` file, then terminates the process.

    Args:
        message: text of the annotation.
        line: line number within ``WORKFLOW`` that the annotation points at.

    Raises:
        SystemExit: always, with exit status 1.
    """
    # Workflow-command parameters are comma-separated `key=value` pairs and
    # only the final `::` separates them from the message, so the line number
    # has to be written as `line=<n>`.
    print(f"::error file={WORKFLOW},line={line}::{message}")
    sys.exit(1)


def find_marker_line(contents, marker):
    """Return the 1-based line number of the first occurrence of ``marker``.

    Args:
        contents: full text of the workflow file.
        marker: exact marker text to search for.

    Returns:
        The number of newline characters before the marker, plus one.

    If the marker is absent, the failure is reported through ``error`` (at
    line 0, since there is no better location), which exits the process.
    """
    try:
        marker_position = contents.index(marker)
    except ValueError:
        # str.index raises ValueError only for "substring not found"; anything
        # else (e.g. KeyboardInterrupt) must not be mistaken for a missing marker
        error(f"failed to find {marker!r} comment in the workflow file", 0)

    return contents.count("\n", 0, marker_position) + 1


def find_key(obj, path, marker_line):
    """Walk ``path`` through the nested containers in ``obj`` and return the value reached.

    Args:
        obj: outermost container to index into.
        path: sequence of keys, applied one after another.
        marker_line: line number passed to ``error`` if a key is missing.

    Returns:
        The value found after applying every key in ``path`` (``obj`` itself
        when ``path`` is empty).

    A missing key (``KeyError``) is reported through ``error``, naming the
    keys traversed so far and the keys available at that level.
    """
    current = obj
    for depth, segment in enumerate(path):
        try:
            child = current[segment]
        except KeyError:
            traversed = ".".join([repr(part) for part in path[:depth]])
            available = ", ".join([repr(existing) for existing in current.keys()])
            error(
                f"expected key {segment!r} at {traversed}, found {available}",
                marker_line,
            )
        else:
            current = child

    return current


def unique_and_equal(found, expected, name, step, marker_line):
    """Check that ``found`` has no repeats and contains exactly the items in ``expected``.

    Args:
        found: iterable of items listed in the workflow.
        expected: set of items that should be listed.
        name: human-readable noun for the items, used in messages.
        step: name of the workflow job being checked, used in messages.
        marker_line: line number used for error and success reporting.

    Any repeated item, or any difference between the two collections, is
    reported through ``error``; otherwise a success line is printed.
    """
    # anything tallied more than once has been listed repeatedly
    tally = Counter(found)
    duplicates = [item for item in tally if tally[item] > 1]
    if duplicates:
        duplicates_str = ", ".join(duplicates)
        error(
            f"found {len(duplicates)} {name} listed twice: {duplicates_str}",
            marker_line,
        )

    listed = set(found)

    if listed != expected:
        parts = [
            f"found list of {len(listed)} {name} in '{step}' to be different to the {len(expected)} {name} on disk"
        ]

        # describe each direction of the mismatch, skipping the empty ones
        differences = [
            (f"{name} listed but not on disk", listed - expected),
            (f"{name} on disk but not listed", expected - listed),
        ]
        for label, items in differences:
            if items:
                items_str = ", ".join(sorted(items))
                parts.append(f"{label}: {items_str}")

        error("; ".join(parts), marker_line)

    print(f"{WORKFLOW}:{marker_line}: success: listed {name} matches {name} on disk")


def check_notebook_list(contents, workflow):
    """Verify the notebook matrix in the workflow against the ``.ipynb`` files under ``demos/``.

    Args:
        contents: raw text of the workflow file, used to locate the marker line.
        workflow: the parsed workflow document.

    The glob pattern is relative, so the result depends on the current
    working directory.
    """
    line = find_marker_line(contents, NOTEBOOK_MARKER)

    matrix_path = ["jobs", NOTEBOOK_JOB, "strategy", "matrix", "notebook"]
    listed_notebooks = find_key(workflow, matrix_path, line)

    notebook_paths = glob.glob("demos/**/*.ipynb", recursive=True)
    notebooks_on_disk = set(notebook_paths)

    unique_and_equal(
        listed_notebooks,
        notebooks_on_disk,
        name="notebook(s)",
        step=NOTEBOOK_JOB,
        marker_line=line,
    )


def check_needs_list(contents, workflow):
    """Verify that the ``all-jobs-passed`` job's ``needs`` names every other job.

    Args:
        contents: raw text of the workflow file, used to locate the marker line.
        workflow: the parsed workflow document.
    """
    marker_line = find_marker_line(contents, ALL_JOBS_PASSED_MARKER)

    jobs = find_key(workflow, ["jobs"], marker_line)
    found = find_key(jobs, [ALL_JOBS_PASSED_JOB, "needs"], marker_line)

    # GitHub Actions accepts `needs` as either a single job name or a list of
    # them; wrap a lone string so it isn't treated as a sequence of characters
    if isinstance(found, str):
        found = [found]

    # this should depend on all of the other jobs...
    expected = set(jobs.keys())
    # ... except itself
    expected.remove(ALL_JOBS_PASSED_JOB)

    unique_and_equal(
        found,
        expected,
        name="job(s)",
        step=ALL_JOBS_PASSED_JOB,
        marker_line=marker_line,
    )


def main():
    """Load the CI workflow file and run both consistency checks on it.

    Changes the working directory to ``ROOT_DIR``, reads and parses
    ``WORKFLOW``, then checks the notebook list and the ``needs`` list.
    """
    # make sure we're always in the root of the repo, no matter from where the script is run
    os.chdir(ROOT_DIR)
    print(f"Running in {os.getcwd()}")

    # read as UTF-8 explicitly rather than relying on the locale's default
    # encoding, which varies between machines
    with open(WORKFLOW, encoding="utf-8") as f:
        contents = f.read()

    workflow = yaml.safe_load(contents)

    check_notebook_list(contents, workflow)
    check_needs_list(contents, workflow)


# run the checks only when executed as a script, not when imported as a module
if __name__ == "__main__":
    main()