whylabs/whylogs-python

View on GitHub
python/test_notebooks/notebook_tests.py

Summary

Maintainability
A
0 mins
Test Coverage
import os
import subprocess

import papermill as pm

# Absolute directory containing this test module (head of the split absolute path).
TEST_DIR = os.path.split(os.path.abspath(__file__))[0]
# Two directory levels above TEST_DIR; notebook paths reported by git are joined onto it.
PARENT_DIR = os.path.join(
    TEST_DIR,
    os.pardir,
    os.pardir,
)
# Path handed to papermill as the destination for each executed notebook.
OUTPUT_NOTEBOOK = "output.ipynb"
# Notebook file names that TestNotebooks leaves out of the run.
# Entries are compared against os.path.basename() of each git-tracked path, so a name listed
# here is skipped in whichever directory it lives. Where known, the reason for skipping is
# given as a trailing comment on the entry.
skip_notebooks = [
    "Guest Session.ipynb",
    "Single_Image_Tracing_Profile_to_WhyLabs.ipynb",
    "Pyspark_Profiling.ipynb",
    "WhyLabs_Sagemaker-PyTorch.ipynb",
    "Kafka_Example.ipynb",
    "Writing_to_WhyLabs.ipynb",
    "Writing_Reference_Profiles_to_WhyLabs.ipynb",
    "flask_with_whylogs.ipynb",
    "BigQuery_Example.ipynb",
    "Segments.ipynb",
    "Writing_Regression_Performance_Metrics_to_WhyLabs.ipynb",
    "Writing_Classification_Performance_Metrics_to_WhyLabs.ipynb",
    "Getting_Started_with_WhyLabsV1.ipynb",
    "Getting_Started_with_UDFs.ipynb",
    "Writing_Feature_Weights_to_WhyLabs.ipynb",
    "Image_Logging.ipynb",
    "Writing_Ranking_Performance_Metrics_to_WhyLabs.ipynb",
    "Image_Logging_Udf_Metric.ipynb",
    "mnist_exploration.ipynb",
    "performance_estimation.ipynb",
    "Embeddings_Distance_Logging.ipynb",  # skipped due to data download
    "whylogs_Audio_examples.ipynb",  # skipped because of Kaggle data download and API key for whylabs upload
    "Logging_with_Debug_Events.ipynb",  # skipped because of API key required with whylabs writing
    "NLP_Summarization.ipynb",
    "Multi dataset logger.ipynb",
    "Pyspark_and_Constraints.ipynb",
    "LocalStore_with_Constraints.ipynb",  # skipped because it has over 4 minutes of thread.sleep in it
    "KS_Profiling.ipynb",  # skipped because this takes a few minutes to run
    "Monitoring_Embeddings.ipynb",  # skipped because needs user input
    "whylogs_UDF_examples.ipynb",  # skipped until multiple output column UDFs released
    "Transaction_Examples.ipynb",  # skipped because API key required for whylabs writing
]


# https://docs.pytest.org/en/6.2.x/example/parametrize.html#a-quick-port-of-testscenarios
def pytest_generate_tests(metafunc):
    """
    Parametrize a test from the ``scenarios`` attribute of its enclosing class.

    ``scenarios`` is expected to be a sequence of ``(scenario_id, params)`` pairs, where
    ``params`` maps argument names to values. Every scenario becomes one parametrized case
    whose pytest id is ``scenario_id``. All scenarios are assumed to share the same keys in
    the same order; the argument names are taken from the last scenario.

    Tests that are not scenario-driven are left untouched: a module-level test function
    (``metafunc.cls`` is ``None``), a class without a ``scenarios`` attribute, or a class
    whose ``scenarios`` is empty. Without this guard such tests would abort collection with
    an ``AttributeError`` or an unbound ``argnames``.
    """
    scenarios = getattr(metafunc.cls, "scenarios", None)
    if not scenarios:
        return

    idlist = []
    argvalues = []
    argnames = []
    for scenario in scenarios:
        scenario_id, params = scenario[0], scenario[1]
        idlist.append(scenario_id)
        argnames = list(params.keys())
        argvalues.append(list(params.values()))
    metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class")


def process_notebook(notebook_filename):
    """
    Execute a notebook with papermill and report whether it ran without error.

    The executed notebook is written to ``OUTPUT_NOTEBOOK``; ``notebook_filename`` is only
    passed to papermill as the input path. ``timeout=180`` is forwarded to papermill
    (presumably a per-cell limit in seconds -- confirm against the papermill version in use).

    If execution raises, a message naming the notebook is printed and the same exception is
    re-raised so the calling test fails. On success a confirmation line is printed.
    """
    try:
        pm.execute_notebook(notebook_filename, OUTPUT_NOTEBOOK, timeout=180)
    except Exception as error:
        print(f"Notebook: {notebook_filename} failed test with exception: {error}")
        raise
    else:
        print(f"Successfully executed {notebook_filename}")


class TestNotebooks:
    """
    Runs every git-tracked notebook that is not listed in ``skip_notebooks``.

    The notebook list is built once, when the class body is evaluated, by asking git for all
    files tracked at HEAD. ``scenarios`` holds one ``(id, params)`` pair per notebook and is
    consumed by ``pytest_generate_tests`` to parametrize ``test_all_notebooks``.
    """

    # The command is passed as an argument list, so no shell is involved.
    # -z makes git emit NUL-terminated, unquoted paths; in the default line-based mode git
    # wraps names containing unusual characters in double quotes and escapes them, which
    # would make the ".ipynb" suffix test below miss those notebooks.
    _ls_tree_output = subprocess.check_output(
        ["git", "ls-tree", "--full-tree", "--name-only", "-r", "-z", "HEAD"]
    ).decode("utf-8")
    # Drop the empty string that follows the final NUL terminator.
    git_files = [path for path in _ls_tree_output.split("\0") if path]

    # Get just the notebooks from the git files
    notebooks = [fn for fn in git_files if fn.endswith(".ipynb") and os.path.basename(fn) not in skip_notebooks]
    scenarios = [(notebook, {"notebook": notebook}) for notebook in notebooks]

    def test_all_notebooks(self, notebook):
        """
        Execute one tracked notebook and fail if it raises.

        ``notebook`` is a path as reported by ``git ls-tree --full-tree`` (relative to the
        repository root); it is joined onto ``PARENT_DIR``, which is assumed to be that root.
        """
        process_notebook(os.path.join(PARENT_DIR, notebook))