JenspederM/kedro-databricks

View on GitHub
src/kedro_databricks/templates/databricks_run.py

Summary

Maintainability
A
0 mins
Test Coverage
# This file is used to run Kedro pipelines on Databricks.
# It is automatically generated by the Kedro-Databricks plugin.
# Do not modify this file directly.

import argparse
import logging

from kedro.framework.project import configure_project
from kedro.framework.session import KedroSession


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", dest="env", type=str)
    parser.add_argument("--conf-source", dest="conf_source", type=str)
    parser.add_argument("--package-name", dest="package_name", type=str)
    parser.add_argument("--nodes", dest="nodes", type=str)

    args = parser.parse_args()
    env = args.env
    conf_source = args.conf_source
    package_name = args.package_name
    nodes = [node.strip() for node in args.nodes.split(",")]

    # https://kb.databricks.com/notebooks/cmd-c-on-object-id-p0.html
    logging.getLogger("py4j.java_gateway").setLevel(logging.ERROR)
    logging.getLogger("py4j.py4j.clientserver").setLevel(logging.ERROR)

    configure_project(package_name)
    with KedroSession.create(env=env, conf_source=conf_source) as session:
        if len(nodes) > 0:
            session.run(node_names=nodes)
        else:
            session.run()


if __name__ == "__main__":
    main()