# src/kedro_databricks/templates/databricks_run.py
# This file is used to run Kedro pipelines on Databricks.
# It is automatically generated by the Kedro-Databricks plugin.
# Do not modify this file directly.
import argparse
import logging
from kedro.framework.project import configure_project
from kedro.framework.session import KedroSession
def main():
    """Run a Kedro pipeline, or a subset of its nodes, from the command line.

    Command-line options (all optional strings):

    * ``--env``: forwarded to ``KedroSession.create`` as ``env``.
    * ``--conf-source``: forwarded to ``KedroSession.create`` as
      ``conf_source``.
    * ``--package-name``: forwarded to ``configure_project``.
    * ``--nodes``: comma-separated node names. Whitespace around each name
      is stripped and empty entries are ignored. When no names remain
      (the option is omitted or blank), the session is run without a node
      filter; otherwise only the named nodes are run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", dest="env", type=str)
    parser.add_argument("--conf-source", dest="conf_source", type=str)
    parser.add_argument("--package-name", dest="package_name", type=str)
    parser.add_argument("--nodes", dest="nodes", type=str)
    args = parser.parse_args()

    env = args.env
    conf_source = args.conf_source
    package_name = args.package_name

    # argparse leaves ``args.nodes`` as None when --nodes is omitted, and
    # "".split(",") yields [""], so normalise to "" and drop blank entries.
    # Otherwise the unfiltered ``session.run()`` branch below could never
    # be reached.
    raw_nodes = args.nodes or ""
    stripped_names = (name.strip() for name in raw_nodes.split(","))
    nodes = [name for name in stripped_names if name]

    # Raise the py4j loggers to ERROR to silence their lower-level output;
    # see https://kb.databricks.com/notebooks/cmd-c-on-object-id-p0.html
    logging.getLogger("py4j.java_gateway").setLevel(logging.ERROR)
    # NOTE(review): the doubled "py4j." prefix looks unintended (possibly
    # meant "py4j.clientserver") -- left unchanged, confirm before editing.
    logging.getLogger("py4j.py4j.clientserver").setLevel(logging.ERROR)

    configure_project(package_name)
    with KedroSession.create(env=env, conf_source=conf_source) as session:
        if nodes:
            session.run(node_names=nodes)
        else:
            session.run()
# Only start the run when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()