api/seeddata/notebookTemplate.json
[
{
"model": "genie.notebooktemplate",
"pk": 1,
"fields": {
"template": {
"info": {},
"name": "{{name}}",
"path": "/{{name}}",
"config": {
"looknfeel": "default",
"personalizedMode": "false",
"isZeppelinNotebookCronEnable": false
},
"version": "0.9.0",
"noteForms": {},
"noteParams": {},
"paragraphs": [
{
"id": "paragraph_1616145245686_1473486751",
"apps": [],
"text": "%spark.pyspark\n# ==========================================\n# Connection Properties\n# ==========================================\ndBHost = \"{{sourceConnection_host}}\"\ndBPort = \"{{sourceConnection_port}}\"\ndBSID = \"{{sourceConnection_sid}}\"\ndBName = \"{{sourceConnection_database}}\"\ndBUsername = \"{{sourceConnection_username}}\"\ndBPassword = \"{{sourceConnection_password}}\"\ndBQuery = \"\"\"{{sqlQuery}}\"\"\"\nprimaryColumnName = \"{{primaryKeyColumn}}\"\n# ==========================================\n\nif \"{{sourceConnection_type}}\"==\"Oracle\":\n df = spark.read \\\n .format(\"jdbc\") \\\n .option(\"url\", f\"jdbc:oracle:thin:@{dBHost}:{dBPort}/{dBSID}\") \\\n .option(\"query\", dBQuery) \\\n .option(\"password\", dBPassword) \\\n .option(\"user\", dBUsername) \\\n .option(\"driver\", \"oracle.jdbc.driver.OracleDriver\") \\\n .load()\n \nelif \"{{sourceConnection_type}}\"==\"Postgres\":\n df = spark.read \\\n .format(\"jdbc\") \\\n .option(\"url\", f\"jdbc:postgresql://{dBHost}:{dBPort}/{dBName}\") \\\n .option(\"query\", dBQuery) \\\n .option(\"password\", dBPassword) \\\n .option(\"user\", dBUsername) \\\n .option(\"driver\", \"org.postgresql.Driver\")\n if \"{{sourceConnection_sslRequired}}\" == \"True\":\n df = df.option(\"sslmode\", \"require\")\n df = df.load()\n \nelif \"{{sourceConnection_type}}\"==\"MySQL\":\n df = spark.read \\\n .format(\"jdbc\") \\\n .option(\"url\", f\"jdbc:mysql://{dBHost}:{dBPort}/{dBName}\") \\\n .option(\"query\", dBQuery) \\\n .option(\"password\", dBPassword) \\\n .option(\"user\", dBUsername)\n if \"{{sourceConnection_sslRequired}}\" == \"True\":\n df = df.option(\"sslmode\", \"require\")\n df = df.load()\n \n# Set order by column for SortOrder\ndf = df.orderBy(\"{{primaryKeyColumn}}\")\n\n# Store results in temp table\ndf.createOrReplaceTempView(\"{{tempTableName}}\")",
"user": "anonymous",
"focus": true,
"title": "Read Source Database",
"config": {
"title": true,
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"tableHide": false,
"editorHide": false,
"editorMode": "ace/mode/python",
"editorSetting": {
"language": "python",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"jobName": "paragraph_1619459617352_2047119125",
"progress": 0,
"settings": {
"forms": {},
"params": {}
},
"$$hashKey": "object:928",
"dateCreated": "2021-04-26T17:53:37+0000",
"dateUpdated": "2021-04-26T19:16:48+0000",
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
},
{
"id": "paragraph_1615528549981_865522288",
"apps": [],
"text": "%spark.sql\n-- Create table if not exist\nCREATE TABLE IF NOT EXISTS cuelake.{{destinationTableName}} USING iceberg \n TBLPROPERTIES (\n 'write.metadata.delete-after-commit.enabled'= 'true',\n 'write.metadata.previous-versions-max' = 2,\n 'history.expire.max-snapshot-age-ms' = '60000',\n 'history.expire.min-snapshots-to-keep' = 1,\n 'write.spark.fanout.enabled' = 'true',\n 'write.metadata.metrics.default' = 'none'\n )\n AS SELECT /*+ COALESCE(1) */ * from {{tempTableName}}",
"user": "anonymous",
"title": "Create Table If Not Exist",
"config": {
"title": true,
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"editorHide": false,
"editorMode": "ace/mode/sql",
"editorSetting": {
"language": "sql",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"jobName": "paragraph_1619459617352_1902312980",
"progress": 0,
"settings": {
"forms": {},
"params": {}
},
"$$hashKey": "object:929",
"dateCreated": "2021-04-26T17:53:37+0000",
"dateUpdated": "2021-04-26T17:53:37+0000",
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
},
{
"id": "paragraph_1616655403846_1323321554",
"apps": [],
"text": "%spark\nval tableName = \"{{destinationTableName}}\"\n\nimport org.apache.iceberg.actions.Actions;\nimport org.apache.iceberg.hadoop.HadoopCatalog;\nval catalog = new HadoopCatalog(sc.hadoopConfiguration, sc.getConf.get(\"spark.sql.catalog.cuelake.warehouse\"));\n\nimport org.apache.iceberg.Table;\nimport org.apache.iceberg.catalog.TableIdentifier;\n\nval name = TableIdentifier.of(tableName);\nval table = catalog.loadTable(name);\n\n// Expire older snapshots and commit\ntable.expireSnapshots().commit()\n\n// Run Compaction for table\nActions.forTable(table).rewriteDataFiles()\n .targetSizeInBytes(500 * 1024 * 1024 * 10) // 5000 MB\n .execute();",
"user": "anonymous",
"title": "Iceberg Table Optimizations",
"config": {
"title": true,
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"editorMode": "ace/mode/scala",
"editorSetting": {
"language": "scala",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"jobName": "paragraph_1619459617352_1034740601",
"progress": 0,
"settings": {
"forms": {},
"params": {}
},
"$$hashKey": "object:930",
"dateCreated": "2021-04-26T17:53:37+0000",
"dateUpdated": "2021-04-26T17:53:37+0000",
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
},
{
"id": "paragraph_1615885036622_2137552499",
"apps": [],
"text": "%spark.pyspark\nmaxVal=spark.sql(f\"SELECT MAX({{timestampColumn}}) FROM cuelake.{{destinationTableName}}\").collect()[0][0]\nz.put(\"{{tempTableName}}_val\", maxVal)\nprint(\"Timestamp value in target table: \" + str(maxVal))",
"user": "anonymous",
"title": "Get Timestamp From Destination Table",
"config": {
"title": true,
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"editorMode": "ace/mode/python",
"editorSetting": {
"language": "python",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"jobName": "paragraph_1619459617352_975985851",
"progress": 0,
"settings": {
"forms": {},
"params": {}
},
"$$hashKey": "object:931",
"dateCreated": "2021-04-26T17:53:37+0000",
"dateUpdated": "2021-04-26T17:53:37+0000",
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
},
{
"id": "paragraph_1616144051503_913231705",
"apps": [],
"text": "%spark.sql\n-- SQL Query to configure sort order\nALTER TABLE cuelake.{{destinationTableName}} WRITE ORDERED BY {{primaryKeyColumn}};",
"user": "anonymous",
"title": "Update Iceberg Table Properties",
"config": {
"title": true,
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"editorMode": "ace/mode/sql",
"editorSetting": {
"language": "sql",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"jobName": "paragraph_1619459617352_810841844",
"progress": 0,
"settings": {
"forms": {},
"params": {}
},
"$$hashKey": "object:932",
"dateCreated": "2021-04-26T17:53:37+0000",
"dateUpdated": "2021-04-26T17:53:37+0000",
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
},
{
"id": "paragraph_1616147422888_2080009648",
"apps": [],
"text": "%spark.pyspark\nmergeSql = f\"MERGE INTO cuelake.{{destinationTableName}} t USING (SELECT * from {{tempTableName}} where {{timestampColumn}} > \\\"{z.get('{{tempTableName}}_val')}\\\") u ON t.{{primaryKeyColumn}} = u.{{primaryKeyColumn}} WHEN MATCHED THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *\"\nprint(\"Executing SQL: \" + mergeSql)\nspark.sql(mergeSql)",
"user": "anonymous",
"title": "Merge New Data",
"config": {
"title": true,
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"editorMode": "ace/mode/python",
"editorSetting": {
"language": "python",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"jobName": "paragraph_1619459617353_1235940290",
"progress": 0,
"settings": {
"forms": {},
"params": {}
},
"$$hashKey": "object:933",
"dateCreated": "2021-04-26T17:53:37+0000",
"dateUpdated": "2021-04-26T17:53:37+0000",
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
}
],
"angularObjects": {},
"defaultInterpreterGroup": "spark"
},
"formJson": {
"fields": [
{
"name": "sourceConnection",
"type": "connectionSelect",
"label": "Source Connection",
"rules": [
{
"message": "Source Connection is required",
"required": true
}
],
"filter": [
"Oracle",
"Postgres",
"MySQL"
]
},
{
"name": "sqlQuery",
"type": "sql",
"label": "SQL Query",
"rules": [
{
"message": "SQL Query is required",
"required": true
}
]
},
{
"name": "timestampColumn",
"type": "text",
"label": "Timestamp Column",
"rules": [
{
"message": "Timestamp Column is required",
"required": true
}
]
},
{
"name": "primaryKeyColumn",
"type": "text",
"label": "Primary Key Column",
"rules": [
{
"message": "Primary Key Column is required",
"required": true
}
]
},
{
"name": "destinationTableName",
"type": "text",
"label": "Destination Table Name",
"rules": [
{
"message": "Destination Table Name is required",
"required": true
}
]
}
]
},
"name": "Incremental Refresh"
}
},
{
"model": "genie.notebooktemplate",
"pk": 2,
"fields": {
"template": {
"info": {},
"name": "{{name}}",
"path": "/{{name}}",
"config": {
"looknfeel": "default",
"personalizedMode": "false",
"isZeppelinNotebookCronEnable": false
},
"version": "0.9.0",
"noteForms": {},
"noteParams": {},
"paragraphs": [
{
"id": "paragraph_1616145245686_1473486751",
"apps": [],
"text": "%spark.pyspark\n# ==========================================\n# Connection Properties\n# ==========================================\ndBHost = \"{{sourceConnection_host}}\"\ndBPort = \"{{sourceConnection_port}}\"\ndBSID = \"{{sourceConnection_sid}}\"\ndBName = \"{{sourceConnection_database}}\"\ndBUsername = \"{{sourceConnection_username}}\"\ndBPassword = \"{{sourceConnection_password}}\"\ndBQuery = \"\"\"{{sqlQuery}}\"\"\"\nprimaryColumnName = \"{{primaryKeyColumn}}\"\n# ==========================================\n\nif \"{{sourceConnection_type}}\"==\"Oracle\":\n df = spark.read \\\n .format(\"jdbc\") \\\n .option(\"url\", f\"jdbc:oracle:thin:@{dBHost}:{dBPort}/{dBSID}\") \\\n .option(\"query\", dBQuery) \\\n .option(\"password\", dBPassword) \\\n .option(\"user\", dBUsername) \\\n .option(\"driver\", \"oracle.jdbc.driver.OracleDriver\") \\\n .load()\n \nelif \"{{sourceConnection_type}}\"==\"Postgres\":\n df = spark.read \\\n .format(\"jdbc\") \\\n .option(\"url\", f\"jdbc:postgresql://{dBHost}:{dBPort}/{dBName}\") \\\n .option(\"query\", dBQuery) \\\n .option(\"password\", dBPassword) \\\n .option(\"user\", dBUsername) \\\n .option(\"driver\", \"org.postgresql.Driver\")\n if \"{{sourceConnection_sslRequired}}\" == \"True\":\n df = df.option(\"sslmode\", \"require\")\n df = df.load()\n \nelif \"{{sourceConnection_type}}\"==\"MySQL\":\n df = spark.read \\\n .format(\"jdbc\") \\\n .option(\"url\", f\"jdbc:mysql://{dBHost}:{dBPort}/{dBName}\") \\\n .option(\"query\", dBQuery) \\\n .option(\"password\", dBPassword) \\\n .option(\"user\", dBUsername)\n if \"{{sourceConnection_sslRequired}}\" == \"True\":\n df = df.option(\"sslmode\", \"require\")\n df = df.load()\n\n# Store results in temp table\ndf.createOrReplaceTempView(\"{{tempTableName}}\")",
"user": "anonymous",
"focus": true,
"title": "Read Source Database",
"config": {
"title": true,
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"tableHide": false,
"editorHide": false,
"editorMode": "ace/mode/python",
"editorSetting": {
"language": "python",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"jobName": "paragraph_1619471291607_747575162",
"progress": 0,
"settings": {
"forms": {},
"params": {}
},
"$$hashKey": "object:1438",
"dateCreated": "2021-04-26T21:08:11+0000",
"dateUpdated": "2021-04-26T21:09:38+0000",
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
},
{
"id": "paragraph_1615528549981_865522288",
"apps": [],
"text": "%spark.sql\nDROP TABLE IF EXISTS cuelake.{{destinationTableName}}; \nCREATE TABLE cuelake.{{destinationTableName}} USING iceberg AS SELECT /*+ COALESCE(1) */ * from {{tempTableName}}",
"user": "anonymous",
"title": "Refresh Destination Table",
"config": {
"title": true,
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"editorHide": false,
"editorMode": "ace/mode/sql",
"editorSetting": {
"language": "sql",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"jobName": "paragraph_1619471291608_1787407030",
"progress": 0,
"settings": {
"forms": {},
"params": {}
},
"$$hashKey": "object:1439",
"dateCreated": "2021-04-26T21:08:11+0000",
"dateUpdated": "2021-04-26T21:08:11+0000",
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
}
],
"angularObjects": {},
"defaultInterpreterGroup": "spark"
},
"formJson": {
"fields": [
{
"name": "sourceConnection",
"type": "connectionSelect",
"label": "Source Connection",
"rules": [
{
"message": "Source Connection is required",
"required": true
}
],
"filter": [
"Oracle",
"Postgres",
"MySQL"
]
},
{
"name": "sqlQuery",
"type": "sql",
"label": "SQL Query",
"rules": [
{
"message": "SQL Query is required",
"required": true
}
]
},
{
"name": "destinationTableName",
"type": "text",
"label": "Destination Table Name",
"rules": [
{
"message": "Destination Table Name is required",
"required": true
}
]
}
]
},
"name": "Full Refresh"
}
},
{
"model": "genie.notebooktemplate",
"pk": 3,
"fields": {
"template": {
"info": {
"isRunning": false
},
"name": "{{name}}",
"config": {
"looknfeel": "default",
"personalizedMode": "false",
"isZeppelinNotebookCronEnable": false
},
"version": "0.9.0",
"noteForms": {},
"noteParams": {},
"paragraphs": [
{
"apps": [],
"text": "%spark.sql\nSELECT * FROM",
"user": "anonymous",
"focus": true,
"config": {
"enabled": true,
"results": {},
"colWidth": 12,
"fontSize": 9,
"editorMode": "ace/mode/text",
"editorSetting": {
"language": "text",
"completionKey": "TAB",
"editOnDblClick": false,
"completionSupport": true
}
},
"status": "READY",
"settings": {
"forms": {},
"params": {}
},
"runtimeInfos": {},
"progressUpdateIntervalMs": 500
}
],
"angularObjects": {},
"defaultInterpreterGroup": "spark"
},
"formJson": {
"fields": []
},
"name": "Blank"
}
}
]