# cuelake.yaml
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: cuelake-conf-map
data:
  # Change below properties to point to your production database
  # POSTGRES_DB_HOST: localhost
  # POSTGRES_DB_USERNAME: postgres
  # POSTGRES_DB_PASSWORD: postgres
  # POSTGRES_DB_SCHEMA: cuelake
  # POSTGRES_DB_PORT: '5432'
  ## Change below properties to point to your metastore database
  ## NOTE(review): the keys below are spelled METASORE_ (missing "T") while the
  ## host key uses METASTORE_ — verify against the consuming application before
  ## renaming; the app may actually read the misspelled names.
  # METASTORE_POSTGRES_HOST: localhost
  # METASORE_POSTGRES_PORT: '5432'
  # METASORE_POSTGRES_USERNAME: postgres
  # METASORE_POSTGRES_PASSWORD: postgres
  # METASORE_POSTGRES_DATABASE: cuelake_metastore
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: zeppelin-server-conf-map
data:
  # 'SERVICE_DOMAIN' is a domain name to use for accessing the Zeppelin UI.
  # Should point to the IP address of the 'zeppelin-server' service.
  #
  # A wildcard subdomain needs to point to the same IP address to access
  # services inside a Pod (such as the Spark UI). I.e. if the service domain is
  # 'local.zeppelin-project.org', DNS configuration should make both
  # 'local.zeppelin-project.org' and '*.local.zeppelin-project.org' point to
  # the same address.
  #
  # Default value is 'local.zeppelin-project.org'; it points to 127.0.0.1, so
  # `kubectl port-forward zeppelin-server` will let localhost connect.
  # If your ingress controller routes to the `zeppelin-server` service and you
  # have a domain name for it (with a wildcard subdomain pointing to the same
  # address), replace SERVICE_DOMAIN with your own domain.
  # Quoted: host:port values are safest as explicit strings in YAML.
  SERVICE_DOMAIN: "local.zeppelin-project.org:8080"
  ZEPPELIN_K8S_SPARK_CONTAINER_IMAGE: "cuebook/spark:3.0.2.18"
  ZEPPELIN_K8S_CONTAINER_IMAGE: "cuebook/zeppelin-interpreter:0.9.0.4"
  ZEPPELIN_HOME: /zeppelin
  # Quoted: digits-and-colons scalars can hit YAML 1.1 sexagesimal parsing.
  ZEPPELIN_SERVER_RPC_PORTRANGE: "12320:12320"
  # default value of 'master' property for spark interpreter.
  SPARK_MASTER: "k8s://https://kubernetes.default.svc"
  # default value of 'SPARK_HOME' property for spark interpreter.
  SPARK_HOME: /spark
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: zeppelin-server-conf
data:
  # nginx reverse-proxy configuration for the zeppelin-server-gateway sidecar.
  # The literal tokens SERVICE_DOMAIN and NAMESPACE are substituted with `sed`
  # by the gateway container's startup args before nginx starts.
  nginx.conf: |
    daemon off;
    worker_processes auto;
    events {
      worker_connections 1024;
    }
    http {
      map $http_upgrade $connection_upgrade {
        default upgrade;
        '' close;
      }
      # first server block will be default. Proxy zeppelin server.
      server {
        listen 80;
        location / {
          proxy_pass http://localhost:8080;
          proxy_set_header Host $host;
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection $connection_upgrade;
          proxy_redirect http://localhost $scheme://SERVICE_DOMAIN;
        }
      }
      # match request domain [port]-[service].[serviceDomain]
      # proxy extra service such as spark-ui
      server {
        listen 80;
        server_name "~(?<svc_port>[0-9]+)-(?<svc_name>[^.]*)\.(.*)";
        location / {
          resolver 127.0.0.1:53 ipv6=off;
          proxy_pass http://$svc_name.NAMESPACE.svc:$svc_port;
          proxy_set_header Host $host;
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection $connection_upgrade;
          proxy_redirect http://localhost $scheme://SERVICE_DOMAIN;
          # redirect rule for spark ui. 302 redirect response misses port number of service domain
          proxy_redirect ~(http:[/]+[0-9]+[-][^-]+[-][^.]+)[^/]+(\/jobs.*) $1.SERVICE_DOMAIN$2;
        }
      }
    }
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: zeppelin-server-main
  labels:
    app.kubernetes.io/name: zeppelin-server-main
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: zeppelin-server-main
  strategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: zeppelin-server-main
    spec:
      serviceAccountName: cuelake
      volumes:
        - name: zeppelin-server-notebook-volume
          persistentVolumeClaim:
            claimName: zeppelin-server-notebook-volume-pvc
        - name: zeppelin-server-conf
          persistentVolumeClaim:
            claimName: zeppelin-server-conf-pvc
        - name: nginx-conf
          configMap:
            name: zeppelin-server-conf
            items:
              - key: nginx.conf
                path: nginx.conf
      containers:
        # Main Zeppelin server. Downloads its config from the cuelake repo at
        # startup, then launches zeppelin.sh. $(ZEPPELIN_HOME) is expanded by
        # Kubernetes from the env injected via zeppelin-server-conf-map.
        - name: zeppelin-server
          image: cuebook/zeppelin-server-lite:0.3
          command: ["sh", "-c"]
          args:
            - >-
              curl https://raw.githubusercontent.com/cuebook/cuelake/main/zeppelinConf/zeppelin-env.sh
              -o $(ZEPPELIN_HOME)/conf/zeppelin-env.sh &&
              curl https://raw.githubusercontent.com/cuebook/cuelake/main/zeppelinConf/zeppelin-site.xml
              -o $(ZEPPELIN_HOME)/conf/zeppelin-site.xml &&
              $(ZEPPELIN_HOME)/bin/zeppelin.sh
          lifecycle:
            preStop:
              exec:
                # SIGTERM triggers a quick exit; gracefully terminate instead
                command: ["sh", "-c", "ps -ef | grep org.apache.zeppelin.server.ZeppelinServer | grep -v grep | awk '{print $2}' | xargs kill"]
          ports:
            - name: http
              containerPort: 8080
            - name: https
              containerPort: 8443
            - name: rpc
              containerPort: 12320
          env:
            - name: POD_UID
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.uid
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.name
          envFrom:
            - configMapRef:
                name: zeppelin-server-conf-map
          volumeMounts:
            - name: zeppelin-server-notebook-volume  # configure this to persist notebook
              mountPath: /zeppelin/notebook
            - name: zeppelin-server-conf  # configure this to persist Zeppelin configuration
              mountPath: /zeppelin/conf
            # - name: zeppelin-server-custom-k8s  # configure this to mount customized Kubernetes spec for interpreter
            #   mountPath: /zeppelin/k8s
        # nginx sidecar: substitutes SERVICE_DOMAIN and NAMESPACE placeholders
        # into the mounted nginx.conf, then runs nginx in the foreground.
        - name: zeppelin-server-gateway
          image: nginx:1.14.0
          command: ["/bin/sh", "-c"]
          env:
            - name: SERVICE_DOMAIN
              valueFrom:
                configMapKeyRef:
                  name: zeppelin-server-conf-map
                  key: SERVICE_DOMAIN
          args:
            - >-
              cp -f /tmp/conf/nginx.conf /etc/nginx/nginx.conf;
              sed -i -e "s/SERVICE_DOMAIN/$SERVICE_DOMAIN/g" /etc/nginx/nginx.conf;
              sed -i -e "s/NAMESPACE/$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)/g" /etc/nginx/nginx.conf;
              cat /etc/nginx/nginx.conf;
              /usr/sbin/nginx
          volumeMounts:
            - name: nginx-conf
              mountPath: /tmp/conf
          lifecycle:
            preStop:
              exec:
                # SIGTERM triggers a quick exit; gracefully terminate instead
                command: ["/usr/sbin/nginx", "-s", "quit"]
        - name: dnsmasq  # nginx requires dns resolver for dynamic dns resolution
          image: "janeczku/go-dnsmasq:release-1.0.5"
          args:
            - --listen
            - "127.0.0.1:53"
            - --default-resolver
            - --append-search-domains
            - --hostsfile=/etc/hosts
            - --verbose
---
kind: Service
apiVersion: v1
metadata:
  name: zeppelin-server
spec:
  ports:
    - name: http
      port: 80
    - name: rpc  # port name is referenced in the code. So it shouldn't be changed.
      port: 12320
  selector:
    app.kubernetes.io/name: zeppelin-server-main
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cuelake
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cuelake-role
rules:
  # Core-API resources Zeppelin/CueLake manage when spawning interpreter pods.
  - apiGroups: [""]
    resources: ["pods", "pods/exec", "services", "configmaps"]
    verbs: ["create", "get", "update", "patch", "list", "delete", "watch"]
  # RBAC objects created for interpreter pods; "bind" is required to grant
  # roles to their service accounts.
  - apiGroups: ["rbac.authorization.k8s.io"]
    resources: ["roles", "rolebindings"]
    verbs: ["bind", "create", "get", "update", "patch", "list", "delete", "watch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cuelake-role-binding
subjects:
  - kind: ServiceAccount
    name: cuelake
    # NOTE(review): ServiceAccount subjects normally require an explicit
    # `namespace` field — the API server may reject this binding without one.
    # Add `namespace: <deploy-namespace>` here; verify against your cluster.
roleRef:
  kind: Role
  name: cuelake-role
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zeppelin-server-conf-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: zeppelin-server-notebook-volume-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: lakehouse
  labels:
    app.kubernetes.io/name: lakehouse
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: lakehouse
  strategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: lakehouse
    spec:
      serviceAccountName: cuelake
      volumes:
        - name: lakehouse-db-volume
          persistentVolumeClaim:
            claimName: lakehouse-db-volume-pvc
      containers:
        - name: lakehouse
          image: cuebook/lakehouse:0.3
          resources:
            requests:
              memory: "2560Mi"
              cpu: "250m"
          volumeMounts:
            - name: lakehouse-db-volume  # configure this to persist db
              mountPath: /code/db
          env:
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.namespace
          envFrom:
            - configMapRef:
                name: cuelake-conf-map
          ports:
            - name: http
              containerPort: 80
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: lakehouse-db-volume-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
kind: Service
apiVersion: v1
metadata:
  name: lakehouse
spec:
  ports:
    - name: http
      port: 80
  selector:
    app.kubernetes.io/name: lakehouse
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis
  labels:
    app.kubernetes.io/name: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: redis
  strategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: redis
    spec:
      containers:
        - name: redis
          image: redis:alpine
          ports:
            - name: redis
              containerPort: 6379
---
kind: Service
apiVersion: v1
metadata:
  name: redis
spec:
  ports:
    - name: redis
      port: 6379
  selector:
    app.kubernetes.io/name: redis
---
kind: Service
apiVersion: v1
metadata:
  name: sparkui
spec:
  ports:
    - name: spark-ui
      protocol: TCP
      port: 4040
      targetPort: 4040
  # Selects Spark interpreter pods by the label Zeppelin puts on them.
  selector:
    interpreterSettingName: spark
  type: ClusterIP
  sessionAffinity: None