cuebook/cuelake

View on GitHub
cuelake.yaml

Summary

Maintainability
Test Coverage
# Environment settings for the lakehouse Deployment in this file, which loads
# every key of this ConfigMap through `envFrom`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: cuelake-conf-map
data:
  # Every entry below is commented out, so `data` is empty as shipped.
  #
  # Uncomment and edit the properties below to point to your production database.
  # NOTE(review): ConfigMap values are stored in plain text; consider moving the
  # password to a Secret.
  # POSTGRES_DB_HOST: localhost
  # POSTGRES_DB_USERNAME: postgres
  # POSTGRES_DB_PASSWORD: postgres
  # POSTGRES_DB_SCHEMA: cuelake
  # POSTGRES_DB_PORT: '5432'

  # Uncomment and edit the properties below to point to your metastore database.
  # NOTE(review): the last four keys are spelled `METASORE_` while the first is
  # `METASTORE_`; this looks like a typo. Confirm the names the application
  # actually reads before uncommenting.
  # METASTORE_POSTGRES_HOST: localhost
  # METASORE_POSTGRES_PORT: '5432'
  # METASORE_POSTGRES_USERNAME: postgres
  # METASORE_POSTGRES_PASSWORD: postgres
  # METASORE_POSTGRES_DATABASE: cuelake_metastore
---
# Environment for the Zeppelin server pod. The zeppelin-server container loads
# all keys via `envFrom`; the gateway container reads SERVICE_DOMAIN via
# `configMapKeyRef`.
kind: ConfigMap
apiVersion: v1
metadata:
  name: zeppelin-server-conf-map
data:
  # SERVICE_DOMAIN is the domain name used to access the Zeppelin UI. It should
  # resolve to the IP address of the 'zeppelin-server' service.
  #
  # The wildcard subdomain must resolve to the same IP address so that services
  # inside the Pod (such as the Spark UI) are reachable; e.g. for
  # 'local.zeppelin-project.org', DNS should point both
  # 'local.zeppelin-project.org' and '*.local.zeppelin-project.org' at the same
  # address.
  #
  # The default, 'local.zeppelin-project.org', points to 127.0.0.1, so
  # `kubectl port-forward zeppelin-server` makes it reachable on localhost.
  # If an ingress controller routes to the `zeppelin-server` service under your
  # own domain (with its wildcard subdomain on the same address), use that
  # domain here instead.
  SERVICE_DOMAIN: 'local.zeppelin-project.org:8080'
  ZEPPELIN_K8S_SPARK_CONTAINER_IMAGE: 'cuebook/spark:3.0.2.18'
  ZEPPELIN_K8S_CONTAINER_IMAGE: 'cuebook/zeppelin-interpreter:0.9.0.4'
  ZEPPELIN_HOME: '/zeppelin'
  # Quoted so the digits-and-colon value is always read as a string.
  ZEPPELIN_SERVER_RPC_PORTRANGE: '12320:12320'
  # Default value of the 'master' property for the spark interpreter.
  SPARK_MASTER: 'k8s://https://kubernetes.default.svc'
  # Default value of the 'SPARK_HOME' property for the spark interpreter.
  SPARK_HOME: '/spark'
---
# nginx configuration for the zeppelin-server-gateway container, which mounts
# this ConfigMap (volume `nginx-conf`) at /tmp/conf.
apiVersion: v1
kind: ConfigMap
metadata:
  name: zeppelin-server-conf
data:
  # SERVICE_DOMAIN and NAMESPACE in the text below are literal placeholders: the
  # gateway container's startup command copies this file to
  # /etc/nginx/nginx.conf and substitutes both with `sed` before starting nginx.
  # The `resolver 127.0.0.1:53` directive relies on the dnsmasq sidecar, which
  # listens on that address in the same pod.
  # Everything under `nginx.conf: |` is file content; edit it as nginx syntax.
  nginx.conf: |
    daemon off;
    worker_processes auto;
    events {
      worker_connections 1024;
    }
    http {
      map $http_upgrade $connection_upgrade {
        default upgrade;
        '' close;
      }

      # first server block will be default. Proxy zeppelin server.
      server {
        listen 80;
        location / {
          proxy_pass http://localhost:8080;
          proxy_set_header Host $host;
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection $connection_upgrade;
          proxy_redirect http://localhost $scheme://SERVICE_DOMAIN;
        }
      }

      # match request domain [port]-[service].[serviceDomain]
      # proxy extra service such as spark-ui
      server {
        listen 80;
        server_name "~(?<svc_port>[0-9]+)-(?<svc_name>[^.]*)\.(.*)";
        location / {
          resolver 127.0.0.1:53 ipv6=off;
          proxy_pass http://$svc_name.NAMESPACE.svc:$svc_port;
          proxy_set_header Host $host;
          proxy_http_version 1.1;
          proxy_set_header Upgrade $http_upgrade;
          proxy_set_header Connection $connection_upgrade;
          proxy_redirect http://localhost $scheme://SERVICE_DOMAIN;

          # redirect rule for spark ui. 302 redirect response misses port number of service domain
          proxy_redirect ~(http:[/]+[0-9]+[-][^-]+[-][^.]+)[^/]+(\/jobs.*) $1.SERVICE_DOMAIN$2;
        }
      }
    }
---
# Zeppelin server pod: the Zeppelin server itself, an nginx gateway that proxies
# to it on port 80, and a dnsmasq sidecar that gives nginx a local DNS resolver.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: zeppelin-server-main
  labels:
    app.kubernetes.io/name: zeppelin-server-main
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: zeppelin-server-main
  strategy:
    # The pod mounts two ReadWriteOnce claims. A rolling update starts the
    # replacement pod before the old one stops, so the new pod can hang waiting
    # for the volumes if it lands on another node, and two servers would briefly
    # share the same notebook/conf directories. Recreate stops the old pod first.
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: zeppelin-server-main
    spec:
      serviceAccountName: cuelake
      volumes:
      - name: zeppelin-server-notebook-volume
        persistentVolumeClaim:
          claimName: zeppelin-server-notebook-volume-pvc
      - name: zeppelin-server-conf
        persistentVolumeClaim:
          claimName: zeppelin-server-conf-pvc
      - name: nginx-conf
        configMap:
          name: zeppelin-server-conf
          items:
          - key: nginx.conf
            path: nginx.conf
      containers:
      - name: zeppelin-server
        image: cuebook/zeppelin-server-lite:0.3
        command: ["sh", "-c"]
        args:
        # Download the Zeppelin configuration, then start the server.
        # `--fail` makes curl exit non-zero on an HTTP error instead of saving
        # the error page as the config file, so the `&&` chain stops and the
        # container fails visibly rather than starting with a corrupt config.
        # $(ZEPPELIN_HOME) is expanded by Kubernetes from the container env.
        - >-
          curl --fail https://raw.githubusercontent.com/cuebook/cuelake/main/zeppelinConf/zeppelin-env.sh -o $(ZEPPELIN_HOME)/conf/zeppelin-env.sh &&
          curl --fail https://raw.githubusercontent.com/cuebook/cuelake/main/zeppelinConf/zeppelin-site.xml -o $(ZEPPELIN_HOME)/conf/zeppelin-site.xml &&
          $(ZEPPELIN_HOME)/bin/zeppelin.sh
        lifecycle:
          preStop:
            exec:
              # SIGTERM triggers a quick exit; gracefully terminate instead
              command: ["sh", "-c", "ps -ef | grep org.apache.zeppelin.server.ZeppelinServer | grep -v grep | awk '{print $2}' | xargs kill"]
        ports:
        - name: http
          containerPort: 8080
        - name: https
          containerPort: 8443
        - name: rpc
          containerPort: 12320
        env:
        - name: POD_UID
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.uid
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
        envFrom:
        - configMapRef:
            name: zeppelin-server-conf-map
        volumeMounts:
        - name: zeppelin-server-notebook-volume     # configure this to persist notebook
          mountPath: /zeppelin/notebook
        - name: zeppelin-server-conf                # configure this to persist Zeppelin configuration
          mountPath: /zeppelin/conf
        # - name: zeppelin-server-custom-k8s          # configure this to mount customized Kubernetes spec for interpreter
        #   mountPath: /zeppelin/k8s
      - name: zeppelin-server-gateway
        image: nginx:1.14.0
        command: ["/bin/sh", "-c"]
        env:
        - name: SERVICE_DOMAIN
          valueFrom:
            configMapKeyRef:
              name: zeppelin-server-conf-map
              key: SERVICE_DOMAIN
        args:
        # Copy the mounted template, substitute the SERVICE_DOMAIN and NAMESPACE
        # placeholders, print the result to the container log, then run nginx.
        # The folded scalar joins these lines with single spaces into one
        # command string.
        - >-
          cp -f /tmp/conf/nginx.conf /etc/nginx/nginx.conf;
          sed -i -e "s/SERVICE_DOMAIN/$SERVICE_DOMAIN/g" /etc/nginx/nginx.conf;
          sed -i -e "s/NAMESPACE/$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace)/g" /etc/nginx/nginx.conf;
          cat /etc/nginx/nginx.conf;
          /usr/sbin/nginx
        volumeMounts:
        - name: nginx-conf
          mountPath: /tmp/conf
        lifecycle:
          preStop:
            exec:
              # SIGTERM triggers a quick exit; gracefully terminate instead
              command: ["/usr/sbin/nginx", "-s", "quit"]
      - name: dnsmasq  # nginx requires dns resolver for dynamic dns resolution
        image: "janeczku/go-dnsmasq:release-1.0.5"
        args:
        - --listen
        - "127.0.0.1:53"
        - --default-resolver
        - --append-search-domains
        - --hostsfile=/etc/hosts
        - --verbose
---
# Service in front of the zeppelin-server-main pod (selected by its name label).
apiVersion: v1
kind: Service
metadata:
  name: zeppelin-server
spec:
  selector:
    app.kubernetes.io/name: zeppelin-server-main
  ports:
  - port: 80
    name: http
  # The port name `rpc` is referenced in the code, so it must not be changed.
  - port: 12320
    name: rpc
---
# Service account used by the zeppelin-server-main and lakehouse pods
# (both set `serviceAccountName: cuelake`).
kind: ServiceAccount
apiVersion: v1
metadata:
  name: cuelake
---
# Namespaced permissions that cuelake-role-binding grants to the `cuelake`
# service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: cuelake-role
rules:
# Core API group: manage pods (including exec), services and configmaps.
- apiGroups:
  - ""
  resources:
  - pods
  - pods/exec
  - services
  - configmaps
  verbs:
  - create
  - get
  - update
  - patch
  - list
  - delete
  - watch
# RBAC API group: manage and bind roles and role bindings.
# NOTE(review): this is a broad grant; confirm it is all required.
- apiGroups:
  - rbac.authorization.k8s.io
  resources:
  - roles
  - rolebindings
  verbs:
  - bind
  - create
  - get
  - update
  - patch
  - list
  - delete
  - watch
---
# Binds cuelake-role to the `cuelake` service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: cuelake-role-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: cuelake-role
subjects:
# No namespace is given for the subject.
- name: cuelake
  kind: ServiceAccount
---
# Storage claim behind the `zeppelin-server-conf` volume, which the Zeppelin
# server mounts at /zeppelin/conf.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: zeppelin-server-conf-pvc
spec:
  resources:
    requests:
      storage: '1Gi'
  accessModes: [ReadWriteOnce]
status: {}
---
# Storage claim behind the `zeppelin-server-notebook-volume` volume, which the
# Zeppelin server mounts at /zeppelin/notebook.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: zeppelin-server-notebook-volume-pvc
spec:
  resources:
    requests:
      storage: '1Gi'
  accessModes: [ReadWriteOnce]
status: {}
---
# Lakehouse application pod. Its settings come from cuelake-conf-map and its
# database directory (/code/db) is persisted on lakehouse-db-volume-pvc.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: lakehouse
  labels:
    app.kubernetes.io/name: lakehouse
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: lakehouse
  strategy:
    # The pod mounts a ReadWriteOnce claim. A rolling update starts the
    # replacement pod before the old one stops, so the new pod can hang waiting
    # for the volume if it lands on another node, and two pods would briefly
    # share the same database directory. Recreate stops the old pod first.
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: lakehouse
    spec:
      serviceAccountName: cuelake
      volumes:
      - name: lakehouse-db-volume
        persistentVolumeClaim:
          claimName: lakehouse-db-volume-pvc
      containers:
      - name: lakehouse
        image: cuebook/lakehouse:0.3
        resources:
          requests:
            memory: "2560Mi"
            cpu: "250m"
        volumeMounts:
        - name: lakehouse-db-volume     # configure this to persist db
          mountPath: /code/db
        env:
        # Exposes the pod's own namespace to the application.
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
        envFrom:
        - configMapRef:
            name: cuelake-conf-map
        ports:
        - name: http
          containerPort: 80
---
# Storage claim behind the `lakehouse-db-volume` volume, which the lakehouse
# container mounts at /code/db.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: lakehouse-db-volume-pvc
spec:
  resources:
    requests:
      storage: '1Gi'
  accessModes: [ReadWriteOnce]
status: {}
---
# Service in front of the lakehouse pod (selected by its name label).
apiVersion: v1
kind: Service
metadata:
  name: lakehouse
spec:
  selector:
    app.kubernetes.io/name: lakehouse
  ports:
  - port: 80
    name: http
---
# Single-replica Redis instance; no volume is attached to this pod.
kind: Deployment
apiVersion: apps/v1
metadata:
  labels:
    app.kubernetes.io/name: redis
  name: redis
spec:
  replicas: 1
  strategy:
    type: RollingUpdate
  selector:
    matchLabels:
      app.kubernetes.io/name: redis
  template:
    metadata:
      labels:
        app.kubernetes.io/name: redis
    spec:
      containers:
      # NOTE(review): `redis:alpine` carries no version, so the image that gets
      # pulled can change over time; consider pinning a specific tag.
      - image: redis:alpine
        name: redis
        ports:
        - containerPort: 6379
          name: redis
---
# Service in front of the redis pod (selected by its name label).
apiVersion: v1
kind: Service
metadata:
  name: redis
spec:
  selector:
    app.kubernetes.io/name: redis
  ports:
  - port: 6379
    name: redis
---
# Cluster-internal Service that forwards port 4040 to pods labelled
# `interpreterSettingName: spark`.
apiVersion: v1
kind: Service
metadata:
  name: sparkui
spec:
  type: ClusterIP
  sessionAffinity: 'None'
  selector:
    interpreterSettingName: spark
  ports:
  - name: spark-ui
    port: 4040
    targetPort: 4040
    protocol: TCP