kubernetes/manifests/west/torchserve-deployment.yaml
# https://github.com/pytorch/serve/blob/master/kubernetes/Helm/templates/torchserve.yaml
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: torchserve-deployment
  namespace: hm-cnn
  labels:
    app.kubernetes.io/name: torchserve
    app: torchserve
spec:
  replicas: 1
  selector:
    matchLabels:
      app: torchserve
  template:
    metadata:
      labels:
        app: torchserve
    spec:
      containers:
        - name: torchserve
          # image: docker.io/pytorch/torchserve:0.4.2-gpu
          image: docker.io/pytorch/torchserve:0.4.2-cpu
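          # Start TorchServe using the config.properties provided by the volume mounted below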
          args: ["torchserve", "--start", "--ts-config", "/data/model-server/config/config.properties"]
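          # Default TorchServe ports: 8080 (REST inference), 8081 (REST management),
          # 8082 (metrics), 7070/7071 (gRPC inference/management)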
          ports:
            - name: inference
              protocol: TCP
              containerPort: 8080
            - name: inference-grpc
              protocol: TCP
              containerPort: 7070
            - name: management
              protocol: TCP
              containerPort: 8081
            - name: management-grpc
              protocol: TCP
              containerPort: 7071
            - name: metrics
              protocol: TCP
              containerPort: 8082
          volumeMounts:
            - name: model-server-volume
              mountPath: /data/model-server
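          # CPU-only sizing; no GPU is requested. To serve on GPUs, switch to the
          # -gpu image above and raise nvidia.com/gpu accordingly.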
          resources:
            limits:
              cpu: "1"
              memory: 4Gi
              nvidia.com/gpu: "0"
            requests:
              cpu: "1"
              memory: 1Gi
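      # The PVC is expected to hold config/config.properties (and, typically, the
      # model store referenced by that config) under /data/model-server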
      volumes:
        - name: model-server-volume
          persistentVolumeClaim:
            claimName: model-server-pvc