machine-learning/triton/amazon-sagamaker-triton-resnet-50/set_up/notebook.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "7871e6ed",
"metadata": {},
"outputs": [],
"source": [
"# Adapted from the AWS sample notebook:\n",
"# https://github.com/aws/amazon-sagemaker-examples/blob/main/multi-model-endpoints/mme-on-gpu/cv/resnet50_mme_with_gpu.ipynb\n",
"\n",
"# Prepare the PyTorch model: run generate_model_pytorch.sh inside the\n",
"# nvcr.io/nvidia/pytorch container with ./workspace bind-mounted at /workspace.\n",
"# A later cell moves workspace/model.pt, so the script is expected to leave\n",
"# that file in ./workspace.\n",
"!docker run --rm -it --gpus all \\\n",
"    -v \"${PWD}/workspace:/workspace\" \\\n",
"    nvcr.io/nvidia/pytorch:22.07-py3 \\\n",
"    /bin/bash generate_model_pytorch.sh"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3502dd08",
"metadata": {},
"outputs": [],
"source": [
"# Directory that will hold config.pbtxt for the PyTorch variant of the model.\n",
"!mkdir -p triton-serve-pt/resnet-50"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b8795a2",
"metadata": {},
"outputs": [],
"source": [
"%%writefile triton-serve-pt/resnet-50/config.pbtxt\n",
"# Triton model configuration for the PyTorch variant.\n",
"# When `name` is set it must equal the model repository directory name,\n",
"# so it is \"resnet-50\" to match triton-serve-pt/resnet-50/.\n",
"name: \"resnet-50\"\n",
"platform: \"pytorch_libtorch\"\n",
"max_batch_size: 128\n",
"# dims below describe a single sample; with max_batch_size > 0 the batch\n",
"# dimension is implicit and is not listed.\n",
"input {\n",
"  name: \"INPUT__0\"\n",
"  data_type: TYPE_FP32\n",
"  dims: 3\n",
"  dims: 224\n",
"  dims: 224\n",
"}\n",
"output {\n",
"  name: \"OUTPUT__0\"\n",
"  data_type: TYPE_FP32\n",
"  dims: 1000\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "996e0df3",
"metadata": {},
"outputs": [],
"source": [
"# Prepare the TensorRT model: run generate_model_trt.sh inside the\n",
"# nvcr.io/nvidia/pytorch container with ./workspace bind-mounted at /workspace.\n",
"# A later cell moves workspace/model.plan, so the script is expected to leave\n",
"# that file in ./workspace.\n",
"!docker run --rm -it --gpus all \\\n",
"    -v \"${PWD}/workspace:/workspace\" \\\n",
"    nvcr.io/nvidia/pytorch:22.07-py3 \\\n",
"    /bin/bash generate_model_trt.sh"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0dff45fc",
"metadata": {},
"outputs": [],
"source": [
"# Directory that will hold config.pbtxt for the TensorRT variant of the model.\n",
"!mkdir -p triton-serve-trt/resnet-50"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8a19004",
"metadata": {},
"outputs": [],
"source": [
"%%writefile triton-serve-trt/resnet-50/config.pbtxt\n",
"# Triton model configuration for the TensorRT variant.\n",
"# When `name` is set it must equal the model repository directory name,\n",
"# so it is \"resnet-50\" to match triton-serve-trt/resnet-50/.\n",
"name: \"resnet-50\"\n",
"platform: \"tensorrt_plan\"\n",
"max_batch_size: 128\n",
"# dims below describe a single sample; with max_batch_size > 0 the batch\n",
"# dimension is implicit and is not listed.\n",
"input {\n",
"  name: \"input\"\n",
"  data_type: TYPE_FP32\n",
"  dims: 3\n",
"  dims: 224\n",
"  dims: 224\n",
"}\n",
"output {\n",
"  name: \"output\"\n",
"  data_type: TYPE_FP32\n",
"  dims: 1000\n",
"}\n",
"# Warmup request issued at the maximum batch size (128) against \"input\".\n",
"model_warmup {\n",
"  name: \"bs128 Warmup\"\n",
"  batch_size: 128\n",
"  inputs: {\n",
"    key: \"input\"\n",
"    value: {\n",
"      data_type: TYPE_FP32\n",
"      dims: 3\n",
"      dims: 224\n",
"      dims: 224\n",
"      zero_data: false\n",
"    }\n",
"  }\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9462a5db2be4503",
"metadata": {},
"outputs": [],
"source": [
"# PyTorch variant: place model.pt under version directory 1/, then archive\n",
"# the resnet-50/ directory (relative to triton-serve-pt/) as resnet_pt_v0.tar.gz.\n",
"!mkdir -p triton-serve-pt/resnet-50/1\n",
"!mv -f workspace/model.pt triton-serve-pt/resnet-50/1/\n",
"!tar -C triton-serve-pt/ -czf resnet_pt_v0.tar.gz resnet-50\n",
"\n",
"# TensorRT variant: place model.plan under version directory 1/, then archive\n",
"# the resnet-50/ directory (relative to triton-serve-trt/) as resnet_trt_v0.tar.gz.\n",
"!mkdir -p triton-serve-trt/resnet-50/1\n",
"!mv -f workspace/model.plan triton-serve-trt/resnet-50/1/\n",
"!tar -C triton-serve-trt/ -czf resnet_trt_v0.tar.gz resnet-50"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "215f3cdc",
"metadata": {},
"outputs": [],
"source": [
"# Upload both model archives to S3 under the \"resnet-50\" key prefix.\n",
"# No bucket is passed to upload_data, so the SageMaker session chooses it.\n",
"\n",
"import boto3\n",
"import sagemaker\n",
"\n",
"\n",
"model_name = \"resnet-50\"\n",
"sagemaker_session = sagemaker.Session(boto_session=boto3.Session())\n",
"\n",
"\n",
"def _upload_archive(archive_path):\n",
"    \"\"\"Upload one local archive under the model_name prefix; return upload_data's result.\"\"\"\n",
"    return sagemaker_session.upload_data(path=archive_path, key_prefix=model_name)\n",
"\n",
"\n",
"model_uri_pt = _upload_archive(\"resnet_pt_v0.tar.gz\")\n",
"model_uri_trt = _upload_archive(\"resnet_trt_v0.tar.gz\")"
]
}
],
"metadata": {
"instance_type": "ml.p3.2xlarge",
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}