PiePline/piepline

View on GitHub
examples/notebooks/img_classification.ipynb

Summary

Maintainability
Test Coverage
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MNIST digits classification example\n",
    "This example based on PyTorch [example](https://github.com/pytorch/examples/tree/master/mnist)\n",
    "\n",
    "Define imports:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from neural_pipeline.builtin.monitors.mpl import MPLMonitor\n",
    "from neural_pipeline import DataProducer, AbstractDataset, TrainConfig, TrainStage, ValidationStage, Trainer, FileStructManager\n",
    "\n",
    "import torch\n",
    "from torch import nn\n",
    "import torch.nn.functional as F\n",
    "from torchvision import datasets, transforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Net(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(Net, self).__init__()\n",
    "        self.conv1 = nn.Conv2d(1, 20, 5, 1)\n",
    "        self.conv2 = nn.Conv2d(20, 50, 5, 1)\n",
    "        self.fc1 = nn.Linear(4 * 4 * 50, 500)\n",
    "        self.fc2 = nn.Linear(500, 10)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = F.relu(self.conv1(x))\n",
    "        x = F.max_pool2d(x, 2, 2)\n",
    "        x = F.relu(self.conv2(x))\n",
    "        x = F.max_pool2d(x, 2, 2)\n",
    "        x = x.view(-1, 4 * 4 * 50)\n",
    "        x = F.relu(self.fc1(x))\n",
    "        x = self.fc2(x)\n",
    "        return F.log_softmax(x, dim=1)\n",
    "    \n",
    "model = Net()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Lets define metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class NLLLoss(torch.nn.Module):\n",
    "    def forward(self, output: torch.Tensor, target: torch.Tensor):\n",
    "        return F.nll_loss(output, target)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define NeuralPipeline compatible dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MNISTDataset(AbstractDataset):\n",
    "    transforms = transforms.Compose([transforms.ToTensor(),\n",
    "                                     transforms.Normalize((0.1307,), (0.3081,))])\n",
    "\n",
    "    def __init__(self, data_dir: str, is_train: bool):\n",
    "        self.dataset = datasets.MNIST(data_dir, train=is_train, download=True)\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.dataset)\n",
    "\n",
    "    def __getitem__(self, item):\n",
    "        data, target = self.dataset[item]\n",
    "        return {'data': self.transforms(data), 'target': target}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define training process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fsm = FileStructManager(base_dir='data', is_continue=False)\n",
    "\n",
    "train_dataset = DataProducer([MNISTDataset('data/dataset', True)],\n",
    "                             batch_size=4, num_workers=2)\n",
    "validation_dataset = DataProducer([MNISTDataset('data/dataset', False)],\n",
    "                                  batch_size=4, num_workers=2)\n",
    "\n",
    "train_config = TrainConfig([TrainStage(train_dataset), ValidationStage(validation_dataset)],\n",
    "                           torch.nn.NLLLoss(),\n",
    "                           torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.5))\n",
    "\n",
    "trainer = Trainer(model, train_config, fsm, torch.device('cuda:0'))\n",
    "trainer.set_epoch_num(50)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Start training process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (datascience)",
   "language": "python",
   "name": "datascience"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}