DynaSlum/satsense
notebooks/Classification/Classification_Example.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define training and test data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "sampling_step_size = 10, 10\n",
    "\n",
    "windows = (\n",
    "    (50, 50),\n",
    "    (100, 100),\n",
    "    (200, 200),\n",
    ")\n",
    "\n",
    "home = Path.home()\n",
    "data = home / 'DynaSlum' / 'Work'\n",
    "\n",
    "train_files = (\n",
    "    data / 'section_1_multiband.tif',\n",
    "    data / 'section_2_multiband.tif',\n",
    ")\n",
    "\n",
    "test_files = (\n",
    "    data / 'section_3_multiband.tif',\n",
    ")\n",
    "\n",
    "ground_truth_file = data / 'slum_approved.shp'\n",
    "\n",
    "# Path where temporary files are saved\n",
    "work = home / 'satsense_notebook'"
   ]
  },
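  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next cell is an optional sanity check added to this walkthrough (it is not part of the satsense workflow itself): it verifies that the input rasters and the ground-truth shapefile defined above exist, and creates the working directory for intermediate results. It only uses `pathlib`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional check: make sure the input files defined above are present\n",
    "# before starting the (slow) feature extraction.\n",
    "for f in (*train_files, *test_files, ground_truth_file):\n",
    "    status = 'found' if f.exists() else 'MISSING'\n",
    "    print(f'{status:>7s}: {f}')\n",
    "\n",
    "# Create the directory for intermediate results if it does not exist yet.\n",
    "work.mkdir(parents=True, exist_ok=True)"
   ]
  },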
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define the set of features for classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from satsense.features import (NirNDVI, HistogramOfGradients, Pantex, Sift,\n",
    "                               Lacunarity, Texton)\n",
    "from satsense import Image\n",
    "\n",
    "train_images = [Image(file, 'worldview3') for file in train_files]\n",
    "\n",
    "ndvi = NirNDVI(windows)\n",
    "hog = HistogramOfGradients(windows)\n",
    "pantex = Pantex(windows)\n",
    "lacunarity = Lacunarity(windows)\n",
    "sift = Sift.from_images(windows, train_images)\n",
    "texton = Texton.from_images(windows, train_images)\n",
    "\n",
    "features = [\n",
    "    ndvi,\n",
    "    hog,\n",
    "    pantex,\n",
    "    lacunarity,\n",
    "    sift,\n",
    "    texton,\n",
    "]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute and save features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "from satsense import extract_features\n",
    "from satsense.generators import FullGenerator\n",
    "\n",
    "def compute_features(filenames):\n",
    "    paths = []\n",
    "    for filename in filenames:\n",
    "        image = Image(filename, 'worldview3')\n",
    "        path = str(work / Path(filename).stem) + os.sep\n",
    "        paths.append(path)        \n",
    "        if not os.path.exists(path):\n",
    "            os.makedirs(path)\n",
    "            generator = FullGenerator(image, sampling_step_size)\n",
    "            for feature_vector in extract_features(features, generator):\n",
    "                feature_vector.save(path)\n",
    "    return paths\n",
    "        \n",
    "train_data_paths = compute_features(train_files)"
   ]
  },
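  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick check (an optional addition to this example), the cell below lists the files that were written to the per-image feature directories, so you can confirm that something was saved for every training image. The exact file names are determined by satsense."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: list the cached feature files for each training image.\n",
    "for path in train_data_paths:\n",
    "    print(path)\n",
    "    for saved_file in sorted(Path(path).iterdir()):\n",
    "        print('   ', saved_file.name)"
   ]
  },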
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load training data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "from satsense import Image, FeatureVector\n",
    "from satsense.util.mask import get_ndxi_mask, load_mask_from_shapefile, resample, save_mask2file\n",
    "from satsense.features import NirNDVI, WVSI\n",
    "from satsense.generators import FullGenerator\n",
    "\n",
    "def load_feature_vector(features, path):\n",
    "    \"\"\"Load feature values from file.\"\"\"\n",
    "    feature_vector = []\n",
    "    for feature in features:\n",
    "        vector = FeatureVector.from_file(feature, path).vector\n",
    "        # flatten window/feature_size dimensions\n",
    "        vector.shape = (vector.shape[0], vector.shape[1], -1)\n",
    "        feature_vector.append(vector)\n",
    "    feature_vector = np.ma.dstack(feature_vector)\n",
    "    return feature_vector\n",
    "\n",
    "def load_ground_truth(filename, sampling_step_size, path, shape, crs, transform):\n",
    "    ground_truth = load_mask_from_shapefile(filename, shape, transform)\n",
    "    mask_file = path / 'ground_truth_mask.tif'\n",
    "    ground_truth_mask = save_mask2file(ground_truth, mask_file, crs, transform)\n",
    "    ground_truth_image = Image(mask_file, 'monochrome', normalization_parameters=False)\n",
    "    ground_truth = resample(FullGenerator(ground_truth_image, sampling_step_size))\n",
    "    return ground_truth\n",
    "\n",
    "labels = {\n",
    "    'other': 0,\n",
    "    'deprived_neighbourhood': 1,\n",
    "    'vegetation': 2,\n",
    "}\n",
    "\n",
    "x_train = []\n",
    "y_train = []\n",
    "\n",
    "for path, image in zip(train_data_paths, train_images):\n",
    "    print(\"Processing\", image.filename)\n",
    "    # Load feature vector\n",
    "    feature_vector = load_feature_vector(features, path)\n",
    "    \n",
    "    label_vector = np.zeros(feature_vector.shape[:2], dtype=np.uint8)\n",
    "\n",
    "    # Create deprived neighbourhood labels\n",
    "    ground_truth = load_ground_truth(\n",
    "        ground_truth_file, sampling_step_size, path, image.shape, image.crs, image.transform)\n",
    "    label_vector[ground_truth] = labels['deprived_neighbourhood']\n",
    "\n",
    "    # Create vegetation labels\n",
    "    generator = FullGenerator(image, sampling_step_size)\n",
    "    vegetation_mask = get_ndxi_mask(generator, NirNDVI)\n",
    "    label_vector[vegetation_mask] = labels['vegetation']\n",
    "\n",
    "    # Create x_train and y_train\n",
    "    feature_vector.shape = (-1, feature_vector.shape[2])\n",
    "    label_vector.shape = (-1, )\n",
    "\n",
    "    x_train.append(feature_vector)\n",
    "    y_train.append(label_vector)\n",
    "    \n",
    "x_train = np.concatenate(x_train)\n",
    "y_train = np.concatenate(y_train)"
   ]
  },
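  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before training it can be useful to look at how the three classes are distributed over the sampled cells; deprived neighbourhoods typically cover only a small fraction of the image, so the training set is often imbalanced. The cell below is an optional addition using plain NumPy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: inspect the class balance of the training labels.\n",
    "label_names = {value: name for name, value in labels.items()}\n",
    "values, counts = np.unique(y_train, return_counts=True)\n",
    "for value, count in zip(values, counts):\n",
    "    fraction = 100 * count / y_train.size\n",
    "    print(f'{label_names[value]:>25s}: {count:8d} samples ({fraction:.1f}%)')"
   ]
  },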
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train a classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "\n",
    "classifier = GradientBoostingClassifier(verbose=True)\n",
    "    \n",
    "classifier.fit(x_train, y_train)"
   ]
  },
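  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Training can take a while, so it may be worth persisting the fitted model. The cell below is an optional sketch using `joblib`, the usual way to serialize scikit-learn estimators; the file name is just an example."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    "\n",
    "# Optional: save the fitted classifier next to the other intermediate results.\n",
    "model_file = work / 'classifier.joblib'  # example file name\n",
    "joblib.dump(classifier, model_file)\n",
    "print('Saved classifier to', model_file)\n",
    "\n",
    "# It can be restored later with: classifier = joblib.load(model_file)"
   ]
  },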
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load test data and assess performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report, matthews_corrcoef, confusion_matrix\n",
    "\n",
    "test_data_paths = compute_features(test_files)\n",
    "\n",
    "test_images = [Image(f, 'worldview3') for f in test_files]\n",
    "\n",
    "for path, image in zip(test_data_paths, test_images):\n",
    "    print('Performance on', image.filename)\n",
    "    # Create x_test\n",
    "    x_test = load_feature_vector(features, path)\n",
    "    shape = x_test.shape\n",
    "    x_test.shape = (-1, shape[2])\n",
    "    \n",
    "    # Predict the labels\n",
    "    y_pred = classifier.predict(x_test)\n",
    "    \n",
    "    # Create y_test\n",
    "    y_test = np.zeros(shape[:2], dtype=np.uint8)\n",
    "    \n",
    "    # Create deprived neighbourhood labels \n",
    "    ground_truth = load_ground_truth(\n",
    "        ground_truth_file, sampling_step_size, path, image.shape, image.crs, image.transform)\n",
    "    y_test[ground_truth] = labels['deprived_neighbourhood']\n",
    "\n",
    "    # Create vegetation labels\n",
    "    generator = FullGenerator(image, sampling_step_size)\n",
    "    vegetation_mask = get_ndxi_mask(generator, NirNDVI)\n",
    "    y_test[vegetation_mask] = labels['vegetation']\n",
    "    y_test.shape = (-1, )\n",
    "    \n",
    "    # Assess performance\n",
    "\n",
    "    # Label the vegetation as buildings to create more accurate representation of the performance\n",
    "    # y_pred[y_pred == labels['vegetation']] = labels['other']\n",
    "    # y_test[y_test == labels['vegetation']] = labels['other']\n",
    "\n",
    "    print(matthews_corrcoef(y_test, y_pred))\n",
    "    print(classification_report(y_test, y_pred, labels=list(labels.values()), target_names=list(labels.keys())))\n",
    "    print(confusion_matrix(y_test, y_pred))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}