notebooks/Classification/Classification_Example.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Automatically reload imported modules when their source files change\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Define training and test data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"# Step size (x, y) of the sliding-window generator\n",
"sampling_step_size = 10, 10\n",
"\n",
"# Window sizes over which each feature is computed\n",
"windows = (\n",
"    (50, 50),\n",
"    (100, 100),\n",
"    (200, 200),\n",
")\n",
"\n",
"# NOTE(review): the data location is hardcoded relative to the home\n",
"# directory; adjust these paths to your own setup.\n",
"home = Path.home()\n",
"data = home / 'DynaSlum' / 'Work'\n",
"\n",
"train_files = (\n",
"    data / 'section_1_multiband.tif',\n",
"    data / 'section_2_multiband.tif',\n",
")\n",
"\n",
"test_files = (\n",
"    data / 'section_3_multiband.tif',\n",
")\n",
"\n",
"# Shapefile outlining the approved slum areas (used as ground truth)\n",
"ground_truth_file = data / 'slum_approved.shp'\n",
"\n",
"# Path where temporary files are saved\n",
"work = home / 'satsense_notebook'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Define the set of features for classification"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from satsense.features import (NirNDVI, HistogramOfGradients, Pantex, Sift,\n",
"                               Lacunarity, Texton)\n",
"from satsense import Image\n",
"\n",
"train_images = [Image(file, 'worldview3') for file in train_files]\n",
"\n",
"# Each feature is computed over every window size in `windows`\n",
"ndvi = NirNDVI(windows)\n",
"hog = HistogramOfGradients(windows)\n",
"pantex = Pantex(windows)\n",
"lacunarity = Lacunarity(windows)\n",
"# Sift and Texton are initialized from the training images themselves\n",
"sift = Sift.from_images(windows, train_images)\n",
"texton = Texton.from_images(windows, train_images)\n",
"\n",
"features = [\n",
"    ndvi,\n",
"    hog,\n",
"    pantex,\n",
"    lacunarity,\n",
"    sift,\n",
"    texton,\n",
"]\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compute and save features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"from satsense import extract_features\n",
"from satsense.generators import FullGenerator\n",
"\n",
"def compute_features(filenames):\n",
"    \"\"\"Extract features for each image and save them to disk.\n",
"\n",
"    Results are cached: if the output directory for an image already\n",
"    exists, feature extraction is skipped and the cached files are\n",
"    reused on load.\n",
"\n",
"    Returns the list of directory paths (as strings, one per image)\n",
"    holding the saved feature vectors.\n",
"    \"\"\"\n",
"    paths = []\n",
"    for filename in filenames:\n",
"        image = Image(filename, 'worldview3')\n",
"        path = str(work / Path(filename).stem) + os.sep\n",
"        paths.append(path)\n",
"        if not os.path.exists(path):\n",
"            os.makedirs(path)\n",
"            generator = FullGenerator(image, sampling_step_size)\n",
"            for feature_vector in extract_features(features, generator):\n",
"                feature_vector.save(path)\n",
"    return paths\n",
"\n",
"train_data_paths = compute_features(train_files)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load training data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"\n",
"from satsense import Image, FeatureVector\n",
"from satsense.util.mask import get_ndxi_mask, load_mask_from_shapefile, resample, save_mask2file\n",
"from satsense.features import NirNDVI, WVSI\n",
"from satsense.generators import FullGenerator\n",
"\n",
"def load_feature_vector(features, path):\n",
"    \"\"\"Load feature values from file.\n",
"\n",
"    Returns a masked array with all features stacked along the last\n",
"    axis, one row/column per generator window.\n",
"    \"\"\"\n",
"    feature_vector = []\n",
"    for feature in features:\n",
"        vector = FeatureVector.from_file(feature, path).vector\n",
"        # flatten window/feature_size dimensions\n",
"        vector.shape = (vector.shape[0], vector.shape[1], -1)\n",
"        feature_vector.append(vector)\n",
"    feature_vector = np.ma.dstack(feature_vector)\n",
"    return feature_vector\n",
"\n",
"def load_ground_truth(filename, sampling_step_size, path, shape, crs, transform):\n",
"    \"\"\"Rasterize the ground truth shapefile and resample it to the feature grid.\"\"\"\n",
"    ground_truth = load_mask_from_shapefile(filename, shape, transform)\n",
"    # `path` is a plain string (see compute_features), so it must be\n",
"    # wrapped in a Path before joining with the mask filename.\n",
"    mask_file = Path(path) / 'ground_truth_mask.tif'\n",
"    save_mask2file(ground_truth, mask_file, crs, transform)\n",
"    ground_truth_image = Image(mask_file, 'monochrome', normalization_parameters=False)\n",
"    ground_truth = resample(FullGenerator(ground_truth_image, sampling_step_size))\n",
"    return ground_truth\n",
"\n",
"# Integer class labels; every window not covered by a mask below\n",
"# remains 'other' (0).\n",
"labels = {\n",
"    'other': 0,\n",
"    'deprived_neighbourhood': 1,\n",
"    'vegetation': 2,\n",
"}\n",
"\n",
"x_train = []\n",
"y_train = []\n",
"\n",
"for path, image in zip(train_data_paths, train_images):\n",
"    print(\"Processing\", image.filename)\n",
"    # Load feature vector\n",
"    feature_vector = load_feature_vector(features, path)\n",
"    \n",
"    label_vector = np.zeros(feature_vector.shape[:2], dtype=np.uint8)\n",
"\n",
"    # Create deprived neighbourhood labels\n",
"    ground_truth = load_ground_truth(\n",
"        ground_truth_file, sampling_step_size, path, image.shape, image.crs, image.transform)\n",
"    label_vector[ground_truth] = labels['deprived_neighbourhood']\n",
"\n",
"    # Create vegetation labels (applied after the ground truth, so\n",
"    # vegetation takes precedence where the two masks overlap)\n",
"    generator = FullGenerator(image, sampling_step_size)\n",
"    vegetation_mask = get_ndxi_mask(generator, NirNDVI)\n",
"    label_vector[vegetation_mask] = labels['vegetation']\n",
"\n",
"    # Create x_train and y_train: flatten the spatial dimensions so each\n",
"    # row is one window's feature vector with its matching label\n",
"    feature_vector.shape = (-1, feature_vector.shape[2])\n",
"    label_vector.shape = (-1, )\n",
"\n",
"    x_train.append(feature_vector)\n",
"    y_train.append(label_vector)\n",
"    \n",
"x_train = np.concatenate(x_train)\n",
"y_train = np.concatenate(y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train a classifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.ensemble import GradientBoostingClassifier\n",
"\n",
"# Fit a gradient boosting classifier on the flattened window features\n",
"classifier = GradientBoostingClassifier(verbose=True)\n",
"\n",
"classifier.fit(x_train, y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load test data and assess performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import classification_report, matthews_corrcoef, confusion_matrix\n",
"\n",
"test_data_paths = compute_features(test_files)\n",
"\n",
"test_images = [Image(file, 'worldview3') for file in test_files]\n",
"\n",
"for path, image in zip(test_data_paths, test_images):\n",
"    print('Performance on', image.filename)\n",
"    # Create x_test\n",
"    x_test = load_feature_vector(features, path)\n",
"    shape = x_test.shape\n",
"    x_test.shape = (-1, shape[2])\n",
"\n",
"    # Predict the labels\n",
"    y_pred = classifier.predict(x_test)\n",
"\n",
"    # Create y_test, labelled in the same way as y_train above\n",
"    y_test = np.zeros(shape[:2], dtype=np.uint8)\n",
"\n",
"    # Create deprived neighbourhood labels\n",
"    ground_truth = load_ground_truth(\n",
"        ground_truth_file, sampling_step_size, path, image.shape, image.crs, image.transform)\n",
"    y_test[ground_truth] = labels['deprived_neighbourhood']\n",
"\n",
"    # Create vegetation labels\n",
"    generator = FullGenerator(image, sampling_step_size)\n",
"    vegetation_mask = get_ndxi_mask(generator, NirNDVI)\n",
"    y_test[vegetation_mask] = labels['vegetation']\n",
"    y_test.shape = (-1, )\n",
"\n",
"    # Assess performance\n",
"\n",
"    # Optionally merge the vegetation class into 'other' to assess\n",
"    # slum vs non-slum performance only\n",
"    # y_pred[y_pred == labels['vegetation']] = labels['other']\n",
"    # y_test[y_test == labels['vegetation']] = labels['other']\n",
"\n",
"    print(matthews_corrcoef(y_test, y_pred))\n",
"    print(classification_report(y_test, y_pred, labels=list(labels.values()), target_names=list(labels.keys())))\n",
"    print(confusion_matrix(y_test, y_pred))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}