src/qinfer/tests/test_region_estimates.py from QInfer/python-qinfer

src/qinfer/tests/test_region_estimates.py
Summary

Maintainability

5 days
Test Coverage

Issues
#!/usr/bin/python
# -*- coding: utf-8 -*-
##
# test_region_estimates.py: Checks that computed credible regions are working.
##
# © 2017, Chris Ferrie (csferrie@gmail.com) and
#         Christopher Granade (cgranade@cgranade.com).
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#     1. Redistributions of source code must retain the above copyright
#        notice, this list of conditions and the following disclaimer.
#
#     2. Redistributions in binary form must reproduce the above copyright
#        notice, this list of conditions and the following disclaimer in the
#        documentation and/or other materials provided with the distribution.
#
#     3. Neither the name of the copyright holder nor the names of its
#        contributors may be used to endorse or promote products derived from
#        this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
##

## FEATURES ###################################################################

from __future__ import absolute_import
from __future__ import division # Ensures that a/b is always a float.

## IMPORTS ####################################################################

import numpy as np
from numpy.testing import assert_equal, assert_almost_equal, assert_array_less

from qinfer.abstract_model import FiniteOutcomeModel
from qinfer.tests.base_test import DerandomizedTestCase, MockModel
from qinfer.distributions import MultivariateNormalDistribution, ParticleDistribution
from qinfer.smc import SMCUpdater

## FUNCTIONS ##################################################################

def unique_rows(a):
    """
    Discards duplicate rows.
    """
    # from http://stackoverflow.com/a/16971324/1082565
    ind = np.lexsort(a.T)
    return a[ind[np.concatenate(([True],np.any(a[ind[1:]]!=a[ind[:-1]],axis=1)))]]

## CLASSES ####################################################################

class TestSMCCredibleRegions(DerandomizedTestCase):

    N_PARTICLES = 10000
    N_MPS = 4
    MEAN = np.array([2,3,5,7])
    COV = np.array([[1,0,0,0.5],[0,1,0.2,0],[0,0.2,2,0],[0.5,0,0,1]])
    SLICE = np.s_[:2]

    def test_est_credible_region(self):
        """
        Tests that est_credible_region doesn't fail miserably
        """
        dist = MultivariateNormalDistribution(self.MEAN, self.COV)
        # the model is irrelevant; we just want the updater to have some particles
        # with the desired normal distribution.
        u = SMCUpdater(MockModel(self.N_MPS), self.N_PARTICLES, dist)

        # first check that 0.95 confidence points consume 0.9 confidence points
        points1 = u.est_credible_region(level=0.95)
        points2 = u.est_credible_region(level=0.9)
        assert_almost_equal(
            np.sort(unique_rows(np.concatenate([points1, points2])), axis=0),
            np.sort(points1, axis=0)
        )

        # do the same thing with different slice
        points1 = u.est_credible_region(level=0.95, modelparam_slice=self.SLICE)
        points2 = u.est_credible_region(level=0.9, modelparam_slice=self.SLICE)
        assert_almost_equal(
            np.sort(unique_rows(np.concatenate([points1, points2])), axis=0),
            np.sort(points1, axis=0)
        )

    def test_region_est_hull(self):
        """
        Tests that test_region_est_hull works
        """
        dist = MultivariateNormalDistribution(self.MEAN, self.COV)
        # the model is irrelevant; we just want the updater to have some particles
        # with the desired normal distribution.
        u = SMCUpdater(MockModel(self.N_MPS), self.N_PARTICLES, dist)

        faces, vertices = u.region_est_hull(level=0.95)

        # In this multinormal case, the convex hull surface
        # should be centered at MEAN
        assert_almost_equal(
            np.round(np.mean(vertices, axis=0)),
            np.round(self.MEAN)
        )

        # And a lower level should result in a smaller hull
        # and therefore smaller sample variance
        faces2, vertices2 = u.region_est_hull(level=0.2)
        assert_array_less(np.var(vertices2, axis=0), np.var(vertices, axis=0))

    def test_region_est_ellipsoid(self):
        """
        Tests that region_est_ellipsoid works.
        """

        dist = MultivariateNormalDistribution(self.MEAN, self.COV)
        # the model is irrelevant; we just want the updater to have some particles
        # with the desired normal distribution.
        u = SMCUpdater(MockModel(4), self.N_PARTICLES, dist)

        # ask for a confidence level of 0.5
        A, c = u.region_est_ellipsoid(level=0.5)

        # center of ellipse should be the mean of the multinormal
        assert_almost_equal(np.round(c), self.MEAN, 1)

        # finally, the principal lengths of the ellipsoid
        # should be the same as COV
        _, QA, _ = np.linalg.svd(A)
        _, QC, _ = np.linalg.svd(self.COV)
        QA, QC = np.sqrt(QA), np.sqrt(QC)
        assert_almost_equal(
            QA / np.linalg.norm(QA),
            QC / np.linalg.norm(QC),
            1
        )

    def test_in_credible_region(self):
        """
        Tests that in_credible_region works.
        """

        dist = MultivariateNormalDistribution(self.MEAN, self.COV)
        # the model is irrelevant; we just want the updater to have some particles
        # with the desired normal distribution.
        u = SMCUpdater(MockModel(4), self.N_PARTICLES, dist)

        # some points to test with
        test_points = np.random.multivariate_normal(self.MEAN, self.COV, self.N_PARTICLES)

        # method='pce'
        results = [
            u.in_credible_region(test_points, level=0.9, method='pce'),
            u.in_credible_region(test_points, level=0.84, method='pce'),
            u.in_credible_region(test_points, level=0.5, method='pce'),
        ]
        assert_almost_equal(
            np.array([np.mean(x.astype('float')) for x in results]),
            np.array([0.9, 0.84, 0.5]),
            3
        )

        # method='hpd-hull'
        results1 = [
            u.in_credible_region(test_points, level=0.9, method='hpd-hull'),
            u.in_credible_region(test_points, level=0.84, method='hpd-hull'),
            u.in_credible_region(test_points, level=0.5, method='hpd-hull'),
        ]
        assert_array_less(
            np.array([0.9, 0.84, 0.5]),
            np.array([np.mean(x.astype('float')) for x in results1])
        )

        # method='hpd-mvee'
        results2 = [
            u.in_credible_region(test_points, level=0.9, method='hpd-mvee'),
            u.in_credible_region(test_points, level=0.84, method='hpd-mvee'),
            u.in_credible_region(test_points, level=0.5, method='hpd-mvee'),
        ]
        assert_array_less(
            np.array([0.9, 0.84, 0.5]),
            np.array([np.mean(x.astype('float')) for x in results2])
        )

        # the mvee should be bigger than the convex hull.
        # this passes iff all points in the ellipses are
        # also in the hulls.
        assert_array_less(
            np.hstack([x.astype('float') for x in results1]),
            np.hstack([x.astype('float') for x in results2]) + 0.5
        )

        # check for no failures with slices.
        u.in_credible_region(test_points[:100,self.SLICE], level=0.9, method='pce', modelparam_slice=self.SLICE)
        u.in_credible_region(test_points[:100,self.SLICE], level=0.9, method='hpd-hull', modelparam_slice=self.SLICE)
        u.in_credible_region(test_points[:100,self.SLICE], level=0.9, method='hpd-mvee', modelparam_slice=self.SLICE)

        # check for no failures with single inputs
        assert(u.in_credible_region(test_points[0,:], level=0.9, method='pce').size == 1)
        assert(u.in_credible_region(test_points[0,:], level=0.9, method='hpd-hull').size == 1)
        assert(u.in_credible_region(test_points[0,:], level=0.9, method='hpd-mvee').size == 1)

class TestCredibleRegions(DerandomizedTestCase):

    N_PARTICLES = 10000
    N_MPS = 4
    MEAN = np.array([2,3,5,7])
    COV = np.array([[1,0,0,0.5],[0,1,0.2,0],[0,0.2,2,0],[0.5,0,0,1]])
    SLICE = np.s_[:2]

    def test_est_credible_region(self):
        """
        Tests that est_credible_region doesn't fail miserably
        """
        dist = MultivariateNormalDistribution(self.MEAN, self.COV)
        u = ParticleDistribution(
            particle_locations = dist.sample(self.N_PARTICLES),
            particle_weights = np.ones(self.N_PARTICLES)/self.N_PARTICLES
        )

        # first check that 0.95 confidence points consume 0.9 confidence points
        points1 = u.est_credible_region(level=0.95)
        points2 = u.est_credible_region(level=0.9)
        assert_almost_equal(
            np.sort(unique_rows(np.concatenate([points1, points2])), axis=0),
            np.sort(points1, axis=0)
        )

        # do the same thing with different slice
        points1 = u.est_credible_region(level=0.95, modelparam_slice=self.SLICE)
        points2 = u.est_credible_region(level=0.9, modelparam_slice=self.SLICE)
        assert_almost_equal(
            np.sort(unique_rows(np.concatenate([points1, points2])), axis=0),
            np.sort(points1, axis=0)
        )

    def test_region_est_hull(self):
        """
        Tests that test_region_est_hull works
        """
        dist = MultivariateNormalDistribution(self.MEAN, self.COV)
        u = ParticleDistribution(
            particle_locations = dist.sample(self.N_PARTICLES),
            particle_weights = np.ones(self.N_PARTICLES)/self.N_PARTICLES
        )

        faces, vertices = u.region_est_hull(level=0.95)

        # In this multinormal case, the convex hull surface
        # should be centered at MEAN
        assert_almost_equal(
            np.round(np.mean(vertices, axis=0)),
            np.round(self.MEAN)
        )

        # And a lower level should result in a smaller hull
        # and therefore smaller sample variance
        faces2, vertices2 = u.region_est_hull(level=0.2)
        assert_array_less(np.var(vertices2, axis=0), np.var(vertices, axis=0))

    def test_region_est_ellipsoid(self):
        """
        Tests that region_est_ellipsoid works.
        """

        dist = MultivariateNormalDistribution(self.MEAN, self.COV)
        u = ParticleDistribution(
            particle_locations = dist.sample(self.N_PARTICLES),
            particle_weights = np.ones(self.N_PARTICLES)/self.N_PARTICLES
        )

        # ask for a confidence level of 0.5
        A, c = u.region_est_ellipsoid(level=0.5)

        # center of ellipse should be the mean of the multinormal
        assert_almost_equal(np.round(c), self.MEAN, 1)

        # finally, the principal lengths of the ellipsoid
        # should be the same as COV
        _, QA, _ = np.linalg.svd(A)
        _, QC, _ = np.linalg.svd(self.COV)
        QA, QC = np.sqrt(QA), np.sqrt(QC)
        assert_almost_equal(
            QA / np.linalg.norm(QA),
            QC / np.linalg.norm(QC),
            1
        )

    def test_in_credible_region(self):
        """
        Tests that in_credible_region works.
        """

        dist = MultivariateNormalDistribution(self.MEAN, self.COV)
        u = ParticleDistribution(
            particle_locations = dist.sample(self.N_PARTICLES),
            particle_weights = np.ones(self.N_PARTICLES)/self.N_PARTICLES
        )

        # some points to test with
        test_points = np.random.multivariate_normal(self.MEAN, self.COV, self.N_PARTICLES)

        # method='pce'
        results = [
            u.in_credible_region(test_points, level=0.9, method='pce'),
            u.in_credible_region(test_points, level=0.84, method='pce'),
            u.in_credible_region(test_points, level=0.5, method='pce'),
        ]
        assert_almost_equal(
            np.array([np.mean(x.astype('float')) for x in results]),
            np.array([0.9, 0.84, 0.5]),
            3
        )

        # method='hpd-hull'
        results1 = [
            u.in_credible_region(test_points, level=0.9, method='hpd-hull'),
            u.in_credible_region(test_points, level=0.84, method='hpd-hull'),
            u.in_credible_region(test_points, level=0.5, method='hpd-hull'),
        ]
        assert_array_less(
            np.array([0.9, 0.84, 0.5]),
            np.array([np.mean(x.astype('float')) for x in results1])
        )

        # method='hpd-mvee'
        results2 = [
            u.in_credible_region(test_points, level=0.9, method='hpd-mvee'),
            u.in_credible_region(test_points, level=0.84, method='hpd-mvee'),
            u.in_credible_region(test_points, level=0.5, method='hpd-mvee'),
        ]
        assert_array_less(
            np.array([0.9, 0.84, 0.5]),
            np.array([np.mean(x.astype('float')) for x in results2])
        )

        # the mvee should be bigger than the convex hull.
        # this passes iff all points in the ellipses are
        # also in the hulls.
        assert_array_less(
            np.hstack([x.astype('float') for x in results1]),
            np.hstack([x.astype('float') for x in results2]) + 0.5
        )

        # check for no failures with slices.
        u.in_credible_region(test_points[:100,self.SLICE], level=0.9, method='pce', modelparam_slice=self.SLICE)
        u.in_credible_region(test_points[:100,self.SLICE], level=0.9, method='hpd-hull', modelparam_slice=self.SLICE)
        u.in_credible_region(test_points[:100,self.SLICE], level=0.9, method='hpd-mvee', modelparam_slice=self.SLICE)

        # check for no failures with single inputs
        assert(u.in_credible_region(test_points[0,:], level=0.9, method='pce').size == 1)
        assert(u.in_credible_region(test_points[0,:], level=0.9, method='hpd-hull').size == 1)
        assert(u.in_credible_region(test_points[0,:], level=0.9, method='hpd-mvee').size == 1)