# -*- coding: utf-8 -*-
"""
Functions to create initializers for parameter variables.

Examples
--------
>>> from npdl.layers import Dense
>>> from npdl.initializations import GlorotUniform
>>> l1 = Dense(n_out=300, n_in=100, init=GlorotUniform())
"""

import copy
import numpy as np

from npdl.utils.random import get_rng, get_dtype


class Initializer(object):
    """Base class for parameter weight initializers.

    The :class:`Initializer` class represents a weight initializer for
    the parameters of a neural network layer. It should be subclassed
    when implementing new types of weight initializers.
    """
    def __call__(self, size):
        """Makes :class:`Initializer` instances callable like a function, invoking
        their :meth:`call()` method.
        """
        return self.call(size)

    def call(self, size):
        """Sample should return a numpy.array of size shape and data type
        ``numpy.float32``.
        
        Parameters
        ----------
        size : tuple or int.
            Integer or tuple specifying the size of the returned
            matrix.
        
        Returns
        -------
        numpy.array. 
            Matrix of size shape and dtype ``numpy.float32``.
        """
        raise NotImplementedError()

    def __str__(self):
        return self.__class__.__name__


class Zero(Initializer):
    """Initialize weights with zero value.
    """
    def call(self, size):
        return _cast_dtype(np.zeros(size))


class One(Initializer):
    """Initialize weights with one value.
    """
    def call(self, size):
        return _cast_dtype(np.ones(size))


class Uniform(Initializer):
    """Sample initial weights from the uniform distribution.

    Parameters are sampled from U(a, b).
    
    Parameters
    ----------
    scale : float or tuple of float
        If ``scale`` is a float, weights are sampled from
        U(-scale, scale). If ``scale`` is a tuple, weights are sampled
        from U(scale[0], scale[1]).
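
    Examples
    --------
    A bounds check (the draws themselves are random):

    >>> import numpy as np
    >>> w = Uniform(scale=0.05)((100, 200))
    >>> bool((np.abs(w) <= 0.05).all())
    True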
    """
    def __init__(self, scale=0.05):
        self.scale = scale

    def call(self, size):
        # A tuple gives explicit bounds; a float gives symmetric bounds.
        if isinstance(self.scale, (tuple, list)):
            low, high = self.scale
        else:
            low, high = -self.scale, self.scale
        return _cast_dtype(get_rng().uniform(low, high, size=size))


class Normal(Initializer):
    """Sample initial weights from the Gaussian distribution.

    Initial weight parameters are sampled from N(mean, std).

    Parameters
    ----------
    std : float
        Standard deviation of the initial parameters.
    mean : float
        Mean of the initial parameters.
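
    Examples
    --------
    The sample standard deviation approaches ``std`` for large matrices
    (a statistical check, not an exact one):

    >>> w = Normal(std=0.01)((500, 500))
    >>> w.shape
    (500, 500)
    >>> bool(0.005 < w.std() < 0.02)
    True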
    """
    def __init__(self, std=0.01, mean=0.0):
        self.std = std
        self.mean = mean

    def call(self, size):
        return _cast_dtype(get_rng().normal(loc=self.mean, scale=self.std, size=size))


class LecunUniform(Initializer):
    """LeCun uniform initializer.

    It draws samples from a uniform distribution within [-limit, limit]
    where `limit` is `sqrt(3 / fan_in)` [1]_
    where `fan_in` is the number of input units in the weight matrix.
    
    References
    ----------
    .. [1] LeCun 98, Efficient Backprop, http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
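
    Examples
    --------
    A bounds check (draws are random, but the limit ``sqrt(3. / fan_in)``
    is exact; here ``fan_in = 100``):

    >>> import numpy as np
    >>> w = LecunUniform()((100, 200))
    >>> bool(np.abs(w).max() <= np.sqrt(3. / 100))
    True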
    
    """
    def call(self, size):
        fan_in, fan_out = decompose_size(size)
        return Uniform(np.sqrt(3. / fan_in))(size)


class GlorotUniform(Initializer):
    """Glorot uniform initializer, also called Xavier uniform initializer.

    It draws samples from a uniform distribution within [-limit, limit]
    where `limit` is `sqrt(6 / (fan_in + fan_out))` [1]_
    where `fan_in` is the number of input units in the weight matrix
    and `fan_out` is the number of output units in the weight matrix.
    
    References
    ----------
    .. [1] Glorot & Bengio, AISTATS 2010. http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
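
    Examples
    --------
    A bounds check against ``sqrt(6. / (fan_in + fan_out))``, which is
    about 0.141 for a ``(100, 200)`` matrix:

    >>> import numpy as np
    >>> w = GlorotUniform()((100, 200))
    >>> bool(np.abs(w).max() <= np.sqrt(6. / 300))
    True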
    """
    def call(self, size):
        fan_in, fan_out = decompose_size(size)
        return Uniform(np.sqrt(6. / (fan_in + fan_out)))(size)


class GlorotNormal(Initializer):
    """Glorot normal initializer, also called Xavier normal initializer.

    It draws samples from a normal distribution centered on 0
    with `stddev = sqrt(2 / (fan_in + fan_out))` [1]_
    where `fan_in` is the number of input units in the weight matrix
    and `fan_out` is the number of output units in the weight matrix.
    
    References
    ----------
    .. [1] Glorot & Bengio, AISTATS 2010. http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
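
    Examples
    --------
    The sample standard deviation approaches ``sqrt(2 / (fan_in + fan_out))``,
    about 0.045 for a ``(500, 500)`` matrix (a statistical check, not an
    exact one):

    >>> w = GlorotNormal()((500, 500))
    >>> bool(0.03 < w.std() < 0.06)
    True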
    """
    def call(self, size):
        fan_in, fan_out = decompose_size(size)
        return Normal(np.sqrt(2. / (fan_in + fan_out)))(size)


class HeNormal(Initializer):
    """He normal initializer.

    It draws samples from a normal distribution centered on 0
    with `stddev = sqrt(2 / fan_in)` [1]_
    where `fan_in` is the number of input units in the weight matrix.
    
    References
    ----------
    .. [1] He et al., http://arxiv.org/abs/1502.01852
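
    Examples
    --------
    The sample standard deviation approaches ``sqrt(2 / fan_in)``,
    about 0.063 for ``fan_in = 500`` (a statistical check):

    >>> w = HeNormal()((500, 500))
    >>> bool(0.04 < w.std() < 0.09)
    True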
    """
    def call(self, size):
        fan_in, fan_out = decompose_size(size)
        return Normal(np.sqrt(2. / fan_in))(size)


class HeUniform(Initializer):
    """He uniform variance scaling initializer.

    It draws samples from a uniform distribution within [-limit, limit]
    where `limit` is `sqrt(6 / fan_in)` [1]_
    where `fan_in` is the number of input units in the weight matrix.
    
    References
    ----------
    .. [1] He et al., http://arxiv.org/abs/1502.01852
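
    Examples
    --------
    A bounds check against ``sqrt(6. / fan_in)``, here ``fan_in = 100``:

    >>> import numpy as np
    >>> w = HeUniform()((100, 200))
    >>> bool(np.abs(w).max() <= np.sqrt(6. / 100))
    True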
    """
    def call(self, size):
        fan_in, fan_out = decompose_size(size)
        return Uniform(np.sqrt(6. / fan_in))(size)


class Orthogonal(Initializer):
    """Intialize weights as Orthogonal matrix.

    Orthogonal matrix initialization [1]_. For n-dimensional shapes where
    n > 2, the n-1 trailing axes are flattened. For convolutional layers, this
    corresponds to the fan-in, so this makes the initialization usable for
    both dense and convolutional layers.

    Parameters
    ----------
    gain : float or 'relu'.
        Scaling factor for the weights. Set this to ``1.0`` for linear and
        sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units, and
        to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units with
        leakiness ``alpha``. Other transfer functions may need different
        factors.

    References
    ----------
    .. [1] Saxe, Andrew M., James L. McClelland, and Surya Ganguli.
           "Exact solutions to the nonlinear dynamics of learning in deep
           linear neural networks." arXiv preprint arXiv:1312.6120 (2013).
    """
    def __init__(self, gain=1.0):
        if gain == 'relu':
            gain = np.sqrt(2)
        self.gain = gain

    def call(self, size):
        # Flatten all trailing axes so the SVD operates on a 2-D matrix.
        flat_shape = (size[0], np.prod(size[1:]))
        a = get_rng().normal(loc=0., scale=1., size=flat_shape)
        # The orthonormal factors of a Gaussian matrix are uniformly
        # distributed; pick whichever factor matches the flattened shape.
        u, _, v = np.linalg.svd(a, full_matrices=False)
        q = u if u.shape == flat_shape else v
        q = self.gain * q.reshape(size)
        return _cast_dtype(q)


def decompose_size(size):
    """Computes the number of input and output units for a weight shape.
    
    Parameters
    ----------
    size : tuple of int
        Integer shape tuple.
    
    Returns
    -------
    tuple of int
        ``(fan_in, fan_out)``.
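
    Examples
    --------
    For a 2-D shape the factors are read off directly; for a 4-D
    convolutional shape they are scaled by the receptive field size:

    >>> decompose_size((100, 50))
    (100, 50)
    >>> decompose_size((32, 16, 3, 3))   # fan_in = 16*3*3, fan_out = 32*3*3
    (144, 288)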
    """
    if len(size) == 2:
        fan_in = size[0]
        fan_out = size[1]

    elif len(size) in (4, 5):
        receptive_field_size = np.prod(size[2:])
        fan_in = size[1] * receptive_field_size
        fan_out = size[0] * receptive_field_size

    else:
        fan_in = fan_out = int(np.sqrt(np.prod(size)))

    return fan_in, fan_out


def _cast_dtype(res):
    """Cast an array to the globally configured dtype."""
    return np.array(res, dtype=get_dtype())


_zero = Zero()
_one = One()


def get(initialization):
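    """Resolve an initializer from a name or an instance.

    A string such as ``'glorot_uniform'`` (or the class name
    ``'GlorotUniform'``) is mapped to the corresponding initializer with
    default arguments; an :class:`Initializer` instance is returned as a
    deep copy.

    Examples
    --------
    >>> str(get('glorot_uniform'))
    'GlorotUniform'
    >>> isinstance(get('he_normal'), HeNormal)
    True
    """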
    if isinstance(initialization, str):
        if initialization in ['zero', 'Zero']:
            return Zero()
        if initialization in ['one', 'One']:
            return One()
        if initialization in ['uniform', 'Uniform']:
            return Uniform()
        if initialization in ['normal', 'Normal']:
            return Normal()
        if initialization in ['lecun_uniform', 'LecunUniform']:
            return LecunUniform()
        if initialization in ['glorot_uniform', 'GlorotUniform']:
            return GlorotUniform()
        if initialization in ['glorot_normal', 'GlorotNormal']:
            return GlorotNormal()
        if initialization in ['he_normal', 'HeNormal']:
            return HeNormal()
        if initialization in ['he_uniform', 'HeUniform']:
            return HeUniform()
        if initialization in ['orthogonal', 'Orthogonal']:
            return Orthogonal()
        raise ValueError('Unknown initialization name: {}.'.format(initialization))

    elif isinstance(initialization, Initializer):
        return copy.deepcopy(initialization)

    else:
        raise ValueError("Unknown type: {}.".format(initialization.__class__.__name__))