rusty1s/embedded_gcnn
lib/graph/coarsening.py
from __future__ import division
from six.moves import xrange

import numpy as np
import numpy_groupies as npg
import scipy.sparse as sp

from .clustering import normalized_cut
from .distortion import perm_adj
from .adjacency import points_to_adj


def coarsen_adj(adj,
                points,
                mass,
                levels,
                scale_invariance=False,
                stddev=1,
                rid=None):
    """Coarsen and permute a given adjacency matrix a number of levels deep,
    so that pairwise nodes in the same cluster are neighbored in the adjacency
    matrix. Returns `levels + 1` adjacency matrices (dist and rad) and the
    permutation of the first level to permute a given feature matrix."""

    # Coarsen the adjacency a defined number of levels deep.
    adjs_dist, adjs_rad, cluster_maps = _coarsen_adj(
        adj, points, mass, levels, scale_invariance, stddev, rid)

    # Permute adjacencies to form a binary tree for efficient O(n) pooling.
    perms = _compute_perms(cluster_maps)
    adjs_dist = [perm_adj(adjs_dist[i], perms[i]) for i in xrange(levels + 1)]
    adjs_rad = [perm_adj(adjs_rad[i], perms[i]) for i in xrange(levels + 1)]

    return adjs_dist, adjs_rad, perms[0]
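

# Example (illustrative sketch, not part of the module): coarsening a toy
# 4-node cycle graph two levels deep. As elsewhere in this module, `adj`
# is assumed to be a symmetric scipy COO matrix, `points` an (n, 2) array
# of (y, x) coordinates and `mass` an (n,) array of node masses.
#
#     adj = sp.coo_matrix(
#         np.array([[0, 1, 1, 0],
#                   [1, 0, 0, 1],
#                   [1, 0, 0, 1],
#                   [0, 1, 1, 0]], dtype=np.float32))
#     points = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], np.float32)
#     mass = np.ones(4, dtype=np.float32)
#     adjs_dist, adjs_rad, perm = coarsen_adj(adj, points, mass, levels=2)
#     # len(adjs_dist) == len(adjs_rad) == 3; `perm` reorders the rows of
#     # a level-0 feature matrix to match `adjs_dist[0]`.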


def _coarsen_adj(adj,
                 points,
                 mass,
                 levels,
                 scale_invariance=False,
                 stddev=1,
                 rid=None):

    adj_dist, adj_rad = points_to_adj(adj, points, scale_invariance, stddev)

    adjs_dist = [adj_dist]
    adjs_rad = [adj_rad]
    cluster_maps = []

    for _ in xrange(levels):
        # Calculate normalized cut clustering.
        cluster_map = normalized_cut(adj_dist, rid)
        cluster_maps.append(cluster_map)

        # Coarsen adjacency.
        adj, points, mass = _coarsen_clustered_adj(adj, points, mass,
                                                   cluster_map)
        # Compute distance/radian adjacencies of the coarsened graph.
        adj_dist, adj_rad = points_to_adj(adj, points, scale_invariance,
                                          stddev)
        adjs_dist.append(adj_dist)
        adjs_rad.append(adj_rad)

        # Order nodes by degree for the clustering in the next iteration.
        if adj_dist.data.size > 0:
            degree = npg.aggregate(adj_dist.row, adj_dist.data, func='sum')
            rid = np.argsort(degree)
        else:
            rid = None

    return adjs_dist, adjs_rad, cluster_maps
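

# Sketch of the degree ordering computed in the loop above (hypothetical
# values): for a COO adjacency with row = [0, 0, 1, 2] and
# data = [0.5, 0.2, 0.4, 0.1], `npg.aggregate(row, data, func='sum')`
# sums the outgoing weights per node, giving degrees [0.7, 0.4, 0.1];
# `np.argsort` then yields rid = [2, 1, 0], so the node with the lowest
# degree is clustered first in the next iteration.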


def _coarsen_clustered_adj(adj, points, mass, cluster_map):
    # Abort if the adjacency contains one point or fewer.
    if points.shape[0] < 2:
        return adj, points, mass

    # Map row/col indices of the adjacency to their cluster indices.
    rows = cluster_map[adj.row]
    cols = cluster_map[adj.col]

    n = cluster_map.max() + 1
    adj = sp.coo_matrix((adj.data, (rows, cols)), shape=(n, n))
    adj = adj.tocsc()  # Sum up duplicate row/col entries.
    adj.setdiag(0)  # Remove intra-cluster self-connections.
    adj.eliminate_zeros()
    adj = adj.tocoo()

    # Compute mass-weighted centroids of the clusters.
    points_y = mass * points[:, 0]
    points_y = npg.aggregate(cluster_map, points_y, func='sum')

    points_x = mass * points[:, 1]
    points_x = npg.aggregate(cluster_map, points_x, func='sum')

    mass = npg.aggregate(cluster_map, mass, func='sum')

    points_y = points_y / mass
    points_x = points_x / mass

    points_y = np.reshape(points_y, (-1, 1))
    points_x = np.reshape(points_x, (-1, 1))
    points = np.concatenate((points_y, points_x), axis=1)

    return adj, points, mass
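

# Worked example (illustrative): with cluster_map = [0, 0, 1, 1], nodes
# {0, 1} and {2, 3} are merged and their coordinates averaged, weighted
# by mass:
#
#     cluster_map = np.array([0, 0, 1, 1])
#     mass = np.array([1.0, 3.0, 1.0, 1.0])
#     points_y = mass * np.array([0.0, 4.0, 2.0, 6.0])
#     npg.aggregate(cluster_map, points_y, func='sum')  # => [12.0, 8.0]
#     npg.aggregate(cluster_map, mass, func='sum')      # => [4.0, 2.0]
#     # Dividing yields the mass-weighted centroids [3.0, 4.0].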


def _compute_perms(cluster_maps):
    max_cluster = np.max(cluster_maps[-1]) + 1
    n = max_cluster
    perm = np.arange(n)
    perms = [perm]

    for i in xrange(len(cluster_maps) - 1, -1, -1):  # Iterate backwards
        cluster_map = cluster_maps[i]

        # Clusters with a single child get one fake sibling node appended.
        idx, counts = np.unique(cluster_map, return_counts=True)
        singles = idx[np.where(counts == 1)]

        # Fake clusters of the coarser level get two fake child nodes.
        max_cluster = (cluster_map.size + singles.size) // 2
        doubles = np.arange(max_cluster, n).repeat(2)

        cluster_map = np.concatenate((cluster_map, singles, doubles))

        # Permute cluster_map.
        cluster_map = np.argsort(perm)[cluster_map]
        rid = np.argsort(cluster_map)
        n *= 2
        perm = np.arange(n)[rid]
        perms.append(perm)

    return perms[::-1]
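

# Example (illustrative): a single cluster map [0, 1, 1, 0, 2] assigns
# five nodes to three clusters. Node 4 is the only child of cluster 2,
# so one fake node (index 5) is appended as its sibling. The returned
# level-0 permutation then places siblings next to each other, i.e. the
# pairs (0, 3), (1, 2) and (4, 5), so that pooling reduces to combining
# consecutive node pairs:
#
#     perms = _compute_perms([np.array([0, 1, 1, 0, 2])])
#     # perms[0] has length 6 (5 real nodes plus 1 fake node) and
#     # perms[1] == [0, 1, 2] indexes the three coarse clusters.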