Module VITAE.metric

Expand source code
import networkx as nx
import networkx.algorithms.isomorphism as iso
import numpy as np
import pandas as pd
from scipy.sparse.csgraph import laplacian
from scipy.linalg import eigh
from scipy.integrate import quad
from sklearn.metrics import pairwise_distances
import warnings

import numba
from numba import jit, float32


def topology(G_true, G_pred):
    '''Evaulate topology metrics.

    Parameters
    ----------
    G_true : nx.Graph
        The reference graph.
    G_pred : nx.Graph
        The estimated graph.
    
    Returns
    ----------
    res : dict
        a dict containing evaulation results.
    '''     
    res = {}
    
    # 1. Isomorphism with same initial node
    def comparison(N1, N2):
        if N1['is_init'] != N2['is_init']:
            return False
        else:
            return True
    score_isomorphism = int(nx.is_isomorphic(G_true, G_pred, node_match=comparison))
    res['ISO score'] = score_isomorphism
    
    # 2. GED (graph edit distance)
    if len(G_true)>10:
        warnings.warn("Didn't calculate graph edit distances for large graphs.")
        res['GED score'] = np.nan  
    else:
        max_num_oper = len(G_true)
        GED = nx.graph_edit_distance(G_pred, G_true, 
                                node_match=comparison,
                                upper_bound=max_num_oper)
        if GED is None:
            res['GED score'] = 0
        else:            
            score_GED = 1 - GED / max_num_oper
            res['GED score'] = score_GED
        
    # 3. Ipsen-Mikhailov distance
    if len(G_true)==len(G_pred):
        score_IM = 1 - IM_dist(G_true, G_pred)
        score_IM = np.maximum(0, score_IM)
    else:
        score_IM = 0
    res['IM score'] = score_IM
    return res


def IM_dist(G1, G2):
    '''The Ipsen-Mikailov distance is a global (spectral) metric, 
    corresponding to the square-root of the squared difference of the
    Laplacian spectrum for each graph.

    Implementation adapt from
    https://netrd.readthedocs.io/en/latest/_modules/netrd/distance/hamming_ipsen_mikhailov.html

    Parameters
    ----------
    G1 : nx.Graph
    G2 : nx.Graph

    Returns
    ----------
    IM(G1,G2) : float
        The IM distance between G1 and G2.
    '''
    adj1 = nx.to_numpy_array(G1)
    adj2 = nx.to_numpy_array(G2)
    hwhm = 0.08
    
    N = len(adj1)
    # get laplacian matrix
    L1 = laplacian(adj1, normed=False)
    L2 = laplacian(adj2, normed=False)

    # get the modes for the positive-semidefinite laplacian
    w1 = np.sqrt(np.abs(eigh(L1)[0][1:]))
    w2 = np.sqrt(np.abs(eigh(L2)[0][1:]))

    # we calculate the norm for both spectrum
    norm1 = (N - 1) * np.pi / 2 - np.sum(np.arctan(-w1 / hwhm))
    norm2 = (N - 1) * np.pi / 2 - np.sum(np.arctan(-w2 / hwhm))

    # define both spectral densities
    density1 = lambda w: np.sum(hwhm / ((w - w1) ** 2 + hwhm ** 2)) / norm1
    density2 = lambda w: np.sum(hwhm / ((w - w2) ** 2 + hwhm ** 2)) / norm2

    func = lambda w: (density1(w) - density2(w)) ** 2
    return np.sqrt(quad(func, 0, np.inf, limit=100)[0])


@jit((float32[:,:], float32[:,:]), nopython=True, nogil=True)
def _rand_index(true, pred):
    n = true.shape[0]
    m_true = true.shape[1]
    m_pred = pred.shape[1]
    RI = 0.0
    for i in range(1, n-1):
        for j in range(i, n):
            RI_ij = 0.0
            for k in range(m_true):
                RI_ij += true[i,k]*true[j,k]
            for k in range(m_pred):
                RI_ij -= pred[i,k]*pred[j,k]
            RI += 1-np.abs(RI_ij)
    return RI / (n*(n-1)/2.0)


def get_GRI(true, pred):
    '''Compute the GRI.

    Parameters
    ----------
    ture : np.array
        [n_samples, n_cluster_1] for proportions or [n_samples, ] for grouping
    pred : np.array
        [n_samples, n_cluster_2] for estimated proportions or [n_samples, ] for grouping

    Returns
    ----------
    GRI : float
        The GRI of two groups of proportions in the trajectories.
    '''
    if len(true)!=len(pred):
        raise ValueError('Inputs should have same lengths!')
        
    if len(true.shape)==1:
        true = pd.get_dummies(true).values
    if len(pred.shape)==1:
        pred = pd.get_dummies(pred).values
    
    true = np.sqrt(true).astype(np.float32)
    pred = np.sqrt(pred).astype(np.float32)

    return _rand_index(true, pred)

Functions

def topology(G_true, G_pred)

Evaulate topology metrics.

Parameters

G_true : nx.Graph
The reference graph.
G_pred : nx.Graph
The estimated graph.

Returns

res : dict
a dict containing evaulation results.
Expand source code
def topology(G_true, G_pred):
    '''Evaulate topology metrics.

    Parameters
    ----------
    G_true : nx.Graph
        The reference graph.
    G_pred : nx.Graph
        The estimated graph.
    
    Returns
    ----------
    res : dict
        a dict containing evaulation results.
    '''     
    res = {}
    
    # 1. Isomorphism with same initial node
    def comparison(N1, N2):
        if N1['is_init'] != N2['is_init']:
            return False
        else:
            return True
    score_isomorphism = int(nx.is_isomorphic(G_true, G_pred, node_match=comparison))
    res['ISO score'] = score_isomorphism
    
    # 2. GED (graph edit distance)
    if len(G_true)>10:
        warnings.warn("Didn't calculate graph edit distances for large graphs.")
        res['GED score'] = np.nan  
    else:
        max_num_oper = len(G_true)
        GED = nx.graph_edit_distance(G_pred, G_true, 
                                node_match=comparison,
                                upper_bound=max_num_oper)
        if GED is None:
            res['GED score'] = 0
        else:            
            score_GED = 1 - GED / max_num_oper
            res['GED score'] = score_GED
        
    # 3. Ipsen-Mikhailov distance
    if len(G_true)==len(G_pred):
        score_IM = 1 - IM_dist(G_true, G_pred)
        score_IM = np.maximum(0, score_IM)
    else:
        score_IM = 0
    res['IM score'] = score_IM
    return res
def IM_dist(G1, G2)

The Ipsen-Mikailov distance is a global (spectral) metric, corresponding to the square-root of the squared difference of the Laplacian spectrum for each graph.

Implementation adapt from https://netrd.readthedocs.io/en/latest/_modules/netrd/distance/hamming_ipsen_mikhailov.html

Parameters

G1 : nx.Graph
 
G2 : nx.Graph
 

Returns

IM(G1,G2) : float
The IM distance between G1 and G2.
Expand source code
def IM_dist(G1, G2):
    '''The Ipsen-Mikailov distance is a global (spectral) metric, 
    corresponding to the square-root of the squared difference of the
    Laplacian spectrum for each graph.

    Implementation adapt from
    https://netrd.readthedocs.io/en/latest/_modules/netrd/distance/hamming_ipsen_mikhailov.html

    Parameters
    ----------
    G1 : nx.Graph
    G2 : nx.Graph

    Returns
    ----------
    IM(G1,G2) : float
        The IM distance between G1 and G2.
    '''
    adj1 = nx.to_numpy_array(G1)
    adj2 = nx.to_numpy_array(G2)
    hwhm = 0.08
    
    N = len(adj1)
    # get laplacian matrix
    L1 = laplacian(adj1, normed=False)
    L2 = laplacian(adj2, normed=False)

    # get the modes for the positive-semidefinite laplacian
    w1 = np.sqrt(np.abs(eigh(L1)[0][1:]))
    w2 = np.sqrt(np.abs(eigh(L2)[0][1:]))

    # we calculate the norm for both spectrum
    norm1 = (N - 1) * np.pi / 2 - np.sum(np.arctan(-w1 / hwhm))
    norm2 = (N - 1) * np.pi / 2 - np.sum(np.arctan(-w2 / hwhm))

    # define both spectral densities
    density1 = lambda w: np.sum(hwhm / ((w - w1) ** 2 + hwhm ** 2)) / norm1
    density2 = lambda w: np.sum(hwhm / ((w - w2) ** 2 + hwhm ** 2)) / norm2

    func = lambda w: (density1(w) - density2(w)) ** 2
    return np.sqrt(quad(func, 0, np.inf, limit=100)[0])
def get_GRI(true, pred)

Compute the GRI.

Parameters

ture : np.array
[n_samples, n_cluster_1] for proportions or [n_samples, ] for grouping
pred : np.array
[n_samples, n_cluster_2] for estimated proportions or [n_samples, ] for grouping

Returns

GRI : float
The GRI of two groups of proportions in the trajectories.
Expand source code
def get_GRI(true, pred):
    '''Compute the GRI.

    Parameters
    ----------
    ture : np.array
        [n_samples, n_cluster_1] for proportions or [n_samples, ] for grouping
    pred : np.array
        [n_samples, n_cluster_2] for estimated proportions or [n_samples, ] for grouping

    Returns
    ----------
    GRI : float
        The GRI of two groups of proportions in the trajectories.
    '''
    if len(true)!=len(pred):
        raise ValueError('Inputs should have same lengths!')
        
    if len(true.shape)==1:
        true = pd.get_dummies(true).values
    if len(pred.shape)==1:
        pred = pd.get_dummies(pred).values
    
    true = np.sqrt(true).astype(np.float32)
    pred = np.sqrt(pred).astype(np.float32)

    return _rand_index(true, pred)