pysal.explore.inequality.gini 源代码

"""
Gini based Inequality Metrics
"""

__author__ = "Sergio J. Rey <srey@asu.edu> "

import numpy as np
from scipy.stats import norm as NORM

__all__ = ['Gini', 'Gini_Spatial']


def _gini(x):
    """
    Memory efficient calculation of Gini coefficient in relative mean difference form

    Parameters
    ----------

    x : array-like

    Attributes
    ----------

    g : float
        Gini coefficient

    Notes
    -----
    Based on http://www.statsdirect.com/help/default.htm#nonparametric_methods/gini.htm

    """
    n = len(x)
    try:
        x_sum = x.sum()
    except AttributeError:
        x = np.asarray(x)
        x_sum = x.sum()
    n_x_sum = n * x_sum
    r_x = (2. * np.arange(1, len(x)+1) * x[np.argsort(x)]).sum()
    return (r_x - n_x_sum - x_sum) / n_x_sum


class Gini:
    """
    Classic Gini coefficient in absolute deviation form

    Parameters
    ----------

    y : array (n,1)
       attribute

    Attributes
    ----------

    g : float
       Gini coefficient

    """

    def __init__(self, x):

        self.g = _gini(x)


[文档]class Gini_Spatial:
    """
    Spatial Gini coefficient

    Provides for computationally based inference regarding the contribution of
    spatial neighbor pairs to overall inequality across a set of regions. See :cite:`Rey_2013_sea`.

    Parameters
    ----------

    y : array (n,1)
       attribute

    w : binary spatial weights object

    permutations : int (default = 99)
       number of permutations for inference

    Attributes
    ----------

    g : float
       Gini coefficient

    wg : float
       Neighbor inequality component (geographic inequality)

    wcg : float
       Non-neighbor inequality component (geographic complement inequality)

    wcg_share : float
       Share of inequality in non-neighbor component

    If Permuations > 0

    p_sim : float
       pseudo p-value for spatial gini

    e_wcg : float
       expected value of non-neighbor inequality component (level) from permutations

    s_wcg : float
           standard deviation non-neighbor inequality component (level) from permutations

    z_wcg : float
           z-value non-neighbor inequality component (level) from permutations

    p_z_sim : float
             pseudo  p-value based on standard normal approximation of permutation based values


    Examples
    --------
    >>> import pysal.lib
    >>> import numpy as np
    >>> from pysal.explore.inequality.gini import Gini_Spatial

    Use data from the 32 Mexican States, Decade frequency 1940-2010

    >>> f=pysal.lib.io.open(pysal.lib.examples.get_path("mexico.csv"))
    >>> vnames=["pcgdp%d"%dec for dec in range(1940,2010,10)]
    >>> y=np.transpose(np.array([f.by_col[v] for v in vnames]))

    Define regime neighbors

    >>> regimes=np.array(f.by_col('hanson98'))
    >>> w = pysal.lib.weights.block_weights(regimes)
    >>> np.random.seed(12345)
    >>> gs = Gini_Spatial(y[:,0],w)
    >>> gs.p_sim
    0.04
    >>> gs.wcg
    4353856.0
    >>> gs.e_wcg
    4170356.7474747472

    Thus, the amount of inequality between pairs of states that are not in the
    same regime (neighbors) is significantly higher than what is expected
    under the null of random spatial inequality.

    """
[文档]    def __init__(self, x, w, permutations=99):

        x = np.asarray(x)
        g = _gini(x)
        self.g = g
        n = len(x)
        den = x.mean() * 2 * n**2
        d = g * den
        wg = self._calc(x, w)
        wcg = d - wg
        self.g = g
        self.wcg = wcg
        self.wg = wg
        self.dtotal = d
        self.den = den
        self.wcg_share = wcg / den

        if permutations:
            ids = np.arange(n)
            wcgp = np.zeros((permutations, ))
            for perm in range(permutations):
                np.random.shuffle(ids)
                wcgp[perm] = d - self._calc(x[ids], w)
            above = wcgp >= self.wcg
            larger = above.sum()
            if (permutations - larger) < larger:
                larger = permutations - larger
            self.wcgp = wcgp
            self.p_sim = (larger + 1.) / (permutations + 1.)
            self.e_wcg = wcgp.mean()
            self.s_wcg = wcgp.std()
            self.z_wcg = (self.wcg - self.e_wcg) / self.s_wcg
            self.p_z_sim = 1.0 - NORM.cdf(self.z_wcg)

    def _calc(self, x, w):
        sad_sum = 0.0
        for i, js in w.neighbors.items():
            sad_sum += np.abs(x[i]-x[js]).sum()
        return sad_sum