pysal.explore.inequality.theil 源代码

"""Theil Inequality metrics

"""
__author__ = "Sergio J. Rey <srey@asu.edu> "

# from pysal.lib.common import *
import numpy as np
__all__ = ['Theil', 'TheilD', 'TheilDSim']

SMALL = np.finfo('float').tiny


[文档]class Theil: """ Classic Theil measure of inequality .. math:: T = \sum_{i=1}^n \left( \\frac{y_i}{\sum_{i=1}^n y_i} \ln \left[ N \\frac{y_i}{\sum_{i=1}^n y_i}\\right] \\right) Parameters ---------- y : array (n,t) or (n,) with n taken as the observations across which inequality is calculated. If y is (n,) then a scalar inequality value is determined. If y is (n,t) then an array of inequality values are determined, one value for each column in y. Attributes ---------- T : array (t,) or (1,) Theil's T for each column of y Notes ----- This computation involves natural logs. To prevent ln[0] from occurring, a small value is added to each element of y before beginning the computation. Examples -------- >>> import pysal.lib >>> import numpy as np >>> from pysal.explore.inequality.theil import Theil >>> f=pysal.lib.io.open(pysal.lib.examples.get_path("mexico.csv")) >>> vnames=["pcgdp%d"%dec for dec in range(1940,2010,10)] >>> y=np.array([f.by_col[v] for v in vnames]).T >>> theil_y=Theil(y) >>> theil_y.T array([0.20894344, 0.15222451, 0.10472941, 0.10194725, 0.09560113, 0.10511256, 0.10660832]) """
[文档] def __init__(self, y): n = len(y) y = y + SMALL * (y == 0) # can't have 0 values yt = y.sum(axis=0) s = y / (yt * 1.0) lns = np.log(n * s) slns = s * lns t = sum(slns) self.T = t
[文档]class TheilD: """Decomposition of Theil's T based on partitioning of observations into exhaustive and mutually exclusive groups Parameters ---------- y : array (n,t) or (n, ) with n taken as the observations across which inequality is calculated If y is (n,) then a scalar inequality value is determined. If y is (n,t) then an array of inequality values are determined, one value for each column in y. partition : array (n, ) elements indicating which partition each observation belongs to. These are assumed to be exhaustive. Attributes ---------- T : array (n,t) or (n,) global inequality T bg : array (n,t) or (n,) between group inequality wg : array (n,t) or (n,) within group inequality Examples -------- >>> import pysal.lib >>> from pysal.explore.inequality.theil import TheilD >>> import numpy as np >>> f=pysal.lib.io.open(pysal.lib.examples.get_path("mexico.csv")) >>> vnames=["pcgdp%d"%dec for dec in range(1940,2010,10)] >>> y = np.array([f.by_col[v] for v in vnames]).T >>> regimes=np.array(f.by_col('hanson98')) >>> theil_d=TheilD(y,regimes) >>> theil_d.bg array([0.0345889 , 0.02816853, 0.05260921, 0.05931219, 0.03205257, 0.02963731, 0.03635872]) >>> theil_d.wg array([0.17435454, 0.12405598, 0.0521202 , 0.04263506, 0.06354856, 0.07547525, 0.0702496 ]) """
[文档] def __init__(self, y, partition): groups = np.unique(partition) T = Theil(y).T ytot = y.sum(axis=0) #group totals gtot = np.array([y[partition == gid].sum(axis=0) for gid in groups]) mm = np.dot if ytot.size == 1: # y is 1-d sg = gtot / (ytot * 1.) sg.shape = (sg.size, 1) else: sg = mm(gtot, np.diag(1. / ytot)) ng = np.array([sum(partition == gid) for gid in groups]) ng.shape = (ng.size,) # ensure ng is 1-d n = y.shape[0] # between group inequality sg = sg + (sg==0) # handle case when a partition has 0 for sum bg = np.multiply(sg, np.log(mm(np.diag(n * 1. / ng), sg))).sum(axis=0) self.T = T self.bg = bg self.wg = T - bg
[文档]class TheilDSim: """Random permutation based inference on Theil's inequality decomposition. Provides for computationally based inference regarding the inequality decomposition using random spatial permutations. See :cite:`rey_interregional_2010`. Parameters ---------- y : array (n,t) or (n, ) with n taken as the observations across which inequality is calculated If y is (n,) then a scalar inequality value is determined. If y is (n,t) then an array of inequality values are determined, one value for each column in y. partition : array (n, ) elements indicating which partition each observation belongs to. These are assumed to be exhaustive. permutations : int Number of random spatial permutations for computationally based inference on the decomposition. Attributes ---------- observed : array (n,t) or (n,) TheilD instance for the observed data. bg : array (permutations+1,t) between group inequality bg_pvalue : array (t,1) p-value for the between group measure. Measures the percentage of the realized values that were greater than or equal to the observed bg value. Includes the observed value. wg : array (size=permutations+1) within group inequality Depending on the shape of y, 1 or 2-dimensional Examples -------- >>> import pysal.lib >>> from pysal.explore.inequality.theil import TheilDSim >>> import numpy as np >>> f=pysal.lib.io.open(pysal.lib.examples.get_path("mexico.csv")) >>> vnames=["pcgdp%d"%dec for dec in range(1940,2010,10)] >>> y=np.array([f.by_col[v] for v in vnames]).T >>> regimes=np.array(f.by_col('hanson98')) >>> np.random.seed(10) >>> theil_ds=TheilDSim(y,regimes,999) >>> theil_ds.bg_pvalue array([0.4 , 0.344, 0.001, 0.001, 0.034, 0.072, 0.032]) """
[文档] def __init__(self, y, partition, permutations=99): observed = TheilD(y, partition) bg_ct = observed.bg == observed.bg # already have one extreme value bg_ct = bg_ct * 1.0 results = [observed] for perm in range(permutations): yp = np.random.permutation(y) t = TheilD(yp, partition) bg_ct += (1.0 * t.bg >= observed.bg) results.append(t) self.results = results self.T = observed.T self.bg_pvalue = bg_ct / (permutations * 1.0 + 1) self.bg = np.array([r.bg for r in results]) self.wg = np.array([r.wg for r in results])