pysal.explore.esda.moran source code

"""
Moran's I Spatial Autocorrelation Statistics

"""
__author__ = "Sergio J. Rey <srey@asu.edu>, \
        Dani Arribas-Bel <daniel.arribas.bel@gmail.com>"
from pysal.lib.weights.spatial_lag import lag_spatial as slag
from .smoothing import assuncao_rate
from .tabular import _univariate_handler, _bivariate_handler
import scipy.stats as stats
import numpy as np

__all__ = ["Moran", "Moran_Local", "Moran_BV", "Moran_BV_matrix",
           "Moran_Local_BV", "Moran_Rate", "Moran_Local_Rate"]

PERMUTATIONS = 999


class Moran(object):
    """Moran's I Global Autocorrelation Statistic.

    Parameters
    ----------
    y : array
        variable measured across n spatial units
    w : W
        spatial weights instance
    transformation : string
        weights transformation, default is row-standardized "r".
        Other options include "B": binary, "D": doubly-standardized,
        "U": untransformed (general weights), "V": variance-stabilizing.
    permutations : int
        number of random permutations for calculation of pseudo-p_values
    two_tailed : boolean
        If True (default) analytical p-values for Moran are two tailed,
        otherwise if False, they are one-tailed.

    Attributes
    ----------
    y : array
        original variable
    w : W
        original w object
    permutations : int
        number of permutations
    I : float
        value of Moran's I
    EI : float
        expected value under normality assumption
    VI_norm, seI_norm, z_norm, p_norm : float
        variance, standard deviation, z-value and p-value of I under the
        normality assumption
    VI_rand, seI_rand, z_rand, p_rand : float
        variance, standard deviation, z-value and p-value of I under the
        randomization assumption
    two_tailed : boolean
        If True p_norm and p_rand are two-tailed, otherwise one-tailed.
    sim : array
        (if permutations>0) vector of I values for permuted samples
    p_sim : array
        (if permutations>0) pseudo p-value based on permutations
        (one-tailed), folded so that extremes in either direction count
    EI_sim, VI_sim, seI_sim, z_sim, p_z_sim : float
        (if permutations>0) mean, variance, standard deviation of I from
        permutations; standardized I; and its normal-approximation p-value

    Notes
    -----
    Technical details and derivations can be found in :cite:`cliff81`.
    """

    def __init__(self, y, w, transformation="r", permutations=PERMUTATIONS,
                 two_tailed=True):
        y = np.asarray(y).flatten()
        self.y = y
        w.transform = transformation
        self.w = w
        self.permutations = permutations
        # computes n, z (mean-centered y), z2ss, EI, VI_norm/seI_norm and
        # VI_rand/seI_rand as side effects
        self.__moments()
        self.I = self.__calc(self.z)
        self.z_norm = (self.I - self.EI) / self.seI_norm
        self.z_rand = (self.I - self.EI) / self.seI_rand
        # one-tailed p-values in the direction of the observed deviation
        if self.z_norm > 0:
            self.p_norm = 1 - stats.norm.cdf(self.z_norm)
            self.p_rand = 1 - stats.norm.cdf(self.z_rand)
        else:
            self.p_norm = stats.norm.cdf(self.z_norm)
            self.p_rand = stats.norm.cdf(self.z_rand)
        if two_tailed:
            self.p_norm *= 2.
            self.p_rand *= 2.
        if permutations:
            sim = [self.__calc(np.random.permutation(self.z))
                   for i in range(permutations)]
            self.sim = sim = np.array(sim)
            above = sim >= self.I
            larger = above.sum()
            # fold: use the smaller tail so extremes on either side count
            if (self.permutations - larger) < larger:
                larger = self.permutations - larger
            self.p_sim = (larger + 1.) / (permutations + 1.)
            self.EI_sim = sim.sum() / permutations
            self.seI_sim = np.array(sim).std()
            self.VI_sim = self.seI_sim ** 2
            self.z_sim = (self.I - self.EI_sim) / self.seI_sim
            if self.z_sim > 0:
                self.p_z_sim = 1 - stats.norm.cdf(self.z_sim)
            else:
                self.p_z_sim = stats.norm.cdf(self.z_sim)
        # provide .z attribute that is znormalized: until here self.z was
        # only mean-centered (as needed by __calc); standardize it now for
        # external consumers
        sy = y.std()
        self.z /= sy

    def __moments(self):
        """Compute expectation and variances of I under the normality and
        randomization assumptions (Cliff & Ord)."""
        self.n = len(self.y)
        y = self.y
        z = y - y.mean()
        self.z = z
        self.z2ss = (z * z).sum()
        self.EI = -1. / (self.n - 1)
        n = self.n
        n2 = n * n
        s1 = self.w.s1
        s0 = self.w.s0
        s2 = self.w.s2
        s02 = s0 * s0
        # variance under normality
        v_num = n2 * s1 - n * s2 + 3 * s02
        v_den = (n - 1) * (n + 1) * s02
        self.VI_norm = v_num / v_den - (1.0 / (n - 1)) ** 2
        self.seI_norm = self.VI_norm ** (1 / 2.)
        # variance under randomization; k is the sample kurtosis of y
        xd4 = z**4
        xd2 = z**2
        k_num = xd4.sum() / n
        k_den = (xd2.sum() / n)**2
        k = k_num / k_den
        EI = self.EI
        A = n * ((n2 - 3 * n + 3) * s1 - n * s2 + 3 * s02)
        B = k * ((n2 - n) * s1 - 2 * n * s2 + 6 * s02)
        VIR = (A - B) / ((n - 1) * (n - 2) * (n - 3) * s02) - EI*EI
        self.VI_rand = VIR
        self.seI_rand = VIR ** (1 / 2.)

    def __calc(self, z):
        """Return Moran's I for a (mean-centered) vector z."""
        zl = slag(self.w, z)
        inum = (z * zl).sum()
        return self.n / self.w.s0 * inum / self.z2ss

    @property
    def _statistic(self):
        """More consistent hidden attribute to access ESDA statistics"""
        return self.I

    @classmethod
    def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim',
               outvals=None, **stat_kws):
        """
        Function to compute a Moran statistic on a dataframe

        Arguments
        ---------
        df : pandas.DataFrame
            a pandas dataframe with a geometry column
        cols : string or list of string
            name or list of names of columns to use to compute the statistic
        w : pysal weights object
            a weights object aligned with the dataframe. If not provided,
            this is searched for in the dataframe's metadata
        inplace : bool
            whether to operate on the dataframe inplace or to return a
            series containing the results of the computation. If operating
            inplace, the derived columns will be named 'column_moran'
        pvalue : string
            which pvalue should be returned. Refer to the Moran statistic's
            documentation for available p-values
        outvals : list of strings
            list of arbitrary attributes to return as columns from the
            Moran statistic
        **stat_kws : keyword arguments
            options to pass to the underlying statistic. For this, see the
            documentation for the Moran statistic.

        Returns
        -------
        If inplace, None, and operation is conducted on dataframe in memory.
        Otherwise, returns a copy of the dataframe with the relevant columns
        attached.

        See Also
        --------
        Moran
        """
        return _univariate_handler(df, cols, w=w, inplace=inplace,
                                   pvalue=pvalue, outvals=outvals, stat=cls,
                                   swapname=cls.__name__.lower(), **stat_kws)
class Moran_BV(object):
    """
    Bivariate Moran's I

    Parameters
    ----------
    x : array
        x-axis variable
    y : array
        wy will be on y axis
    w : W
        weight instance assumed to be aligned with y
    transformation : {'R', 'B', 'D', 'U', 'V'}
        weights transformation, default is row-standardized "r".
        Other options include "B": binary, "D": doubly-standardized,
        "U": untransformed (general weights), "V": variance-stabilizing.
    permutations : int
        number of random permutations for calculation of pseudo p_values

    Attributes
    ----------
    zx : array
        original x variable standardized by mean and std
    zy : array
        original y variable standardized by mean and std
    w : W
        original w object
    permutations : int
        number of permutations
    I : float
        value of bivariate Moran's I
    sim : array
        (if permutations>0) vector of I values for permuted samples
    p_sim : float
        (if permutations>0) pseudo p-value based on permutations, folded so
        extremes in either direction count
    EI_sim, VI_sim, seI_sim, z_sim, p_z_sim
        (if permutations>0) mean, variance, standard deviation of I from
        permutations; standardized I; and its normal-approximation p-value

    Notes
    -----
    Inference is only based on permutations as analytical results are not
    too reliable.
    """

    def __init__(self, x, y, w, transformation="r",
                 permutations=PERMUTATIONS):
        x = np.asarray(x).flatten()
        y = np.asarray(y).flatten()
        # standardize with sample std (ddof=1) so that zx'zx = zy'zy = n-1
        zy = (y - y.mean()) / y.std(ddof=1)
        zx = (x - x.mean()) / x.std(ddof=1)
        self.y = y
        self.x = x
        self.zx = zx
        self.zy = zy
        n = x.shape[0]
        self.den = n - 1.  # zx'zx = zy'zy = n-1
        w.transform = transformation
        self.w = w
        self.I = self.__calc(zy)
        if permutations:
            # only y is permuted; x stays fixed
            nrp = np.random.permutation
            sim = [self.__calc(nrp(zy)) for i in range(permutations)]
            self.sim = sim = np.array(sim)
            above = sim >= self.I
            larger = above.sum()
            # fold: use the smaller tail so extremes on either side count
            if (permutations - larger) < larger:
                larger = permutations - larger
            self.p_sim = (larger + 1.) / (permutations + 1.)
            self.EI_sim = sim.sum() / permutations
            self.seI_sim = np.array(sim).std()
            self.VI_sim = self.seI_sim ** 2
            self.z_sim = (self.I - self.EI_sim) / self.seI_sim
            if self.z_sim > 0:
                self.p_z_sim = 1 - stats.norm.cdf(self.z_sim)
            else:
                self.p_z_sim = stats.norm.cdf(self.z_sim)

    def __calc(self, zy):
        """Return bivariate I: correlation of zx with the spatial lag of zy."""
        wzy = slag(self.w, zy)
        self.num = (self.zx * wzy).sum()
        return self.num / self.den

    @property
    def _statistic(self):
        """More consistent hidden attribute to access ESDA statistics"""
        return self.I

    @classmethod
    def by_col(cls, df, x, y=None, w=None, inplace=False, pvalue='sim',
               outvals=None, **stat_kws):
        """
        Function to compute a Moran_BV statistic on a dataframe

        Arguments
        ---------
        df : pandas.DataFrame
            a pandas dataframe with a geometry column
        x : list of strings
            column name or list of column names to use as X values to
            compute the bivariate statistic. If no y is provided, pairwise
            comparisons among these variates are used instead.
        y : list of strings
            column name or list of column names to use as Y values to
            compute the bivariate statistic. If no y is provided, pairwise
            comparisons among the x variates are used instead.
        w : pysal weights object
            a weights object aligned with the dataframe. If not provided,
            this is searched for in the dataframe's metadata
        inplace : bool
            whether to operate on the dataframe inplace or to return a
            series containing the results of the computation. If operating
            inplace, the derived columns will be named 'column_moran_local'
        pvalue : string
            which pvalue should be returned. Refer to the Moran_BV
            statistic's documentation for available p-values
        outvals : list of strings
            list of arbitrary attributes to return as columns from the
            Moran_BV statistic
        **stat_kws : keyword arguments
            options to pass to the underlying statistic. For this, see the
            documentation for the Moran_BV statistic.

        Returns
        -------
        If inplace, None, and operation is conducted on dataframe in memory.
        Otherwise, returns a copy of the dataframe with the relevant columns
        attached.

        See Also
        --------
        Moran_BV
        """
        return _bivariate_handler(df, x, y=y, w=w, inplace=inplace,
                                  pvalue=pvalue, outvals=outvals,
                                  swapname=cls.__name__.lower(),
                                  stat=cls, **stat_kws)
def Moran_BV_matrix(variables, w, permutations=0, varnames=None):
    """
    Bivariate Moran Matrix

    Calculates bivariate Moran between all pairs of a set of variables.

    Parameters
    ----------
    variables : array or pandas.DataFrame
        sequence of variables to be assessed
    w : W
        a spatial weights object
    permutations : int
        number of permutations
    varnames : list, optional
        if variables is an array: strings for variable names. Will add an
        attribute to `Moran_BV` objects in results needed for plotting in
        `splot` or `.plot()`. Default =None.
        Note: if variables is a `pandas.DataFrame` varnames will
        automatically be generated.

    Returns
    -------
    results : dictionary
        (i, j) is the key for the pair of variables, values are the
        Moran_BV objects.
    """
    try:
        # pandas is optional; fall back to treating variables as a sequence
        import pandas
    except ImportError:
        arrays = variables
    else:
        if isinstance(variables, pandas.DataFrame):
            # derive names from the frame and extract each column as an array
            varnames = pandas.Index.tolist(variables.columns)
            arrays = [variables[str(name)].values for name in varnames]
        else:
            arrays = variables
    return _Moran_BV_Matrix_array(variables=arrays, w=w,
                                  permutations=permutations,
                                  varnames=varnames)
def _Moran_BV_Matrix_array(variables, w, permutations=0, varnames=None):
    """Base calculation for Moran_BV_matrix.

    Parameters
    ----------
    variables : sequence of arrays
        variables to compare pairwise
    w : W
        a spatial weights object
    permutations : int
        number of permutations passed through to each Moran_BV
    varnames : list of str, optional
        names attached to each Moran_BV result for plotting; defaults to
        'x0', 'x1', ...

    Returns
    -------
    dict mapping (i, j) to the Moran_BV object for the ordered pair.
    """
    k = len(variables)
    # BUG FIX: ``k`` was previously referenced in the default-varnames
    # branch one statement before it was assigned, so calling this with
    # varnames=None raised NameError.
    if varnames is None:
        varnames = ['x{}'.format(i) for i in range(k)]
    results = {}
    for i in range(k - 1):
        for j in range(i + 1, k):
            y1 = variables[i]
            y2 = variables[j]
            # both orderings are computed: (i, j) and (j, i) differ in
            # which variable is lagged
            results[i, j] = Moran_BV(y1, y2, w, permutations=permutations)
            results[j, i] = Moran_BV(y2, y1, w, permutations=permutations)
            results[i, j].varnames = {'x': varnames[i], 'y': varnames[j]}
            results[j, i].varnames = {'x': varnames[j], 'y': varnames[i]}
    return results
class Moran_Rate(Moran):
    """
    Adjusted Moran's I Global Autocorrelation Statistic for Rate Variables
    :cite:`Assun_o_1999`

    Parameters
    ----------
    e : array
        an event variable measured across n spatial units
    b : array
        a population-at-risk variable measured across n spatial units
    w : W
        spatial weights instance
    adjusted : boolean
        whether or not Moran's I needs to be adjusted for rate variable
    transformation : {'R', 'B', 'D', 'U', 'V'}
        weights transformation, default is row-standardized "r".
        Other options include "B": binary, "D": doubly-standardized,
        "U": untransformed (general weights), "V": variance-stabilizing.
    two_tailed : boolean
        If True (default), analytical p-values for Moran's I are
        two-tailed, otherwise they are one tailed.
    permutations : int
        number of random permutations for calculation of pseudo p_values

    Attributes
    ----------
    y : array
        rate variable computed from parameters e and b; if adjusted is
        True, y is standardized rates, otherwise y is raw rates
    plus every attribute documented on :class:`Moran` (I, EI, p_norm,
    p_rand, sim, p_sim, ...).
    """

    def __init__(self, e, b, w, adjusted=True, transformation="r",
                 permutations=PERMUTATIONS, two_tailed=True):
        e = np.asarray(e).flatten()
        b = np.asarray(b).flatten()
        if adjusted:
            # Assuncao-Reis empirical Bayes standardized rates
            y = assuncao_rate(e, b)
        else:
            y = e * 1.0 / b
        Moran.__init__(self, y, w, transformation=transformation,
                       permutations=permutations, two_tailed=two_tailed)

    @classmethod
    def by_col(cls, df, events, populations, w=None, inplace=False,
               pvalue='sim', outvals=None, swapname='', **stat_kws):
        """
        Function to compute a Moran_Rate statistic on a dataframe

        Arguments
        ---------
        df : pandas.DataFrame
            a pandas dataframe with a geometry column
        events : string or list of strings
            one or more names where events are stored
        populations : string or list of strings
            one or more names where the populations corresponding to the
            events are stored. If one population column is provided, it is
            used for all event columns. If more than one population column
            is provided but there is not a population for every event
            column, an exception will be raised.
        w : pysal weights object
            a weights object aligned with the dataframe. If not provided,
            this is searched for in the dataframe's metadata
        inplace : bool
            whether to operate on the dataframe inplace or to return a
            series containing the results of the computation. If operating
            inplace, the derived columns will be named 'column_moran_rate'
        pvalue : string
            which pvalue should be returned. Refer to the Moran_Rate
            statistic's documentation for available p-values
        outvals : list of strings
            list of arbitrary attributes to return as columns from the
            Moran_Rate statistic
        **stat_kws : keyword arguments
            options to pass to the underlying statistic. For this, see the
            documentation for the Moran_Rate statistic.

        Returns
        -------
        If inplace, None, and operation is conducted on dataframe in memory.
        Otherwise, returns a copy of the dataframe with the relevant columns
        attached.

        See Also
        --------
        Moran_Rate
        """
        if not inplace:
            new = df.copy()
            cls.by_col(new, events, populations, w=w, inplace=True,
                       pvalue=pvalue, outvals=outvals, swapname=swapname,
                       **stat_kws)
            return new
        if isinstance(events, str):
            events = [events]
        if isinstance(populations, str):
            populations = [populations]
        if len(populations) < len(events):
            # recycle a single population column across all event columns
            populations = populations * len(events)
        if len(events) != len(populations):
            raise ValueError('There is not a one-to-one matching between events and '
                             'populations!\nEvents: {}\n\nPopulations:'
                             ' {}'.format(events, populations))
        adjusted = stat_kws.pop('adjusted', True)
        if isinstance(adjusted, bool):
            adjusted = [adjusted] * len(events)
        # BUG FIX: was ``swapname is ''`` — identity comparison with a
        # string literal is implementation-dependent; use equality.
        if swapname == '':
            swapname = cls.__name__.lower()
        rates = [assuncao_rate(df[e], df[pop]) if adj
                 else df[e].astype(float) / df[pop]
                 for e, pop, adj in zip(events, populations, adjusted)]
        names = ['-'.join((e, p)) for e, p in zip(events, populations)]
        out_df = df.copy()
        # BUG FIX: DataFrame.from_items was deprecated and removed in
        # pandas 1.0; build the frame via the input's own type to keep
        # avoiding a pandas import here.
        rate_df = type(out_df)(dict(zip(names, rates)))
        stat_df = _univariate_handler(rate_df, names, w=w, inplace=False,
                                      pvalue=pvalue, outvals=outvals,
                                      swapname=swapname,
                                      stat=Moran,  # how would this get done w/super?
                                      **stat_kws)
        for col in stat_df.columns:
            df[col] = stat_df[col]
class Moran_Local(object):
    """Local Moran Statistics

    Parameters
    ----------
    y : array
        (n,1), attribute array
    w : W
        weight instance assumed to be aligned with y
    transformation : {'R', 'B', 'D', 'U', 'V'}
        weights transformation, default is row-standardized "r".
        Other options include "B": binary, "D": doubly-standardized,
        "U": untransformed (general weights), "V": variance-stabilizing.
    permutations : int
        number of random permutations for calculation of pseudo p_values
    geoda_quads : boolean
        (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4.
        If False use PySAL Scheme: HH=1, LH=2, LL=3, HL=4

    Attributes
    ----------
    y : array
        original variable
    w : W
        original w object
    permutations : int
        number of random permutations for calculation of pseudo p_values
    Is : array
        local Moran's I values
    q : array
        (if permutations>0) values indicate quadrant location:
        1 HH, 2 LH, 3 LL, 4 HL (PySAL scheme)
    sim : array
        (permutations by n) (if permutations>0) I values for permuted
        samples
    p_sim : array
        (if permutations>0) folded one-sided p-values based on conditional
        permutations
    EI_sim, VI_sim, seI_sim, z_sim : array
        (if permutations>0) mean, variance, standard deviation of local Is
        from permutations, and standardized Is
    p_z_sim : array
        (if permutations>0) one-sided p-values based on a standard normal
        approximation from permutations; multiply by 2 for two-sided tests

    Notes
    -----
    For technical details see :cite:`Anselin95`.
    """

    def __init__(self, y, w, transformation="r", permutations=PERMUTATIONS,
                 geoda_quads=False):
        y = np.asarray(y).flatten()
        self.y = y
        n = len(y)
        self.n = n
        self.n_1 = n - 1
        z = y - y.mean()
        # suppress floating point warnings while standardizing (e.g. a
        # constant y gives sy == 0); settings restored right after
        orig_settings = np.seterr()
        np.seterr(all="ignore")
        sy = y.std()
        z /= sy
        np.seterr(**orig_settings)
        self.z = z
        w.transform = transformation
        self.w = w
        self.permutations = permutations
        self.den = (z * z).sum()
        self.Is = self.calc(self.w, self.z)
        self.geoda_quads = geoda_quads
        quads = [1, 2, 3, 4]
        if geoda_quads:
            quads = [1, 3, 2, 4]
        self.quads = quads
        self.__quads()
        if permutations:
            self.__crand()
            sim = np.transpose(self.rlisas)
            above = sim >= self.Is
            larger = above.sum(0)
            # fold: use the smaller tail so extremes on either side count
            low_extreme = (self.permutations - larger) < larger
            larger[low_extreme] = self.permutations - larger[low_extreme]
            self.p_sim = (larger + 1.0) / (permutations + 1.0)
            self.sim = sim
            self.EI_sim = sim.mean(axis=0)
            self.seI_sim = sim.std(axis=0)
            self.VI_sim = self.seI_sim * self.seI_sim
            self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
            self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))

    def calc(self, w, z):
        """Compute the vector of local Moran's I values.

        NOTE(review): the numerator uses ``self.z`` rather than the ``z``
        argument (only the lag uses ``z``); results are identical in
        current usage because this is always called with ``z=self.z`` —
        confirm before reusing with a different ``z``.
        """
        zl = slag(w, z)
        return self.n_1 * self.z * zl / self.den

    def __crand(self):
        """
        conditional randomization

        for observation i with ni neighbors, the candidate set cannot
        include i (we don't want i being a neighbor of i). we have to
        sample without replacement from a set of ids that doesn't include
        i. numpy doesn't directly support sampling wo replacement and it
        is expensive to implement this. instead we omit i from the
        original ids, permute the ids and take the first ni elements of
        the permuted ids as the neighbors to i in each randomization.
        """
        z = self.z
        lisas = np.zeros((self.n, self.permutations))
        n_1 = self.n - 1
        prange = list(range(self.permutations))
        k = self.w.max_neighbors + 1
        nn = self.n - 1
        rids = np.array([np.random.permutation(nn)[0:k] for i in prange])
        ids = np.arange(self.w.n)
        ido = self.w.id_order
        w = [self.w.weights[ido[i]] for i in ids]
        wc = [self.w.cardinalities[ido[i]] for i in ids]
        for i in range(self.w.n):
            idsi = ids[ids != i]
            np.random.shuffle(idsi)
            tmp = z[idsi[rids[:, 0:wc[i]]]]
            lisas[i] = z[i] * (w[i] * tmp).sum(1)
        self.rlisas = (n_1 / self.den) * lisas

    def __quads(self):
        """Assign quadrant membership codes (self.q) from the signs of z
        and its spatial lag."""
        zl = slag(self.w, self.z)
        zp = self.z > 0
        lp = zl > 0
        # BUG FIX: the low-high indicator was previously named ``np``,
        # shadowing the numpy module inside this method; indicators are
        # now named after their quadrants.
        hh = zp * lp
        lh = (1 - zp) * lp
        ll = (1 - zp) * (1 - lp)
        hl = zp * (1 - lp)
        self.q = self.quads[0] * hh + self.quads[1] * lh \
            + self.quads[2] * ll + self.quads[3] * hl

    @property
    def _statistic(self):
        """More consistent hidden attribute to access ESDA statistics"""
        return self.Is

    @classmethod
    def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim',
               outvals=None, **stat_kws):
        """
        Function to compute a Moran_Local statistic on a dataframe

        Arguments
        ---------
        df : pandas.DataFrame
            a pandas dataframe with a geometry column
        cols : string or list of string
            name or list of names of columns to use to compute the
            statistic
        w : pysal weights object
            a weights object aligned with the dataframe. If not provided,
            this is searched for in the dataframe's metadata
        inplace : bool
            whether to operate on the dataframe inplace or to return a
            series containing the results of the computation. If operating
            inplace, the derived columns will be named
            'column_moran_local'
        pvalue : string
            which pvalue should be returned. Refer to the Moran_Local
            statistic's documentation for available p-values
        outvals : list of strings
            list of arbitrary attributes to return as columns from the
            Moran_Local statistic
        **stat_kws : keyword arguments
            options to pass to the underlying statistic. For this, see the
            documentation for the Moran_Local statistic.

        Returns
        -------
        If inplace, None, and operation is conducted on dataframe in memory.
        Otherwise, returns a copy of the dataframe with the relevant columns
        attached.

        See Also
        --------
        Moran_Local
        """
        return _univariate_handler(df, cols, w=w, inplace=inplace,
                                   pvalue=pvalue, outvals=outvals, stat=cls,
                                   swapname=cls.__name__.lower(), **stat_kws)
class Moran_Local_BV(object):
    """Bivariate Local Moran Statistics

    Parameters
    ----------
    x : array
        x-axis variable
    y : array
        (n,1), wy will be on y axis
    w : W
        weight instance assumed to be aligned with y
    transformation : {'R', 'B', 'D', 'U', 'V'}
        weights transformation, default is row-standardized "r".
        Other options include "B": binary, "D": doubly-standardized,
        "U": untransformed (general weights), "V": variance-stabilizing.
    permutations : int
        number of random permutations for calculation of pseudo p_values
    geoda_quads : boolean
        (default=False) If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4.
        If False use PySAL Scheme: HH=1, LH=2, LL=3, HL=4

    Attributes
    ----------
    zx : array
        original x variable standardized by mean and std
    zy : array
        original y variable standardized by mean and std
    w : W
        original w object
    permutations : int
        number of random permutations for calculation of pseudo p_values
    Is : array
        values of bivariate local Moran's I
    q : array
        (if permutations>0) values indicate quadrant location:
        1 HH, 2 LH, 3 LL, 4 HL (PySAL scheme)
    sim : array
        (if permutations>0) vector of I values for permuted samples
    p_sim : array
        (if permutations>0) folded one-sided p-values based on conditional
        permutations
    EI_sim, VI_sim, seI_sim, z_sim : array
        (if permutations>0) mean, variance, standard deviation of local Is
        from permutations, and standardized Is
    p_z_sim : array
        (if permutations>0) one-sided p-values based on a standard normal
        approximation from permutations; multiply by 2 for two-sided tests
    """

    def __init__(self, x, y, w, transformation="r",
                 permutations=PERMUTATIONS, geoda_quads=False):
        x = np.asarray(x).flatten()
        y = np.asarray(y).flatten()
        self.y = y
        self.x = x
        n = len(y)
        self.n = n
        self.n_1 = n - 1
        zx = x - x.mean()
        zy = y - y.mean()
        # suppress floating point warnings while standardizing (e.g. a
        # constant variable gives std == 0); settings restored right after
        orig_settings = np.seterr()
        np.seterr(all="ignore")
        sx = x.std()
        zx /= sx
        sy = y.std()
        zy /= sy
        np.seterr(**orig_settings)
        self.zx = zx
        self.zy = zy
        w.transform = transformation
        self.w = w
        self.permutations = permutations
        self.den = (zx * zx).sum()
        self.Is = self.calc(self.w, self.zx, self.zy)
        self.geoda_quads = geoda_quads
        quads = [1, 2, 3, 4]
        if geoda_quads:
            quads = [1, 3, 2, 4]
        self.quads = quads
        self.__quads()
        if permutations:
            self.__crand()
            sim = np.transpose(self.rlisas)
            above = sim >= self.Is
            larger = above.sum(0)
            # fold: use the smaller tail so extremes on either side count
            low_extreme = (self.permutations - larger) < larger
            larger[low_extreme] = self.permutations - larger[low_extreme]
            self.p_sim = (larger + 1.0) / (permutations + 1.0)
            self.sim = sim
            self.EI_sim = sim.mean(axis=0)
            self.seI_sim = sim.std(axis=0)
            self.VI_sim = self.seI_sim * self.seI_sim
            self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
            self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))

    def calc(self, w, zx, zy):
        """Compute the vector of bivariate local Moran's I values.

        NOTE(review): the numerator uses ``self.zx`` rather than the
        ``zx`` argument (only the lag uses ``zy``); results are identical
        in current usage — confirm before reusing with other arguments.
        """
        zly = slag(w, zy)
        return self.n_1 * self.zx * zly / self.den

    def __crand(self):
        """
        conditional randomization

        for observation i with ni neighbors, the candidate set cannot
        include i (we don't want i being a neighbor of i). we have to
        sample without replacement from a set of ids that doesn't include
        i. numpy doesn't directly support sampling wo replacement and it
        is expensive to implement this. instead we omit i from the
        original ids, permute the ids and take the first ni elements of
        the permuted ids as the neighbors to i in each randomization.
        """
        lisas = np.zeros((self.n, self.permutations))
        n_1 = self.n - 1
        prange = list(range(self.permutations))
        k = self.w.max_neighbors + 1
        nn = self.n - 1
        rids = np.array([np.random.permutation(nn)[0:k] for i in prange])
        ids = np.arange(self.w.n)
        ido = self.w.id_order
        w = [self.w.weights[ido[i]] for i in ids]
        wc = [self.w.cardinalities[ido[i]] for i in ids]
        zx = self.zx
        zy = self.zy
        for i in range(self.w.n):
            idsi = ids[ids != i]
            np.random.shuffle(idsi)
            # only the lagged variable (zy) is randomized; zx[i] is fixed
            tmp = zy[idsi[rids[:, 0:wc[i]]]]
            lisas[i] = zx[i] * (w[i] * tmp).sum(1)
        self.rlisas = (n_1 / self.den) * lisas

    def __quads(self):
        """Assign quadrant membership codes (self.q) from the signs of zx
        and the spatial lag of zy."""
        zl = slag(self.w, self.zy)
        zp = self.zx > 0
        lp = zl > 0
        # BUG FIX: the low-high indicator was previously named ``np``,
        # shadowing the numpy module inside this method; indicators are
        # now named after their quadrants.
        hh = zp * lp
        lh = (1 - zp) * lp
        ll = (1 - zp) * (1 - lp)
        hl = zp * (1 - lp)
        self.q = self.quads[0] * hh + self.quads[1] * lh \
            + self.quads[2] * ll + self.quads[3] * hl

    @property
    def _statistic(self):
        """More consistent hidden attribute to access ESDA statistics"""
        return self.Is

    @classmethod
    def by_col(cls, df, x, y=None, w=None, inplace=False, pvalue='sim',
               outvals=None, **stat_kws):
        """
        Function to compute a Moran_Local_BV statistic on a dataframe

        Arguments
        ---------
        df : pandas.DataFrame
            a pandas dataframe with a geometry column
        x : list of strings
            column name or list of column names to use as X values to
            compute the bivariate statistic. If no y is provided, pairwise
            comparisons among these variates are used instead.
        y : list of strings
            column name or list of column names to use as Y values to
            compute the bivariate statistic. If no y is provided, pairwise
            comparisons among the x variates are used instead.
        w : pysal weights object
            a weights object aligned with the dataframe. If not provided,
            this is searched for in the dataframe's metadata
        inplace : bool
            whether to operate on the dataframe inplace or to return a
            series containing the results of the computation. If operating
            inplace, the derived columns will be named
            'column_moran_local_bv'
        pvalue : string
            which pvalue should be returned. Refer to the Moran_Local_BV
            statistic's documentation for available p-values
        outvals : list of strings
            list of arbitrary attributes to return as columns from the
            Moran_Local_BV statistic
        **stat_kws : keyword arguments
            options to pass to the underlying statistic. For this, see the
            documentation for the Moran_Local_BV statistic.

        Returns
        -------
        If inplace, None, and operation is conducted on dataframe in memory.
        Otherwise, returns a copy of the dataframe with the relevant columns
        attached.

        See Also
        --------
        Moran_Local_BV
        """
        return _bivariate_handler(df, x, y=y, w=w, inplace=inplace,
                                  pvalue=pvalue, outvals=outvals,
                                  swapname=cls.__name__.lower(),
                                  stat=cls, **stat_kws)
class Moran_Local_Rate(Moran_Local):
    """
    Adjusted Local Moran Statistics for Rate Variables [Assuncao1999]_

    Parameters
    ----------
    e : array
        (n,1), an event variable across n spatial units
    b : array
        (n,1), a population-at-risk variable across n spatial units
    w : W
        weight instance assumed to be aligned with y
    adjusted : boolean
        whether or not local Moran statistics need to be adjusted for
        rate variable
    transformation : {'R', 'B', 'D', 'U', 'V'}
        weights transformation, default is row-standardized "r".
        Other options include
        "B": binary,
        "D": doubly-standardized,
        "U": untransformed (general weights),
        "V": variance-stabilizing.
    permutations : int
        number of random permutations for calculation of pseudo p_values
    geoda_quads : boolean
        (default=False)
        If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
        If False use PySAL Scheme: HH=1, LH=2, LL=3, HL=4

    Attributes
    ----------
    y : array
        rate variables computed from parameters e and b
        if adjusted is True, y is standardized rates
        otherwise, y is raw rates
    w : W
        original w object
    permutations : int
        number of random permutations for calculation of pseudo p_values
    I : float
        value of Moran's I
    q : array
        (if permutations>0)
        values indicate quadrant location 1 HH, 2 LH, 3 LL, 4 HL
    sim : array
        (if permutations>0)
        vector of I values for permuted samples
    p_sim : array
        (if permutations>0)
        p-value based on permutations (one-sided)
        null: spatial randomness
        alternative: the observed Ii is further away or extreme from the
        median of simulated Iis. It is either extremely high or extremely
        low in the distribution of simulated Is
    EI_sim : float
        (if permutations>0)
        average value of I from permutations
    VI_sim : float
        (if permutations>0)
        variance of I from permutations
    seI_sim : float
        (if permutations>0)
        standard deviation of I under permutations.
    z_sim : float
        (if permutations>0)
        standardized I based on permutations
    p_z_sim : float
        (if permutations>0)
        p-value based on standard normal approximation from
        permutations (one-sided)
        for two-sided tests, these values should be multiplied by 2

    Examples
    --------
    >>> import pysal.lib
    >>> import numpy as np
    >>> np.random.seed(10)
    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.gal")).read()
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.dbf"))
    >>> e = np.array(f.by_col('SID79'))
    >>> b = np.array(f.by_col('BIR79'))
    >>> from pysal.explore.esda.moran import Moran_Local_Rate
    >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99)
    >>> lm.q[:10]
    array([2, 4, 3, 1, 2, 1, 1, 4, 2, 4])
    >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99, geoda_quads=True)
    >>> lm.q[:10]
    array([3, 4, 2, 1, 3, 1, 1, 4, 3, 4])

    Note random components result is slightly different values across
    architectures so the results have been removed from doctests and will be
    moved into unittests that are conditional on architectures
    """

    def __init__(self, e, b, w, adjusted=True, transformation="r",
                 permutations=PERMUTATIONS, geoda_quads=False):
        e = np.asarray(e).flatten()
        b = np.asarray(b).flatten()
        # Empirical-Bayes standardized rates (Assuncao & Reis) or raw rates
        if adjusted:
            y = assuncao_rate(e, b)
        else:
            y = e * 1.0 / b
        Moran_Local.__init__(self, y, w,
                             transformation=transformation,
                             permutations=permutations,
                             geoda_quads=geoda_quads)

    @classmethod
    def by_col(cls, df, events, populations, w=None, inplace=False,
               pvalue='sim', outvals=None, swapname='', **stat_kws):
        """
        Function to compute a Moran_Local_Rate statistic on a dataframe

        Arguments
        ---------
        df          :   pandas.DataFrame
                        a pandas dataframe with a geometry column
        events      :   string or list of strings
                        one or more names where events are stored
        populations :   string or list of strings
                        one or more names where the populations corresponding
                        to the events are stored. If one population column is
                        provided, it is used for all event columns. If more
                        than one population column is provided but there is
                        not a population for every event column, an exception
                        will be raised.
        w           :   pysal weights object
                        a weights object aligned with the dataframe. If not
                        provided, this is searched for in the dataframe's
                        metadata
        inplace     :   bool
                        a boolean denoting whether to operate on the dataframe
                        inplace or to return a series containing the results
                        of the computation. If operating inplace, the derived
                        columns will be named 'column_moran_local_rate'
        pvalue      :   string
                        a string denoting which pvalue should be returned.
                        Refer to the Moran_Local_Rate statistic's documentation
                        for available p-values
        outvals     :   list of strings
                        list of arbitrary attributes to return as columns from
                        the Moran_Local_Rate statistic
        **stat_kws  :   keyword arguments
                        options to pass to the underlying statistic. For this,
                        see the documentation for the Moran_Local_Rate
                        statistic.

        Returns
        --------
        If inplace, None, and operation is conducted on dataframe in memory.
        Otherwise, returns a copy of the dataframe with the relevant columns
        attached.

        See Also
        ---------
        Moran_Local_Rate
        """
        if not inplace:
            new = df.copy()
            cls.by_col(new, events, populations, w=w, inplace=True,
                       pvalue=pvalue, outvals=outvals, swapname=swapname,
                       **stat_kws)
            return new
        if isinstance(events, str):
            events = [events]
        if isinstance(populations, str):
            populations = [populations]
        # broadcast a short population list over all event columns
        if len(populations) < len(events):
            populations = populations * len(events)
        if len(events) != len(populations):
            raise ValueError('There is not a one-to-one matching between events and '
                             'populations!\nEvents: {}\n\nPopulations:'
                             ' {}'.format(events, populations))
        adjusted = stat_kws.pop('adjusted', True)
        if isinstance(adjusted, bool):
            adjusted = [adjusted] * len(events)
        # BUGFIX: was `swapname is ''` — identity comparison with a string
        # literal is implementation-dependent and a SyntaxWarning in
        # CPython >= 3.8; use equality instead
        if swapname == '':
            swapname = cls.__name__.lower()
        rates = [assuncao_rate(df[e], df[pop]) if adj
                 else df[e].astype(float) / df[pop]
                 for e, pop, adj in zip(events, populations, adjusted)]
        names = ['-'.join((e, p)) for e, p in zip(events, populations)]
        out_df = df.copy()
        # trick to avoid importing pandas
        # NOTE(review): DataFrame.from_items is deprecated/removed in
        # modern pandas — confirm target pandas version before upgrading
        rate_df = out_df.from_items(list(zip(names, rates)))
        _univariate_handler(rate_df, names, w=w, inplace=True,
                            pvalue=pvalue, outvals=outvals,
                            swapname=swapname,
                            stat=Moran_Local,  # how would this get done w/super?
                            **stat_kws)
        for col in rate_df.columns:
            df[col] = rate_df[col]