Source code for pysal.explore.esda.geary

"""
Geary's C statistic for spatial autocorrelation
"""
__author__ = "Serge Rey <sjsrey@gmail.com> "

import numpy as np
import scipy.stats as stats
from pysal.lib import weights
from .tabular import _univariate_handler

__all__ = ['Geary']


class Geary(object):
    """
    Global Geary C Autocorrelation statistic

    Parameters
    ----------
    y              : array
                     (n, 1) attribute vector
    w              : W
                     spatial weights
    transformation : {'B', 'R', 'D', 'U', 'V'}
                     weights transformation, default is binary.
                     Other options include "R": row-standardized,
                     "D": doubly-standardized, "U": untransformed
                     (general weights), "V": variance-stabilizing.
    permutations   : int
                     number of random permutations for calculation of
                     pseudo-p_values

    Attributes
    ----------
    y            : array
                   original variable
    w            : W
                   spatial weights
    permutations : int
                   number of permutations
    C            : float
                   value of statistic
    EC           : float
                   expected value
    VC           : float
                   variance of C under normality assumption
    z_norm       : float
                   z-statistic for C under normality assumption
    z_rand       : float
                   z-statistic for C under randomization assumption
    p_norm       : float
                   p-value under normality assumption (one-tailed)
    p_rand       : float
                   p-value under randomization assumption (one-tailed)
    sim          : array
                   (if permutations!=0)
                   vector of C values for permuted samples
    p_sim        : float
                   (if permutations!=0)
                   p-value based on permutations (one-tailed)
                   null: spatial randomness
                   alternative: the observed C is extreme; it is either
                   extremely high or extremely low
    EC_sim       : float
                   (if permutations!=0)
                   average value of C from permutations
    VC_sim       : float
                   (if permutations!=0)
                   variance of C from permutations
    seC_sim      : float
                   (if permutations!=0)
                   standard deviation of C under permutations
    z_sim        : float
                   (if permutations!=0)
                   standardized C based on permutations
    p_z_sim      : float
                   (if permutations!=0)
                   p-value based on standard normal approximation from
                   permutations (one-tailed)

    Examples
    --------
    >>> import numpy as np
    >>> import pysal.lib
    >>> from pysal.explore.esda.geary import Geary
    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("book.gal")).read()
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("book.txt"))
    >>> y = np.array(f.by_col['y'])
    >>> c = Geary(y, w, permutations=0)
    >>> round(c.C, 7)
    0.3330108
    >>> round(c.p_norm, 7)
    9.2e-05

    Notes
    -----
    Technical details and derivations can be found in :cite:`cliff81`.
    """
    def __init__(self, y, w, transformation="r", permutations=999):
        if not isinstance(w, weights.W):
            raise TypeError('w must be a pysal weights object, got {}'
                            ' instead'.format(type(w)))
        y = np.asarray(y).flatten()
        self.n = len(y)
        self.y = y
        w.transform = transformation
        self.w = w
        self.permutations = permutations
        self.__moments()
        xn = range(len(y))
        self.xn = xn
        self.y2 = y * y
        yd = y - y.mean()
        yss = sum(yd * yd)
        self.den = yss * self.w.s0 * 2.0
        self.C = self.__calc(y)
        de = self.C - 1.0
        self.EC = 1.0
        self.z_norm = de / self.seC_norm
        self.z_rand = de / self.seC_rand
        if de > 0:
            self.p_norm = 1 - stats.norm.cdf(self.z_norm)
            self.p_rand = 1 - stats.norm.cdf(self.z_rand)
        else:
            self.p_norm = stats.norm.cdf(self.z_norm)
            self.p_rand = stats.norm.cdf(self.z_rand)

        if permutations:
            sim = [self.__calc(np.random.permutation(self.y))
                   for i in range(permutations)]
            self.sim = sim = np.array(sim)
            above = sim >= self.C
            larger = sum(above)
            if (permutations - larger) < larger:
                larger = permutations - larger
            self.p_sim = (larger + 1.) / (permutations + 1.)
            self.EC_sim = sum(sim) / permutations
            self.seC_sim = np.array(sim).std()
            self.VC_sim = self.seC_sim ** 2
            self.z_sim = (self.C - self.EC_sim) / self.seC_sim
            self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))
    @property
    def _statistic(self):
        """a standardized accessor for pysal.explore.esda statistics"""
        return self.C

    def __moments(self):
        y = self.y
        n = self.n
        w = self.w
        s0 = w.s0
        s1 = w.s1
        s2 = w.s2
        s02 = s0 * s0
        yd = y - y.mean()
        yd4 = yd ** 4
        yd2 = yd ** 2
        n2 = n * n
        k = (yd4.sum() / n) / ((yd2.sum() / n) ** 2)
        A = (n - 1) * s1 * (n2 - 3 * n + 3 - (n - 1) * k)
        B = (1. / 4) * ((n - 1) * s2 * (n2 + 3 * n - 6 - (n2 - n + 2) * k))
        C = s02 * (n2 - 3 - (n - 1) ** 2 * k)
        vc_rand = (A - B + C) / (n * (n - 2) * (n - 3) * s02)
        vc_norm = ((1 / (2 * (n + 1) * s02)) *
                   ((2 * s1 + s2) * (n - 1) - 4 * s02))

        self.VC_rand = vc_rand
        self.VC_norm = vc_norm
        self.seC_rand = vc_rand ** (0.5)
        self.seC_norm = vc_norm ** (0.5)

    def __calc(self, y):
        ys = np.zeros(y.shape)
        y2 = y ** 2
        for i, i0 in enumerate(self.w.id_order):
            neighbors = self.w.neighbor_offsets[i0]
            wijs = self.w.weights[i0]
            z = list(zip(neighbors, wijs))
            ys[i] = sum([wij * (y2[i] - 2 * y[i] * y[j] + y2[j])
                         for j, wij in z])
        a = (self.n - 1) * sum(ys)
        return a / self.den

    @classmethod
    def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim',
               outvals=None, **stat_kws):
        """
        Function to compute a Geary statistic on a dataframe

        Arguments
        ---------
        df          : pandas.DataFrame
                      a pandas dataframe with a geometry column
        cols        : string or list of string
                      name or list of names of columns to use to compute
                      the statistic
        w           : pysal weights object
                      a weights object aligned with the dataframe. If not
                      provided, this is searched for in the dataframe's
                      metadata
        inplace     : bool
                      a boolean denoting whether to operate on the dataframe
                      inplace or to return a series containing the results
                      of the computation. If operating inplace, with default
                      configurations, the derived columns will be named
                      like 'column_geary' and 'column_p_sim'
        pvalue      : string
                      a string denoting which pvalue should be returned.
                      Refer to the Geary statistic's documentation for
                      available p-values
        outvals     : list of strings
                      list of arbitrary attributes to return as columns
                      from the Geary statistic
        **stat_kws  : keyword arguments
                      options to pass to the underlying statistic. For this,
                      see the documentation for the Geary statistic.

        Returns
        -------
        If inplace, None, and operation is conducted on dataframe in memory.
        Otherwise, returns a copy of the dataframe with the relevant columns
        attached.

        Notes
        -----
        Technical details and derivations can be found in :cite:`cliff81`.
        """
        return _univariate_handler(df, cols, w=w, inplace=inplace,
                                   pvalue=pvalue, outvals=outvals,
                                   stat=cls, swapname=cls.__name__.lower(),
                                   **stat_kws)
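
The `__calc` method above evaluates Geary's C directly from its definition, C = (n - 1) * sum_ij w_ij * (y_i - y_j)^2 / (2 * S0 * sum_i (y_i - ybar)^2), with the denominator precomputed once in `__init__` as `self.den`. The following is a minimal cross-check sketch, not part of this module: it recomputes the same quantity from the dense form of the weights matrix with plain NumPy. The lattice builder choice (`lat2W`), the helper name `geary_c_dense`, and the random seed are illustrative assumptions.

# Cross-check sketch (illustrative, not part of pysal.explore.esda.geary):
# Geary's C from a dense (n, n) weights matrix, using the same formula
# implemented sparsely by Geary.__calc above.
import numpy as np
from pysal.lib import weights
from pysal.explore.esda.geary import Geary


def geary_c_dense(y, w):
    """Geary's C computed from the dense form of the weights object w."""
    y = np.asarray(y).flatten()
    n = len(y)
    wd, _ = w.full()                       # dense weights matrix and id order
    num = ((y[:, None] - y[None, :]) ** 2 * wd).sum()
    den = 2.0 * w.s0 * ((y - y.mean()) ** 2).sum()
    return (n - 1) * num / den


if __name__ == "__main__":
    w = weights.lat2W(5, 5)                # rook weights on a 5x5 lattice
    w.transform = 'r'                      # match Geary's default transformation
    np.random.seed(12345)
    y = np.random.random(w.n)
    # the two values should agree up to floating-point error
    print(round(geary_c_dense(y, w), 7),
          round(Geary(y, w, permutations=0).C, 7))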