pysal.explore.esda.gamma 源代码

"""
Gamma index for spatial autocorrelation


"""
__author__ = "Luc Anselin <luc.anselin@asu.edu> Serge Rey <sjsrey@gmail.com>"

import numpy as np
from pysal.lib.weights.spatial_lag import lag_spatial
from .tabular import _univariate_handler

__all__ = ['Gamma']

PERMUTATIONS = 999


[文档]class Gamma(object): """Gamma index for spatial autocorrelation Parameters ---------- y : array variable measured across n spatial units w : W spatial weights instance can be binary or row-standardized operation : {'c', 's', 'a'} attribute similarity function where, 'c' cross product 's' squared difference 'a' absolute difference standardize : {'no', 'yes'} standardize variables first 'no' keep as is 'yes' or 'y' standardize to mean zero and variance one permutations : int number of random permutations for calculation of pseudo-p_values Attributes ---------- y : array original variable w : W original w object op : {'c', 's', 'a'} attribute similarity function, as per parameters attribute similarity function stand : {'no', 'yes'} standardization permutations : int number of permutations gamma : float value of Gamma index sim_g : array (if permutations>0) vector of Gamma index values for permuted samples p_sim_g : array (if permutations>0) p-value based on permutations (one-sided) null: spatial randomness alternative: the observed Gamma is more extreme than under randomness implemented as a two-sided test mean_g : float average of permuted Gamma values min_g : float minimum of permuted Gamma values max_g : float maximum of permuted Gamma values Examples -------- use same example as for join counts to show similarity >>> import pysal.lib, numpy as np >>> from pysal.explore.esda.gamma import Gamma >>> w = pysal.lib.weights.lat2W(4,4) >>> y=np.ones(16) >>> y[0:8]=0 >>> np.random.seed(12345) >>> g = Gamma(y,w) >>> g.g 20.0 >>> round(g.g_z, 3) 3.188 >>> round(g.p_sim_g, 3) 0.003 >>> g.min_g 0.0 >>> g.max_g 20.0 >>> g.mean_g 11.093093093093094 >>> np.random.seed(12345) >>> g1 = Gamma(y,w,operation='s') >>> g1.g 8.0 >>> round(g1.g_z, 3) -3.706 >>> g1.p_sim_g 0.001 >>> g1.min_g 14.0 >>> g1.max_g 48.0 >>> g1.mean_g 25.623623623623622 >>> np.random.seed(12345) >>> g2 = Gamma(y,w,operation='a') >>> g2.g 8.0 >>> round(g2.g_z, 3) -3.706 >>> g2.p_sim_g 0.001 >>> g2.min_g 14.0 >>> g2.max_g 48.0 >>> g2.mean_g 25.623623623623622 >>> np.random.seed(12345) >>> g3 = Gamma(y,w,standardize='y') >>> g3.g 32.0 >>> round(g3.g_z, 3) 3.706 >>> g3.p_sim_g 0.001 >>> g3.min_g -48.0 >>> g3.max_g 20.0 >>> g3.mean_g -3.2472472472472473 >>> np.random.seed(12345) >>> def func(z,i,j): ... q = z[i]*z[j] ... return q ... >>> g4 = Gamma(y,w,operation=func) >>> g4.g 20.0 >>> round(g4.g_z, 3) 3.188 >>> round(g4.p_sim_g, 3) 0.003 Notes ----- For further technical details see :cite:`Hubert_1981`. """
[文档] def __init__(self, y, w, operation='c', standardize='no', permutations=PERMUTATIONS): y = np.asarray(y).flatten() self.w = w self.y = y self.op = operation self.stand = standardize.lower() self.permutations = permutations if self.stand == 'yes' or self.stand == 'y': ym = np.mean(self.y) ysd = np.std(self.y) ys = (self.y - ym) / ysd self.y = ys self.g = self.__calc(self.y, self.op) if permutations: sim = [self.__calc(np.random.permutation(self.y), self.op) for i in range(permutations)] self.sim_g = np.array(sim) self.min_g = np.min(self.sim_g) self.mean_g = np.mean(self.sim_g) self.max_g = np.max(self.sim_g) p_sim_g = self.__pseudop(self.sim_g, self.g) self.p_sim_g = p_sim_g self.g_z = (self.g - self.mean_g) / np.std(self.sim_g)
@property def _statistic(self): return self.g @property def p_sim(self): """new name to fit with Moran module""" return self.p_sim_g def __calc(self, z, op): if op == 'c': # cross-product zl = lag_spatial(self.w, z) g = (z * zl).sum() elif op == 's': # squared difference zs = np.zeros(z.shape) z2 = z ** 2 for i, i0 in enumerate(self.w.id_order): neighbors = self.w.neighbor_offsets[i0] wijs = self.w.weights[i0] zw = list(zip(neighbors, wijs)) zs[i] = sum([wij * (z2[i] - 2.0 * z[i] * z[ j] + z2[j]) for j, wij in zw]) g = zs.sum() elif op == 'a': # absolute difference zs = np.zeros(z.shape) for i, i0 in enumerate(self.w.id_order): neighbors = self.w.neighbor_offsets[i0] wijs = self.w.weights[i0] zw = list(zip(neighbors, wijs)) zs[i] = sum([wij * abs(z[i] - z[j]) for j, wij in zw]) g = zs.sum() else: # any previously defined function op zs = np.zeros(z.shape) for i, i0 in enumerate(self.w.id_order): neighbors = self.w.neighbor_offsets[i0] wijs = self.w.weights[i0] zw = list(zip(neighbors, wijs)) zs[i] = sum([wij * op(z, i, j) for j, wij in zw]) g = zs.sum() return g def __pseudop(self, sim, g): above = sim >= g larger = above.sum() psim = (larger + 1.) / (self.permutations + 1.) if psim > 0.5: psim = (self.permutations - larger + 1.) / (self.permutations + 1.) return psim @classmethod def by_col(cls, df, cols, w=None, inplace=False, pvalue = 'sim', outvals = None, **stat_kws): return _univariate_handler(df, cols, w=w, inplace=inplace, pvalue=pvalue, outvals=outvals, stat=cls, swapname=cls.__name__.lower(), **stat_kws)