pysal.explore.esda.join_counts 源代码

"""
Spatial autocorrelation for binary attributes

"""
__author__ = "Sergio J. Rey <srey@asu.edu> , Luc Anselin <luc.anselin@asu.edu>"

from pysal.lib.weights.spatial_lag import lag_spatial
from .tabular import _univariate_handler
import numpy as np

__all__ = ['Join_Counts']

PERMUTATIONS = 999


[文档]class Join_Counts(object): """Binary Join Counts Parameters ---------- y : array binary variable measured across n spatial units w : W spatial weights instance permutations : int number of random permutations for calculation of pseudo-p_values Attributes ---------- y : array original variable w : W original w object permutations : int number of permutations bb : float number of black-black joins ww : float number of white-white joins bw : float number of black-white joins J : float number of joins sim_bb : array (if permutations>0) vector of bb values for permuted samples p_sim_bb : array (if permutations>0) p-value based on permutations (one-sided) null: spatial randomness alternative: the observed bb is greater than under randomness mean_bb : float average of permuted bb values min_bb : float minimum of permuted bb values max_bb : float maximum of permuted bb values sim_bw : array (if permutations>0) vector of bw values for permuted samples p_sim_bw : array (if permutations>0) p-value based on permutations (one-sided) null: spatial randomness alternative: the observed bw is greater than under randomness mean_bw : float average of permuted bw values min_bw : float minimum of permuted bw values max_bw : float maximum of permuted bw values Examples -------- Replicate example from anselin and rey >>> import numpy as np >>> import pysal.lib >>> w = pysal.lib.weights.lat2W(4, 4) >>> y = np.ones(16) >>> y[0:8] = 0 >>> np.random.seed(12345) >>> from pysal.explore.esda.join_counts import Join_Counts >>> jc = Join_Counts(y, w) >>> jc.bb 10.0 >>> jc.bw 4.0 >>> jc.ww 10.0 >>> jc.J 24.0 >>> len(jc.sim_bb) 999 >>> round(jc.p_sim_bb, 3) 0.003 >>> round(np.mean(jc.sim_bb), 3) 5.547 >>> np.max(jc.sim_bb) 10.0 >>> np.min(jc.sim_bb) 0.0 >>> len(jc.sim_bw) 999 >>> jc.p_sim_bw 1.0 >>> np.mean(jc.sim_bw) 12.811811811811811 >>> np.max(jc.sim_bw) 24.0 >>> np.min(jc.sim_bw) 7.0 >>> Notes ----- Technical details and derivations can be found in :cite:`cliff81`. """
[文档] def __init__(self, y, w, permutations=PERMUTATIONS): y = np.asarray(y).flatten() w.transformation = 'b' # ensure we have binary weights self.w = w self.y = y self.permutations = permutations self.J = w.s0 / 2. self.bb, self.ww, self.bw = self.__calc(self.y) if permutations: sim = [self.__calc(np.random.permutation(self.y)) for i in range(permutations)] sim_jc = np.array(sim) self.sim_bb = sim_jc[:, 0] self.min_bb = np.min(self.sim_bb) self.mean_bb = np.mean(self.sim_bb) self.max_bb = np.max(self.sim_bb) self.sim_bw = sim_jc[:, 2] self.min_bw = np.min(self.sim_bw) self.mean_bw = np.mean(self.sim_bw) self.max_bw = np.max(self.sim_bw) p_sim_bb = self.__pseudop(self.sim_bb, self.bb) p_sim_bw = self.__pseudop(self.sim_bw, self.bw) self.p_sim_bb = p_sim_bb self.p_sim_bw = p_sim_bw
def __calc(self, z): zl = lag_spatial(self.w, z) bb = sum(z * zl) / 2.0 zw = 1 - z zl = lag_spatial(self.w, zw) ww = sum(zw * zl) / 2.0 bw = self.J - (bb + ww) return (bb, ww, bw) def __pseudop(self, sim, jc): above = sim >= jc larger = sum(above) psim = (larger + 1.) / (self.permutations + 1.) return psim @property def _statistic(self): return self.bw @classmethod def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **stat_kws): """ Function to compute a Join_Count statistic on a dataframe Arguments --------- df : pandas.DataFrame a pandas dataframe with a geometry column cols : string or list of string name or list of names of columns to use to compute the statistic w : pysal weights object a weights object aligned with the dataframe. If not provided, this is searched for in the dataframe's metadata inplace : bool a boolean denoting whether to operate on the dataframe inplace or to return a series contaning the results of the computation. If operating inplace, the derived columns will be named 'column_join_count' pvalue : string a string denoting which pvalue should be returned. Refer to the the Join_Count statistic's documentation for available p-values outvals : list of strings list of arbitrary attributes to return as columns from the Join_Count statistic **stat_kws : keyword arguments options to pass to the underlying statistic. For this, see the documentation for the Join_Count statistic. Returns -------- If inplace, None, and operation is conducted on dataframe in memory. Otherwise, returns a copy of the dataframe with the relevant columns attached. See Also --------- Join_Count """ if outvals is None: outvals = [] outvals.extend(['bb', 'p_sim_bw', 'p_sim_bb']) pvalue = '' return _univariate_handler(df, cols, w=w, inplace=inplace, pvalue=pvalue, outvals=outvals, stat=cls, swapname='bw', **stat_kws)