pysal.explore.esda.moran 源代码

"""
Moran's I Spatial Autocorrelation Statistics

"""
__author__ = "Sergio J. Rey <srey@asu.edu>, \
        Dani Arribas-Bel <daniel.arribas.bel@gmail.com>"
from pysal.lib.weights.spatial_lag import lag_spatial as slag
from .smoothing import assuncao_rate
from .tabular import _univariate_handler, _bivariate_handler
import scipy.stats as stats
import numpy as np

__all__ = ["Moran", "Moran_Local", "Moran_BV", "Moran_BV_matrix",
           "Moran_Local_BV", "Moran_Rate", "Moran_Local_Rate"]

PERMUTATIONS = 999


[文档]class Moran(object):
    """Moran's I Global Autocorrelation Statistic

    Parameters
    ----------

    y               : array
                      variable measured across n spatial units
    w               : W
                      spatial weights instance
    transformation  : string
                      weights transformation,  default is row-standardized "r".
                      Other options include "B": binary,  "D":
                      doubly-standardized,  "U": untransformed
                      (general weights), "V": variance-stabilizing.
    permutations    : int
                      number of random permutations for calculation of
                      pseudo-p_values
    two_tailed      : boolean
                      If True (default) analytical p-values for Moran are two
                      tailed, otherwise if False, they are one-tailed.

    Attributes
    ----------
    y            : array
                   original variable
    w            : W
                   original w object
    permutations : int
                   number of permutations
    I            : float
                   value of Moran's I
    EI           : float
                   expected value under normality assumption
    VI_norm      : float
                   variance of I under normality assumption
    seI_norm     : float
                   standard deviation of I under normality assumption
    z_norm       : float
                   z-value of I under normality assumption
    p_norm       : float
                   p-value of I under normality assumption
    VI_rand      : float
                   variance of I under randomization assumption
    seI_rand     : float
                   standard deviation of I under randomization assumption
    z_rand       : float
                   z-value of I under randomization assumption
    p_rand       : float
                   p-value of I under randomization assumption
    two_tailed   : boolean
                   If True p_norm and p_rand are two-tailed, otherwise they
                   are one-tailed.
    sim          : array
                   (if permutations>0)
                   vector of I values for permuted samples
    p_sim        : array
                   (if permutations>0)
                   p-value based on permutations (one-tailed)
                   null: spatial randomness
                   alternative: the observed I is extreme if
                   it is either extremely greater or extremely lower
                   than the values obtained based on permutations
    EI_sim       : float
                   (if permutations>0)
                   average value of I from permutations
    VI_sim       : float
                   (if permutations>0)
                   variance of I from permutations
    seI_sim      : float
                   (if permutations>0)
                   standard deviation of I under permutations.
    z_sim        : float
                   (if permutations>0)
                   standardized I based on permutations
    p_z_sim      : float
                   (if permutations>0)
                   p-value based on standard normal approximation from
                   permutations

    Notes
    -----
    Technical details and derivations can be found in :cite:`cliff81`.


    Examples
    --------
    >>> import pysal.lib
    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("stl.gal")).read()
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("stl_hom.txt"))
    >>> y = np.array(f.by_col['HR8893'])
    >>> from pysal.explore.esda.moran import Moran
    >>> mi = Moran(y,  w)
    >>> round(mi.I, 3)
    0.244
    >>> mi.EI
    -0.012987012987012988
    >>> mi.p_norm
    0.00027147862770937614

    SIDS example replicating OpenGeoda
    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.gal")).read()
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.dbf"))
    >>> SIDR = np.array(f.by_col("SIDR74"))
    >>> mi = Moran(SIDR,  w)
    >>> round(mi.I, 3)
    0.248
    >>> mi.p_norm
    0.0001158330781489969

    One-tailed

    >>> mi_1 = Moran(SIDR,  w, two_tailed=False)
    >>> round(mi_1.I, 3)
    0.248
    >>> round(mi_1.p_norm, 4)
    0.0001

    """
[文档]    def __init__(self, y, w, transformation="r", permutations=PERMUTATIONS,
                 two_tailed=True):
        y = np.asarray(y).flatten()
        self.y = y
        w.transform = transformation
        self.w = w
        self.permutations = permutations
        self.__moments()
        self.I = self.__calc(self.z)
        self.z_norm = (self.I - self.EI) / self.seI_norm
        self.z_rand = (self.I - self.EI) / self.seI_rand

        if self.z_norm > 0:
            self.p_norm = 1 - stats.norm.cdf(self.z_norm)
            self.p_rand = 1 - stats.norm.cdf(self.z_rand)
        else:
            self.p_norm = stats.norm.cdf(self.z_norm)
            self.p_rand = stats.norm.cdf(self.z_rand)

        if two_tailed:
            self.p_norm *= 2.
            self.p_rand *= 2.

        if permutations:
            sim = [self.__calc(np.random.permutation(self.z))
                   for i in range(permutations)]
            self.sim = sim = np.array(sim)
            above = sim >= self.I
            larger = above.sum()
            if (self.permutations - larger) < larger:
                larger = self.permutations - larger
            self.p_sim = (larger + 1.) / (permutations + 1.)
            self.EI_sim = sim.sum() / permutations
            self.seI_sim = np.array(sim).std()
            self.VI_sim = self.seI_sim ** 2
            self.z_sim = (self.I - self.EI_sim) / self.seI_sim
            if self.z_sim > 0:
                self.p_z_sim = 1 - stats.norm.cdf(self.z_sim)
            else:
                self.p_z_sim = stats.norm.cdf(self.z_sim)

        # provide .z attribute that is znormalized
        sy = y.std()
        self.z /= sy

    def __moments(self):
        self.n = len(self.y)
        y = self.y
        z = y - y.mean()
        self.z = z
        self.z2ss = (z * z).sum()
        self.EI = -1. / (self.n - 1)
        n = self.n
        n2 = n * n
        s1 = self.w.s1
        s0 = self.w.s0
        s2 = self.w.s2
        s02 = s0 * s0
        v_num = n2 * s1 - n * s2 + 3 * s02
        v_den = (n - 1) * (n + 1) * s02
        self.VI_norm = v_num / v_den - (1.0 / (n - 1)) ** 2
        self.seI_norm = self.VI_norm ** (1 / 2.)

        # variance under randomization
        xd4 = z**4
        xd2 = z**2
        k_num = xd4.sum() / n
        k_den = (xd2.sum() / n)**2
        k = k_num / k_den
        EI = self.EI
        A = n * ((n2 - 3 * n + 3) * s1 - n * s2 + 3 * s02)
        B = k * ((n2 - n) * s1 - 2 * n * s2 + 6 * s02  )
        VIR = (A - B) / ((n - 1) * (n - 2) * (n - 3 ) * s02) - EI*EI
        self.VI_rand = VIR
        self.seI_rand = VIR ** (1 / 2.)

    def __calc(self, z):
        zl = slag(self.w, z)
        inum = (z * zl).sum()
        return self.n / self.w.s0 * inum / self.z2ss

    @property
    def _statistic(self):
        """More consistent hidden attribute to access ESDA statistics"""
        return self.I

    @classmethod
    def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **stat_kws):
        """
        Function to compute a Moran statistic on a dataframe

        Arguments
        ---------
        df          :   pandas.DataFrame
                        a pandas dataframe with a geometry column
        cols        :   string or list of string
                        name or list of names of columns to use to compute the statistic
        w           :   pysal weights object
                        a weights object aligned with the dataframe. If not provided, this
                        is searched for in the dataframe's metadata
        inplace     :   bool
                        a boolean denoting whether to operate on the dataframe inplace or to
                        return a series contaning the results of the computation. If
                        operating inplace, the derived columns will be named
                        'column_moran'
        pvalue      :   string
                        a string denoting which pvalue should be returned. Refer to the
                        the Moran statistic's documentation for available p-values
        outvals     :   list of strings
                        list of arbitrary attributes to return as columns from the
                        Moran statistic
        **stat_kws  :   keyword arguments
                        options to pass to the underlying statistic. For this, see the
                        documentation for the Moran statistic.

        Returns
        --------
        If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
        returns a copy of the dataframe with the relevant columns attached.

        See Also
        ---------
        Moran
        """
        return _univariate_handler(df, cols, w=w, inplace=inplace, pvalue=pvalue,
                                   outvals=outvals, stat=cls,
                                   swapname=cls.__name__.lower(), **stat_kws)

[文档]class Moran_BV(object):
    """
    Bivariate Moran's I

    Parameters
    ----------
    x : array
        x-axis variable
    y : array
        wy will be on y axis
    w : W
        weight instance assumed to be aligned with y
    transformation  : {'R', 'B', 'D', 'U', 'V'}
                      weights transformation, default is row-standardized "r".
                      Other options include
                      "B": binary,
                      "D": doubly-standardized,
                      "U": untransformed (general weights),
                      "V": variance-stabilizing.
    permutations    : int
                      number of random permutations for calculation of pseudo
                      p_values

    Attributes
    ----------
    zx            : array
                    original x variable standardized by mean and std
    zy            : array
                    original y variable standardized by mean and std
    w             : W
                    original w object
    permutation   : int
                    number of permutations
    I             : float
                    value of bivariate Moran's I
    sim           : array
                    (if permutations>0)
                    vector of I values for permuted samples
    p_sim         : float
                    (if permutations>0)
                    p-value based on permutations (one-sided)
                    null: spatial randomness
                    alternative: the observed I is extreme
                    it is either extremely high or extremely low
    EI_sim        : array
                    (if permutations>0)
                    average value of I from permutations
    VI_sim        : array
                    (if permutations>0)
                    variance of I from permutations
    seI_sim       : array
                    (if permutations>0)
                    standard deviation of I under permutations.
    z_sim         : array
                    (if permutations>0)
                    standardized I based on permutations
    p_z_sim       : float
                    (if permutations>0)
                    p-value based on standard normal approximation from
                    permutations

    Notes
    -----

    Inference is only based on permutations as analytical results are not too
    reliable.




    Examples
    --------
    >>> import pysal.lib
    >>> import numpy as np

    Set random number generator seed so we can replicate the example

    >>> np.random.seed(10)

    Open the sudden infant death dbf file and read in rates for 74 and 79
    converting each to a numpy array

    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.dbf"))
    >>> SIDR74 = np.array(f.by_col['SIDR74'])
    >>> SIDR79 = np.array(f.by_col['SIDR79'])

    Read a GAL file and construct our spatial weights object

    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.gal")).read()

    Create an instance of Moran_BV
    >>> from pysal.explore.esda.moran import Moran_BV
    >>> mbi = Moran_BV(SIDR79,  SIDR74,  w)

    What is the bivariate Moran's I value

    >>> round(mbi.I, 3)
    0.156

    Based on 999 permutations, what is the p-value of our statistic

    >>> round(mbi.p_z_sim, 3)
    0.001


    """
[文档]    def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS):
        x = np.asarray(x).flatten()
        y = np.asarray(y).flatten()
        zy = (y - y.mean()) / y.std(ddof=1)
        zx = (x - x.mean()) / x.std(ddof=1)
        self.y = y
        self.x = x
        self.zx = zx
        self.zy = zy
        n = x.shape[0]
        self.den = n - 1.  # zx'zx = zy'zy = n-1
        w.transform = transformation
        self.w = w
        self.I = self.__calc(zy)
        if permutations:
            nrp = np.random.permutation
            sim = [self.__calc(nrp(zy)) for i in range(permutations)]
            self.sim = sim = np.array(sim)
            above = sim >= self.I
            larger = above.sum()
            if (permutations - larger) < larger:
                larger = permutations - larger
            self.p_sim = (larger + 1.) / (permutations + 1.)
            self.EI_sim = sim.sum() / permutations
            self.seI_sim = np.array(sim).std()
            self.VI_sim = self.seI_sim ** 2
            self.z_sim = (self.I - self.EI_sim) / self.seI_sim
            if self.z_sim > 0:
                self.p_z_sim = 1 - stats.norm.cdf(self.z_sim)
            else:
                self.p_z_sim = stats.norm.cdf(self.z_sim)

    def __calc(self, zy):
        wzy = slag(self.w, zy)
        self.num = (self.zx * wzy).sum()
        return self.num / self.den

    @property
    def _statistic(self):
        """More consistent hidden attribute to access ESDA statistics"""
        return self.I

    @classmethod
    def by_col(cls, df, x, y=None, w=None, inplace=False, pvalue='sim', outvals=None, **stat_kws):
        """
        Function to compute a Moran_BV statistic on a dataframe

        Arguments
        ---------
        df          :   pandas.DataFrame
                        a pandas dataframe with a geometry column
        X           :   list of strings
                        column name or list of column names to use as X values to compute
                        the bivariate statistic. If no Y is provided, pairwise comparisons
                        among these variates are used instead.
        Y           :   list of strings
                        column name or list of column names to use as Y values to compute
                        the bivariate statistic. if no Y is provided, pariwise comparisons
                        among the X variates are used instead.
        w           :   pysal weights object
                        a weights object aligned with the dataframe. If not provided, this
                        is searched for in the dataframe's metadata
        inplace     :   bool
                        a boolean denoting whether to operate on the dataframe inplace or to
                        return a series contaning the results of the computation. If
                        operating inplace, the derived columns will be named
                        'column_moran_local'
        pvalue      :   string
                        a string denoting which pvalue should be returned. Refer to the
                        the Moran_BV statistic's documentation for available p-values
        outvals     :   list of strings
                        list of arbitrary attributes to return as columns from the
                        Moran_BV statistic
        **stat_kws  :   keyword arguments
                        options to pass to the underlying statistic. For this, see the
                        documentation for the Moran_BV statistic.


        Returns
        --------
        If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
        returns a copy of the dataframe with the relevant columns attached.

        See Also
        ---------
        Moran_BV
        """
        return _bivariate_handler(df, x, y=y, w=w, inplace=inplace,
                                  pvalue = pvalue, outvals = outvals,
                                  swapname=cls.__name__.lower(), stat=cls,**stat_kws)


[文档]def Moran_BV_matrix(variables, w, permutations=0, varnames=None):
    """
    Bivariate Moran Matrix

    Calculates bivariate Moran between all pairs of a set of variables.

    Parameters
    ----------
    variables    : array or pandas.DataFrame
                   sequence of variables to be assessed
    w            : W
                   a spatial weights object
    permutations : int
                   number of permutations
    varnames     : list, optional if variables is an array
                   Strings for variable names. Will add an
                   attribute to `Moran_BV` objects in results needed for plotting
                   in `splot` or `.plot()`. Default =None.
                   Note: If variables is a `pandas.DataFrame` varnames
                   will automatically be generated
    Returns
    -------
    results      : dictionary
                   (i,  j) is the key for the pair of variables, values are
                   the Moran_BV objects.

    Examples
    --------

    open dbf

    >>> import pysal.lib
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.dbf"))

    pull of selected variables from dbf and create numpy arrays for each

    >>> varnames = ['SIDR74',  'SIDR79',  'NWR74',  'NWR79']
    >>> vars = [np.array(f.by_col[var]) for var in varnames]

    create a contiguity matrix from an external gal file

    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.gal")).read()

    create an instance of Moran_BV_matrix

    >>> from pysal.explore.esda.moran import Moran_BV_matrix
    >>> res = Moran_BV_matrix(vars,  w,  varnames = varnames)

    check values

    >>> round(res[(0,  1)].I,7)
    0.1936261
    >>> round(res[(3,  0)].I,7)
    0.3770138

    """
    try:
        # check if pandas is installed
        import pandas
        if isinstance(variables, pandas.DataFrame):
            # if yes use variables as df and convert to numpy_array
            varnames = pandas.Index.tolist(variables.columns)
            variables_n = []
            for var in varnames:
                variables_n.append(variables[str(var)].values)
        else:
            variables_n = variables
    except ImportError:
        variables_n = variables

    results = _Moran_BV_Matrix_array(variables=variables_n, w=w,
                                     permutations=permutations,
                                     varnames=varnames)
    return results


def _Moran_BV_Matrix_array(variables, w, permutations=0, varnames=None):
    """
    Base calculation for MORAN_BV_Matrix
    """
    if varnames is None:
        varnames = ['x{}'.format(i) for i in range(k)]

    k = len(variables)
    rk = list(range(0, k - 1))
    results = {}
    for i in rk:
        for j in range(i + 1, k):
            y1 = variables[i]
            y2 = variables[j]
            results[i, j] = Moran_BV(y1, y2, w, permutations=permutations)
            results[j, i] = Moran_BV(y2, y1, w, permutations=permutations)
            results[i, j].varnames = {'x': varnames[i], 'y': varnames[j]}
            results[j, i].varnames = {'x': varnames[j], 'y': varnames[i]}
    return results


[文档]class Moran_Rate(Moran):
    """
    Adjusted Moran's I Global Autocorrelation Statistic for Rate
    Variables :cite:`Assun_o_1999`

    Parameters
    ----------

    e               : array
                      an event variable measured across n spatial units
    b               : array
                      a population-at-risk variable measured across n spatial
                      units
    w               : W
                      spatial weights instance
    adjusted        : boolean
                      whether or not Moran's I needs to be adjusted for rate
                      variable
    transformation  : {'R', 'B', 'D', 'U', 'V'}
                      weights transformation, default is row-standardized "r".
                      Other options include
                      "B": binary,
                      "D": doubly-standardized,
                      "U": untransformed (general weights),
                      "V": variance-stabilizing.
    two_tailed      : boolean
                      If True (default), analytical p-values for Moran's I are
                      two-tailed, otherwise they are one tailed.
    permutations    : int
                      number of random permutations for calculation of pseudo
                      p_values

    Attributes
    ----------
    y            : array
                   rate variable computed from parameters e and b
                   if adjusted is True, y is standardized rates
                   otherwise, y is raw rates
    w            : W
                   original w object
    permutations : int
                   number of permutations
    I            : float
                   value of Moran's I
    EI           : float
                   expected value under normality assumption
    VI_norm      : float
                   variance of I under normality assumption
    seI_norm     : float
                   standard deviation of I under normality assumption
    z_norm       : float
                   z-value of I under normality assumption
    p_norm       : float
                   p-value of I under normality assumption
    VI_rand      : float
                   variance of I under randomization assumption
    seI_rand     : float
                   standard deviation of I under randomization assumption
    z_rand       : float
                   z-value of I under randomization assumption
    p_rand       : float
                   p-value of I under randomization assumption
    two_tailed   : boolean
                   If True, p_norm and p_rand are two-tailed p-values,
                   otherwise they are one-tailed.
    sim          : array
                   (if permutations>0)
                   vector of I values for permuted samples
    p_sim        : array
                   (if permutations>0)
                   p-value based on permutations (one-sided)
                   null: spatial randomness
                   alternative: the observed I is extreme if it is
                   either extremely greater or extremely lower than the values
                   obtained from permutaitons
    EI_sim       : float
                   (if permutations>0)
                   average value of I from permutations
    VI_sim       : float
                   (if permutations>0)
                   variance of I from permutations
    seI_sim      : float
                   (if permutations>0)
                   standard deviation of I under permutations.
    z_sim        : float
                   (if permutations>0)
                   standardized I based on permutations
    p_z_sim      : float
                   (if permutations>0)
                   p-value based on standard normal approximation from

    Examples
    --------
    >>> import pysal.lib
    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.gal")).read()
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.dbf"))
    >>> e = np.array(f.by_col('SID79'))
    >>> b = np.array(f.by_col('BIR79'))
    >>> from pysal.explore.esda.moran import Moran_Rate
    >>> mi = Moran_Rate(e, b,  w, two_tailed=False)
    >>> "%6.4f" % mi.I
    '0.1662'
    >>> "%6.4f" % mi.p_norm
    '0.0042'
    """

[文档]    def __init__(self, e, b, w, adjusted=True, transformation="r",
                 permutations=PERMUTATIONS, two_tailed=True):
        e = np.asarray(e).flatten()
        b = np.asarray(b).flatten()
        if adjusted:
            y = assuncao_rate(e, b)
        else:
            y = e * 1.0 / b
        Moran.__init__(self, y, w, transformation=transformation,
                       permutations=permutations, two_tailed=two_tailed)

    @classmethod
    def by_col(cls, df, events, populations, w=None, inplace=False,
               pvalue='sim', outvals=None, swapname='',  **stat_kws):
        """
        Function to compute a Moran_Rate statistic on a dataframe

        Arguments
        ---------
        df          :   pandas.DataFrame
                        a pandas dataframe with a geometry column
        events      :   string or  list of strings
                        one or more names where events are stored
        populations :   string or list of strings
                        one or more names where the populations corresponding to the
                        events are stored. If one population column is provided, it is
                        used for all event columns. If more than one population column
                        is provided but there is not a population for every event
                        column, an exception will be raised.
        w           :   pysal weights object
                        a weights object aligned with the dataframe. If not provided, this
                        is searched for in the dataframe's metadata
        inplace     :   bool
                        a boolean denoting whether to operate on the dataframe inplace or to
                        return a series contaning the results of the computation. If
                        operating inplace, the derived columns will be named
                        'column_moran_rate'
        pvalue      :   string
                        a string denoting which pvalue should be returned. Refer to the
                        the Moran_Rate statistic's documentation for available p-values
        outvals     :   list of strings
                        list of arbitrary attributes to return as columns from the
                        Moran_Rate statistic
        **stat_kws  :   keyword arguments
                        options to pass to the underlying statistic. For this, see the
                        documentation for the Moran_Rate statistic.

        Returns
        --------
        If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
        returns a copy of the dataframe with the relevant columns attached.

        See Also
        ---------
        Moran_Rate
        """
        if not inplace:
            new = df.copy()
            cls.by_col(new, events, populations, w=w, inplace=True,
                              pvalue=pvalue, outvals=outvals, swapname=swapname,
                              **stat_kws)
            return new
        if isinstance(events, str):
            events = [events]
        if isinstance(populations, str):
            populations = [populations]
        if len(populations) < len(events):
            populations = populations * len(events)
        if len(events) != len(populations):
            raise ValueError('There is not a one-to-one matching between events and '
                              'populations!\nEvents: {}\n\nPopulations:'
                              ' {}'.format(events, populations))
        adjusted = stat_kws.pop('adjusted', True)

        if isinstance(adjusted, bool):
            adjusted = [adjusted] * len(events)
        if swapname is '':
            swapname = cls.__name__.lower()

        rates = [assuncao_rate(df[e], df[pop]) if adj
                 else df[e].astype(float) / df[pop]
                 for e,pop,adj in zip(events, populations, adjusted)]
        names = ['-'.join((e,p)) for e,p in zip(events, populations)]
        out_df = df.copy()
        rate_df = out_df.from_items(list(zip(names, rates))) #trick to avoid importing pandas
        stat_df = _univariate_handler(rate_df, names, w=w, inplace=False,
                                      pvalue = pvalue, outvals = outvals,
                                      swapname=swapname,
                                      stat=Moran, #how would this get done w/super?
                                      **stat_kws)
        for col in stat_df.columns:
            df[col] = stat_df[col]

[文档]class Moran_Local(object):
    """Local Moran Statistics


    Parameters
    ----------
    y : array
        (n,1), attribute array
    w : W
        weight instance assumed to be aligned with y
    transformation : {'R', 'B', 'D', 'U', 'V'}
                     weights transformation,  default is row-standardized "r".
                     Other options include
                     "B": binary,
                     "D": doubly-standardized,
                     "U": untransformed (general weights),
                     "V": variance-stabilizing.
    permutations   : int
                     number of random permutations for calculation of pseudo
                     p_values
    geoda_quads    : boolean
                     (default=False)
                     If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
                     If False use PySAL Scheme: HH=1, LH=2, LL=3, HL=4

    Attributes
    ----------

    y            : array
                   original variable
    w            : W
                   original w object
    permutations : int
                   number of random permutations for calculation of pseudo
                   p_values
    Is           : array
                   local Moran's I values
    q            : array
                   (if permutations>0)
                   values indicate quandrant location 1 HH,  2 LH,  3 LL,  4 HL
    sim          : array (permutations by n)
                   (if permutations>0)
                   I values for permuted samples
    p_sim        : array
                   (if permutations>0)
                   p-values based on permutations (one-sided)
                   null: spatial randomness
                   alternative: the observed Ii is further away or extreme
                   from the median of simulated values. It is either extremelyi
                   high or extremely low in the distribution of simulated Is.
    EI_sim       : array
                   (if permutations>0)
                   average values of local Is from permutations
    VI_sim       : array
                   (if permutations>0)
                   variance of Is from permutations
    seI_sim      : array
                   (if permutations>0)
                   standard deviations of Is under permutations.
    z_sim        : arrray
                   (if permutations>0)
                   standardized Is based on permutations
    p_z_sim      : array
                   (if permutations>0)
                   p-values based on standard normal approximation from
                   permutations (one-sided)
                   for two-sided tests, these values should be multiplied by 2

    Notes
    -----

    For technical details see :cite:`Anselin95`.


    Examples
    --------
    >>> import pysal.lib
    >>> import numpy as np
    >>> np.random.seed(10)
    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("desmith.gal")).read()
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("desmith.txt"))
    >>> y = np.array(f.by_col['z'])
    >>> from pysal.explore.esda.moran import Moran_Local
    >>> lm = Moran_Local(y, w, transformation = "r", permutations = 99)
    >>> lm.q
    array([4, 4, 4, 2, 3, 3, 1, 4, 3, 3])
    >>> lm.p_z_sim[0]
    0.24669152541631179
    >>> lm = Moran_Local(y, w, transformation = "r", permutations = 99, \
                            geoda_quads=True)
    >>> lm.q
    array([4, 4, 4, 3, 2, 2, 1, 4, 2, 2])

    Note random components result is slightly different values across
    architectures so the results have been removed from doctests and will be
    moved into unittests that are conditional on architectures
    """
[文档]    def __init__(self, y, w, transformation="r", permutations=PERMUTATIONS,
                 geoda_quads=False):
        y = np.asarray(y).flatten()
        self.y = y
        n = len(y)
        self.n = n
        self.n_1 = n - 1
        z = y - y.mean()
        # setting for floating point noise
        orig_settings = np.seterr()
        np.seterr(all="ignore")
        sy = y.std()
        z /= sy
        np.seterr(**orig_settings)
        self.z = z
        w.transform = transformation
        self.w = w
        self.permutations = permutations
        self.den = (z * z).sum()
        self.Is = self.calc(self.w, self.z)
        self.geoda_quads = geoda_quads
        quads = [1, 2, 3, 4]
        if geoda_quads:
            quads = [1, 3, 2, 4]
        self.quads = quads
        self.__quads()
        if permutations:
            self.__crand()
            sim = np.transpose(self.rlisas)
            above = sim >= self.Is
            larger = above.sum(0)
            low_extreme = (self.permutations - larger) < larger
            larger[low_extreme] = self.permutations - larger[low_extreme]
            self.p_sim = (larger + 1.0) / (permutations + 1.0)
            self.sim = sim
            self.EI_sim = sim.mean(axis=0)
            self.seI_sim = sim.std(axis=0)
            self.VI_sim = self.seI_sim * self.seI_sim
            self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
            self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))

    def calc(self, w, z):
        zl = slag(w, z)
        return self.n_1 * self.z * zl / self.den

    def __crand(self):
        """
        conditional randomization

        for observation i with ni neighbors,  the candidate set cannot include
        i (we don't want i being a neighbor of i). we have to sample without
        replacement from a set of ids that doesn't include i. numpy doesn't
        directly support sampling wo replacement and it is expensive to
        implement this. instead we omit i from the original ids,  permute the
        ids and take the first ni elements of the permuted ids as the
        neighbors to i in each randomization.

        """
        z = self.z
        lisas = np.zeros((self.n, self.permutations))
        n_1 = self.n - 1
        prange = list(range(self.permutations))
        k = self.w.max_neighbors + 1
        nn = self.n - 1
        rids = np.array([np.random.permutation(nn)[0:k] for i in prange])
        ids = np.arange(self.w.n)
        ido = self.w.id_order
        w = [self.w.weights[ido[i]] for i in ids]
        wc = [self.w.cardinalities[ido[i]] for i in ids]

        for i in range(self.w.n):
            idsi = ids[ids != i]
            np.random.shuffle(idsi)
            tmp = z[idsi[rids[:, 0:wc[i]]]]
            lisas[i] = z[i] * (w[i] * tmp).sum(1)
        self.rlisas = (n_1 / self.den) * lisas

    def __quads(self):
        zl = slag(self.w, self.z)
        zp = self.z > 0
        lp = zl > 0
        pp = zp * lp
        np = (1 - zp) * lp
        nn = (1 - zp) * (1 - lp)
        pn = zp * (1 - lp)
        self.q = self.quads[0] * pp + self.quads[1] * np + self.quads[2] * nn \
            + self.quads[3] * pn

    @property
    def _statistic(self):
        """More consistent hidden attribute to access ESDA statistics"""
        return self.Is

    @classmethod
    def by_col(cls, df, cols, w=None, inplace=False, pvalue='sim', outvals=None, **stat_kws):
        """
        Function to compute a Moran_Local statistic on a dataframe

        Arguments
        ---------
        df          :   pandas.DataFrame
                        a pandas dataframe with a geometry column
        cols        :   string or list of string
                        name or list of names of columns to use to compute the statistic
        w           :   pysal weights object
                        a weights object aligned with the dataframe. If not provided, this
                        is searched for in the dataframe's metadata
        inplace     :   bool
                        a boolean denoting whether to operate on the dataframe inplace or to
                        return a series contaning the results of the computation. If
                        operating inplace, the derived columns will be named
                        'column_moran_local'
        pvalue      :   string
                        a string denoting which pvalue should be returned. Refer to the
                        the Moran_Local statistic's documentation for available p-values
        outvals     :   list of strings
                        list of arbitrary attributes to return as columns from the
                        Moran_Local statistic
        **stat_kws  :   keyword arguments
                        options to pass to the underlying statistic. For this, see the
                        documentation for the Moran_Local statistic.

        Returns
        --------
        If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
        returns a copy of the dataframe with the relevant columns attached.

        See Also
        ---------
        Moran_Local
        """
        return _univariate_handler(df, cols, w=w, inplace=inplace, pvalue=pvalue,
                                   outvals=outvals, stat=cls,
                                   swapname=cls.__name__.lower(), **stat_kws)


[文档]class Moran_Local_BV(object):
    """Bivariate Local Moran Statistics


    Parameters
    ----------
    x : array
        x-axis variable
    y : array
        (n,1), wy will be on y axis
    w : W
        weight instance assumed to be aligned with y
    transformation : {'R', 'B', 'D', 'U', 'V'}
                     weights transformation,  default is row-standardized "r".
                     Other options include
                     "B": binary,
                     "D": doubly-standardized,
                     "U": untransformed (general weights),
                     "V": variance-stabilizing.
    permutations   : int
                     number of random permutations for calculation of pseudo
                     p_values
    geoda_quads    : boolean
                     (default=False)
                     If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
                     If False use PySAL Scheme: HH=1, LH=2, LL=3, HL=4

    Attributes
    ----------

    zx           : array
                   original x variable standardized by mean and std
    zy           : array
                   original y variable standardized by mean and std
    w            : W
                   original w object
    permutations : int
                   number of random permutations for calculation of pseudo
                   p_values
    Is           : float
                   value of Moran's I
    q            : array
                   (if permutations>0)
                   values indicate quandrant location 1 HH,  2 LH,  3 LL,  4 HL
    sim          : array
                   (if permutations>0)
                   vector of I values for permuted samples
    p_sim        : array
                   (if permutations>0)
                   p-value based on permutations (one-sided)
                   null: spatial randomness
                   alternative: the observed Ii is further away or extreme
                   from the median of simulated values. It is either extremelyi
                   high or extremely low in the distribution of simulated Is.
    EI_sim       : array
                   (if permutations>0)
                   average values of local Is from permutations
    VI_sim       : array
                   (if permutations>0)
                   variance of Is from permutations
    seI_sim      : array
                   (if permutations>0)
                   standard deviations of Is under permutations.
    z_sim        : arrray
                   (if permutations>0)
                   standardized Is based on permutations
    p_z_sim      : array
                   (if permutations>0)
                   p-values based on standard normal approximation from
                   permutations (one-sided)
                   for two-sided tests, these values should be multiplied by 2


    Examples
    --------
    >>> import pysal.lib
    >>> import numpy as np
    >>> np.random.seed(10)
    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.gal")).read()
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.dbf"))
    >>> x = np.array(f.by_col['SIDR79'])
    >>> y = np.array(f.by_col['SIDR74'])
    >>> from pysal.explore.esda.moran import Moran_Local_BV
    >>> lm =Moran_Local_BV(x, y, w, transformation = "r", \
                               permutations = 99)
    >>> lm.q[:10]
    array([3, 4, 3, 4, 2, 1, 4, 4, 2, 4])
    >>> lm = Moran_Local_BV(x, y, w, transformation = "r", \
                               permutations = 99, geoda_quads=True)
    >>> lm.q[:10]
    array([2, 4, 2, 4, 3, 1, 4, 4, 3, 4])

    Note random components result is slightly different values across
    architectures so the results have been removed from doctests and will be
    moved into unittests that are conditional on architectures
    """
[文档]    def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS,
                 geoda_quads=False):
        x = np.asarray(x).flatten()
        y = np.asarray(y).flatten()
        self.y = y
        self.x =x
        n = len(y)
        self.n = n
        self.n_1 = n - 1
        zx = x - x.mean()
        zy = y - y.mean()
        # setting for floating point noise
        orig_settings = np.seterr()
        np.seterr(all="ignore")
        sx = x.std()
        zx /= sx
        sy = y.std()
        zy /= sy
        np.seterr(**orig_settings)
        self.zx = zx
        self.zy = zy
        w.transform = transformation
        self.w = w
        self.permutations = permutations
        self.den = (zx * zx).sum()
        self.Is = self.calc(self.w, self.zx, self.zy)
        self.geoda_quads = geoda_quads
        quads = [1, 2, 3, 4]
        if geoda_quads:
            quads = [1, 3, 2, 4]
        self.quads = quads
        self.__quads()
        if permutations:
            self.__crand()
            sim = np.transpose(self.rlisas)
            above = sim >= self.Is
            larger = above.sum(0)
            low_extreme = (self.permutations - larger) < larger
            larger[low_extreme] = self.permutations - larger[low_extreme]
            self.p_sim = (larger + 1.0) / (permutations + 1.0)
            self.sim = sim
            self.EI_sim = sim.mean(axis=0)
            self.seI_sim = sim.std(axis=0)
            self.VI_sim = self.seI_sim * self.seI_sim
            self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
            self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))

    def calc(self, w, zx, zy):
        zly = slag(w, zy)
        return self.n_1 * self.zx * zly / self.den

    def __crand(self):
        """
        conditional randomization

        for observation i with ni neighbors,  the candidate set cannot include
        i (we don't want i being a neighbor of i). we have to sample without
        replacement from a set of ids that doesn't include i. numpy doesn't
        directly support sampling wo replacement and it is expensive to
        implement this. instead we omit i from the original ids,  permute the
        ids and take the first ni elements of the permuted ids as the
        neighbors to i in each randomization.

        """
        lisas = np.zeros((self.n, self.permutations))
        n_1 = self.n - 1
        prange = list(range(self.permutations))
        k = self.w.max_neighbors + 1
        nn = self.n - 1
        rids = np.array([np.random.permutation(nn)[0:k] for i in prange])
        ids = np.arange(self.w.n)
        ido = self.w.id_order
        w = [self.w.weights[ido[i]] for i in ids]
        wc = [self.w.cardinalities[ido[i]] for i in ids]

        zx = self.zx
        zy = self.zy
        for i in range(self.w.n):
            idsi = ids[ids != i]
            np.random.shuffle(idsi)
            tmp = zy[idsi[rids[:, 0:wc[i]]]]
            lisas[i] = zx[i] * (w[i] * tmp).sum(1)
        self.rlisas = (n_1 / self.den) * lisas

    def __quads(self):
        zl = slag(self.w, self.zy)
        zp = self.zx > 0
        lp = zl > 0
        pp = zp * lp
        np = (1 - zp) * lp
        nn = (1 - zp) * (1 - lp)
        pn = zp * (1 - lp)
        self.q = self.quads[0] * pp + self.quads[1] * np + self.quads[2] * nn \
            + self.quads[3] * pn

    @property
    def _statistic(self):
        """More consistent hidden attribute to access ESDA statistics"""
        return self.Is

    @classmethod
    def by_col(cls, df, x, y=None, w=None, inplace=False, pvalue='sim', outvals=None, **stat_kws):
        """
        Function to compute a Moran_Local_BV statistic on a dataframe

        Arguments
        ---------
        df          :   pandas.DataFrame
                        a pandas dataframe with a geometry column
        X           :   list of strings
                        column name or list of column names to use as X values to compute
                        the bivariate statistic. If no Y is provided, pairwise comparisons
                        among these variates are used instead.
        Y           :   list of strings
                        column name or list of column names to use as Y values to compute
                        the bivariate statistic. if no Y is provided, pariwise comparisons
                        among the X variates are used instead.
        w           :   pysal weights object
                        a weights object aligned with the dataframe. If not provided, this
                        is searched for in the dataframe's metadata
        inplace     :   bool
                        a boolean denoting whether to operate on the dataframe inplace or to
                        return a series contaning the results of the computation. If
                        operating inplace, the derived columns will be named
                        'column_moran_local_bv'
        pvalue      :   string
                        a string denoting which pvalue should be returned. Refer to the
                        the Moran_Local_BV statistic's documentation for available p-values
        outvals     :   list of strings
                        list of arbitrary attributes to return as columns from the
                        Moran_Local_BV statistic
        **stat_kws  :   keyword arguments
                        options to pass to the underlying statistic. For this, see the
                        documentation for the Moran_Local_BV statistic.


        Returns
        --------
        If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
        returns a copy of the dataframe with the relevant columns attached.

        See Also
        ---------
        Moran_Local_BV
        """
        return _bivariate_handler(df, x, y=y, w=w, inplace=inplace,
                                  pvalue = pvalue, outvals = outvals,
                                  swapname=cls.__name__.lower(), stat=cls,**stat_kws)

[文档]class Moran_Local_Rate(Moran_Local):
    """
    Adjusted Local Moran Statistics for Rate Variables [Assuncao1999]_

    Parameters
    ----------
    e : array
        (n,1), an event variable across n spatial units
    b : array
        (n,1), a population-at-risk variable across n spatial units
    w : W
        weight instance assumed to be aligned with y
    adjusted : boolean
              whether or not local Moran statistics need to be adjusted for
              rate variable
    transformation : {'R', 'B', 'D', 'U', 'V'}
                     weights transformation,  default is row-standardized "r".
                     Other options include
                     "B": binary,
                     "D": doubly-standardized,
                     "U": untransformed (general weights),
                     "V": variance-stabilizing.
    permutations   : int
                     number of random permutations for calculation of pseudo
                     p_values
    geoda_quads    : boolean
                     (default=False)
                     If True use GeoDa scheme: HH=1, LL=2, LH=3, HL=4
                     If False use PySAL Scheme: HH=1, LH=2, LL=3, HL=4
    Attributes
    ----------
    y            : array
                   rate variables computed from parameters e and b
                   if adjusted is True, y is standardized rates
                   otherwise, y is raw rates
    w            : W
                   original w object
    permutations : int
                   number of random permutations for calculation of pseudo
                   p_values
    I            : float
                   value of Moran's I
    q            : array
                   (if permutations>0)
                   values indicate quandrant location 1 HH,  2 LH,  3 LL,  4 HL
    sim          : array
                   (if permutations>0)
                   vector of I values for permuted samples
    p_sim        : array
                   (if permutations>0)
                   p-value based on permutations (one-sided)
                   null: spatial randomness
                   alternative: the observed Ii is further away or extreme
                   from the median of simulated Iis. It is either extremely
                   high or extremely low in the distribution of simulated Is
    EI_sim       : float
                   (if permutations>0)
                   average value of I from permutations
    VI_sim       : float
                   (if permutations>0)
                   variance of I from permutations
    seI_sim      : float
                   (if permutations>0)
                   standard deviation of I under permutations.
    z_sim        : float
                   (if permutations>0)
                   standardized I based on permutations
    p_z_sim      : float
                   (if permutations>0)
                   p-value based on standard normal approximation from
                   permutations (one-sided)
                   for two-sided tests, these values should be multiplied by 2

    Examples
    --------
    >>> import pysal.lib
    >>> import numpy as np
    >>> np.random.seed(10)
    >>> w = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.gal")).read()
    >>> f = pysal.lib.io.open(pysal.lib.examples.get_path("sids2.dbf"))
    >>> e = np.array(f.by_col('SID79'))
    >>> b = np.array(f.by_col('BIR79'))
    >>> from pysal.explore.esda.moran import Moran_Local_Rate
    >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99)
    >>> lm.q[:10]
    array([2, 4, 3, 1, 2, 1, 1, 4, 2, 4])
    >>> lm = Moran_Local_Rate(e, b, w,  transformation = "r",  permutations = 99,  geoda_quads=True)
    >>> lm.q[:10]
    array([3, 4, 2, 1, 3, 1, 1, 4, 3, 4])

    Note random components result is slightly different values across
    architectures so the results have been removed from doctests and will be
    moved into unittests that are conditional on architectures
    """

[文档]    def __init__(self, e, b, w, adjusted=True, transformation="r",
                 permutations=PERMUTATIONS, geoda_quads=False):
        e = np.asarray(e).flatten()
        b = np.asarray(b).flatten()
        if adjusted:
            y = assuncao_rate(e, b)
        else:
            y = e * 1.0 / b
        Moran_Local.__init__(self, y, w,
                             transformation=transformation,
                             permutations=permutations,
                             geoda_quads=geoda_quads)

    @classmethod
    def by_col(cls, df, events, populations, w=None, inplace=False,
               pvalue='sim', outvals=None, swapname='',  **stat_kws):
        """
        Function to compute a Moran_Local_Rate statistic on a dataframe

        Arguments
        ---------
        df          :   pandas.DataFrame
                        a pandas dataframe with a geometry column
        events      :   string or  list of strings
                        one or more names where events are stored
        populations :   string or list of strings
                        one or more names where the populations corresponding to the
                        events are stored. If one population column is provided, it is
                        used for all event columns. If more than one population column
                        is provided but there is not a population for every event
                        column, an exception will be raised.
        w           :   pysal weights object
                        a weights object aligned with the dataframe. If not provided, this
                        is searched for in the dataframe's metadata
        inplace     :   bool
                        a boolean denoting whether to operate on the dataframe inplace or to
                        return a series contaning the results of the computation. If
                        operating inplace, the derived columns will be named 'column_moran_local_rate'
        pvalue      :   string
                        a string denoting which pvalue should be returned. Refer to the
                        the Moran_Local_Rate statistic's documentation for available p-values
        outvals     :   list of strings
                        list of arbitrary attributes to return as columns from the
                        Moran_Local_Rate statistic
        **stat_kws  :   keyword arguments
                        options to pass to the underlying statistic. For this, see the
                        documentation for the Moran_Local_Rate statistic.

        Returns
        --------
        If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
        returns a copy of the dataframe with the relevant columns attached.

        See Also
        ---------
        Moran_Local_Rate
        """
        if not inplace:
            new = df.copy()
            cls.by_col(new, events, populations, w=w, inplace=True,
                              pvalue=pvalue, outvals=outvals, swapname=swapname,
                              **stat_kws)
            return new
        if isinstance(events, str):
            events = [events]
        if isinstance(populations, str):
            populations = [populations]
        if len(populations) < len(events):
            populations = populations * len(events)
        if len(events) != len(populations):
            raise ValueError('There is not a one-to-one matching between events and '
                              'populations!\nEvents: {}\n\nPopulations:'
                              ' {}'.format(events, populations))
        adjusted = stat_kws.pop('adjusted', True)

        if isinstance(adjusted, bool):
            adjusted = [adjusted] * len(events)
        if swapname is '':
            swapname = cls.__name__.lower()

        rates = [assuncao_rate(df[e], df[pop]) if adj
                 else df[e].astype(float) / df[pop]
                 for e,pop,adj in zip(events, populations, adjusted)]
        names = ['-'.join((e,p)) for e,p in zip(events, populations)]
        out_df = df.copy()
        rate_df = out_df.from_items(list(zip(names, rates))) #trick to avoid importing pandas
        _univariate_handler(rate_df, names, w=w, inplace=True,
                                      pvalue = pvalue, outvals = outvals,
                                      swapname=swapname,
                                      stat=Moran_Local, #how would this get done w/super?
                                      **stat_kws)
        for col in rate_df.columns:
            df[col] = rate_df[col]