"""
Set-like manipulation of weights matrices.
"""
__author__ = "Sergio J. Rey <srey@asu.edu>, Charles Schmidt <schmidtc@gmail.com>, David Folch <david.folch@asu.edu>, Dani Arribas-Bel <darribas@asu.edu>"
import copy
from .weights import W, WSP
from scipy.sparse import isspmatrix_csr
from numpy import ones
# Public API of this module: the set-like operations defined below.
__all__ = ['w_union', 'w_intersection', 'w_difference',
'w_symmetric_difference', 'w_subset', 'w_clip']
def w_union(w1, w2, silence_warnings=False):
    """
    Returns a binary weights object, w, that includes all neighbor pairs that
    exist in either w1 or w2.

    Parameters
    ----------
    w1 : W
        object
    w2 : W
        object
    silence_warnings : boolean
        Switch to turn off (default on) print statements
        for every observation with islands

    Returns
    -------
    w : W
        object

    Notes
    -----
    ID comparisons are performed using ==, therefore the integer ID 2 is
    equivalent to the float ID 2.0. Returns a matrix with all the unique IDs
    from w1 and w2.

    The neighbor lists handed to the new object are always fresh copies, so
    the result never shares list objects with w1 or w2.

    Examples
    --------
    Construct rook weights matrices for two regions, one is 4x4 (16 areas)
    and the other is 6x4 (24 areas). A union of these two weights matrices
    results in the new weights matrix matching the larger one.

    >>> from pysal.lib.weights import lat2W
    >>> w1 = lat2W(4,4)
    >>> w2 = lat2W(6,4)
    >>> import pysal.lib
    >>> w = pysal.lib.weights.set_operations.w_union(w1, w2)
    >>> w1[0] == w[0]
    True
    >>> w1.neighbors[15]
    [11, 14]
    >>> w2.neighbors[15]
    [11, 14, 19]
    >>> set(w.neighbors[15]) == set([11, 14, 19])
    True
    """
    # Start from a copy of w1 in which every neighbor list is copied as well;
    # a shallow dict copy would leave w1-only ids aliasing w1's own lists.
    neighbors = {i: copy.copy(neigh) for i, neigh in w1.neighbors.items()}
    for i, w2_neigh in w2.neighbors.items():
        if i in neighbors:
            # id present in both inputs: merge the two neighbor sets
            merged = set(neighbors[i]).union(set(w2_neigh))
            neighbors[i] = list(merged)
        else:
            # id only present in w2: take (a copy of) its neighbors as-is
            neighbors[i] = copy.copy(w2_neigh)
    return W(neighbors, silence_warnings=silence_warnings)
def w_intersection(w1, w2, w_shape='w1', silence_warnings=False):
    """
    Build a binary weights object holding only the neighbor pairs that are
    present in both w1 and w2.

    Parameters
    ----------
    w1 : W
        object
    w2 : W
        object
    w_shape : string
        Selects which IDs appear in the returned weights matrix.
        'w1' uses the IDs of w1; 'all' uses every unique ID found in
        w1 or w2; 'min' uses only the IDs found in both w1 and w2.
    silence_warnings : boolean
        Switch to turn off (default on) print statements
        for every observation with islands

    Returns
    -------
    w : W
        object

    Raises
    ------
    Exception
        If w_shape is not one of 'w1', 'all' or 'min'.

    Notes
    -----
    ID comparisons are performed using ==, therefore the integer ID 2 is
    equivalent to the float ID 2.0.

    An ID that is selected by w_shape but missing from either input is
    returned with an empty neighbor list.

    Examples
    --------
    Construct rook weights matrices for two regions, one is 4x4 (16 areas)
    and the other is 6x4 (24 areas). An intersection of these two weights
    matrices results in the new weights matrix matching the smaller one.

    >>> from pysal.lib.weights import lat2W
    >>> w1 = lat2W(4,4)
    >>> w2 = lat2W(6,4)
    >>> import pysal.lib
    >>> w = pysal.lib.weights.set_operations.w_intersection(w1, w2)
    >>> w1[0] == w[0]
    True
    >>> w1.neighbors[15]
    [11, 14]
    >>> w2.neighbors[15]
    [11, 14, 19]
    >>> w.neighbors[15]
    [11, 14]
    """
    # Reject unknown shapes up front, before touching either input.
    if w_shape not in ('w1', 'all', 'min'):
        raise Exception("invalid string passed to w_shape")

    if w_shape == 'w1':
        id_pool = list(w1.neighbors.keys())
    else:
        ids_w1 = set(w1.neighbors.keys())
        ids_w2 = set(w2.neighbors.keys())
        id_pool = ids_w1 | ids_w2 if w_shape == 'all' else ids_w1 & ids_w2

    # Shared neighbors where the id exists on both sides, otherwise an island.
    neighbors = {
        key: (list(set(w1.neighbors[key]) & set(w2.neighbors[key]))
              if key in w1.neighbors and key in w2.neighbors
              else [])
        for key in id_pool
    }
    return W(neighbors, silence_warnings=silence_warnings)
def w_difference(w1, w2, w_shape='w1', constrained=True, silence_warnings=False):
    """
    Build a binary weights object holding the neighbor pairs of w1 that do
    not appear in w2. Which of those pairs are kept is controlled by the
    w_shape and constrained parameters.

    Parameters
    ----------
    w1 : W
        object
    w2 : W
        object
    w_shape : string
        Selects which IDs appear in the returned weights matrix.
        'w1' uses the IDs of w1; 'all' uses every unique ID found in
        w1 or w2; 'min' uses the IDs found in w1 but not in w2.
    constrained : boolean
        If False, every neighbor pair of w1 that is absent from w2 is
        returned. If True, pairs that could not exist under
        w_shape='min' are dropped. Ignored if w_shape is set to 'min'.
    silence_warnings : boolean
        Switch to turn off (default on) print statements
        for every observation with islands

    Returns
    -------
    w : W
        object

    Raises
    ------
    Exception
        If w_shape is not one of 'w1', 'all' or 'min', or if
        w_shape='min' and no ID of w1 is missing from w2 (the result
        would be an empty weights matrix).

    Notes
    -----
    ID comparisons are performed using ==, therefore the integer ID 2 is
    equivalent to the float ID 2.0.

    Examples
    --------
    Construct rook (w2) and queen (w1) weights matrices for two 4x4 regions
    (16 areas). A queen matrix has all the joins a rook matrix does plus joins
    between areas that share a corner. The new matrix formed by the difference
    of rook from queen contains only joins at corners (typically called a
    bishop matrix). Note that the difference of queen from rook would result
    in a weights matrix with no joins.

    >>> from pysal.lib.weights import lat2W
    >>> w1 = lat2W(4,4,rook=False)
    >>> w2 = lat2W(4,4,rook=True)
    >>> import pysal.lib
    >>> w = pysal.lib.weights.set_operations.w_difference(w1, w2, constrained=False)
    >>> w1[0] == w[0]
    False
    >>> w1.neighbors[15]
    [10, 11, 14]
    >>> w2.neighbors[15]
    [11, 14]
    >>> w.neighbors[15]
    [10]
    """
    # Reject unknown shapes up front, before touching either input.
    if w_shape not in ('w1', 'all', 'min'):
        raise Exception("invalid string passed to w_shape")

    ids_w1 = set(w1.neighbors.keys())
    ids_w2 = set(w2.neighbors.keys())
    # IDs that exist in w1 only; this is both the 'min' shape and the set
    # that the constrained step restricts the result to.
    w1_only = ids_w1 - ids_w2

    if w_shape == 'w1':
        id_pool = list(w1.neighbors.keys())
    elif w_shape == 'all':
        id_pool = ids_w1 | ids_w2
    else:
        if not w1_only:
            raise Exception("returned an empty weights matrix")
        id_pool = w1_only

    neighbors = {}
    for key in id_pool:
        if key not in w1.neighbors:
            # ID contributed by w2 only: nothing from w1 to keep
            neighbors[key] = []
        elif key in w2.neighbors:
            remaining = set(w1.neighbors[key]) - set(w2.neighbors[key])
            neighbors[key] = list(remaining)
        else:
            neighbors[key] = copy.copy(w1.neighbors[key])

    if constrained or w_shape == 'min':
        # Every ID outside the w1-only set becomes an island ...
        for key in set(neighbors.keys()) - w1_only:
            neighbors[key] = []
        # ... and the w1-only IDs keep only joins to other w1-only IDs.
        for key in w1_only:
            neighbors[key] = list(set(neighbors[key]) & w1_only)

    return W(neighbors, silence_warnings=silence_warnings)
def w_symmetric_difference(w1, w2, w_shape='all', constrained=True, silence_warnings=False):
    """
    Returns a binary weights object, w, that includes only neighbor pairs
    that are not shared by w1 and w2. The w_shape and constrained parameters
    determine which pairs that are not shared by w1 and w2 are returned.

    Parameters
    ----------
    w1 : W
        object
    w2 : W
        object
    w_shape : string
        Defines the shape of the returned weights matrix. 'all' returns a
        matrix with all the unique IDs from w1 and w2; and 'min' returns
        a matrix with the IDs not shared by w1 and w2.
    constrained : boolean
        If False then the full set of neighbor pairs that are not
        shared by w1 and w2 are returned. If True then those pairs
        that would not be possible if w_shape='min' are dropped.
        Ignored if w_shape is set to 'min'.
    silence_warnings : boolean
        Switch to turn off (default on) print statements
        for every observation with islands

    Returns
    -------
    w : W
        object

    Raises
    ------
    Exception
        If w_shape is not 'all' or 'min'.

    Notes
    -----
    ID comparisons are performed using ==, therefore the integer ID 2 is
    equivalent to the float ID 2.0.

    The constrained step restricts the result to the IDs that occur in
    exactly one of w1 and w2 (the symmetric difference of the ID sets), so
    IDs unique to w2 are treated the same way as IDs unique to w1 and the
    operation is symmetric in its two arguments.

    Examples
    --------
    Construct queen weights matrix for a 4x4 (16 areas) region (w1) and a rook
    matrix for a 6x4 (24 areas) region (w2). The symmetric difference of these
    two matrices (with w_shape set to 'all' and constrained set to False)
    contains the corner joins in the overlap area, all the joins in the
    non-overlap area.

    >>> from pysal.lib.weights import lat2W
    >>> import pysal.lib
    >>> w1 = lat2W(4,4,rook=False)
    >>> w2 = lat2W(6,4,rook=True)
    >>> w = pysal.lib.weights.set_operations.w_symmetric_difference(w1, w2, constrained=False)
    >>> w1[0] == w[0]
    False
    >>> w1.neighbors[15]
    [10, 11, 14]
    >>> w2.neighbors[15]
    [11, 14, 19]
    >>> set(w.neighbors[15]) == set([10, 19])
    True
    """
    if w_shape == 'all':
        neigh_keys = set(w1.neighbors.keys()).union(set(w2.neighbors.keys()))
    elif w_shape == 'min':
        neigh_keys = set(w1.neighbors.keys()).symmetric_difference(
            set(w2.neighbors.keys()))
    else:
        raise Exception("invalid string passed to w_shape")

    neighbors = {}
    for i in neigh_keys:
        if i in w1.neighbors:
            if i in w2.neighbors:
                # id in both inputs: keep joins found in exactly one of them
                add_neigh = set(w1.neighbors[i]).symmetric_difference(
                    set(w2.neighbors[i]))
                neighbors[i] = list(add_neigh)
            else:
                neighbors[i] = copy.copy(w1.neighbors[i])
        else:
            # neigh_keys only holds ids of w1 or w2, so i must be in w2 here
            neighbors[i] = copy.copy(w2.neighbors[i])

    if constrained or w_shape == 'min':
        # ids that belong to exactly one input. The one-sided difference
        # (w1 - w2) used previously turned every w2-only id into an island,
        # which made the result depend on the argument order.
        constrained_keys = set(w1.neighbors.keys()).symmetric_difference(
            set(w2.neighbors.keys()))
        island_keys = set(neighbors.keys()).difference(constrained_keys)
        for i in island_keys:
            neighbors[i] = []
        for i in constrained_keys:
            neighbors[i] = list(
                set(neighbors[i]).intersection(constrained_keys))

    return W(neighbors, silence_warnings=silence_warnings)
def w_subset(w1, ids, silence_warnings=False):
    """
    Returns a binary weights object, w, that includes only those
    observations in ids.

    Parameters
    ----------
    w1 : W
        object
    ids : list
        A list (or any other iterable, including one-shot iterators)
        containing the IDs to be included in the returned weights object.
        The order of ids is passed on as the ``id_order`` of the result.
    silence_warnings : boolean
        Switch to turn off (default on) print statements
        for every observation with islands

    Returns
    -------
    w : W
        object

    Notes
    -----
    An ID in ids that is not present in w1 is returned with an empty
    neighbor list.

    Examples
    --------
    Construct a rook weights matrix for a 6x4 region (24 areas). By default
    PySAL assigns integer IDs to the areas in a region. By passing in a list
    of integers from 0 to 15, the first 16 areas are extracted from the
    previous weights matrix, and only those joins relevant to the new region
    are retained.

    >>> from pysal.lib.weights import lat2W
    >>> import pysal.lib
    >>> w1 = lat2W(6,4)
    >>> ids = range(16)
    >>> w = pysal.lib.weights.set_operations.w_subset(w1, ids)
    >>> w1[0] == w[0]
    True
    >>> w1.neighbors[15]
    [11, 14, 19]
    >>> w.neighbors[15]
    [11, 14]
    """
    # Materialise ids once: it is needed as a set, as the iteration order and
    # as id_order, and a generator would be exhausted after the first use.
    ids = list(ids)
    ids_set = set(ids)
    neighbors = {}
    for i in ids:
        if i in w1.neighbors:
            # keep only the joins whose other end is also being retained
            neigh_add = ids_set.intersection(set(w1.neighbors[i]))
            neighbors[i] = list(neigh_add)
        else:
            neighbors[i] = []
    return W(neighbors, id_order=ids, silence_warnings=silence_warnings)
def w_clip(w1, w2, outSP=True, silence_warnings=False):
    '''
    Clip a continuous W object (w1) with a different W object (w2) so only cells where
    w2 has a non-zero value remain with non-zero values in w1.

    Checks on w1 and w2 are performed to make sure they conform to the
    appropriate format and, if not, they are converted.

    Parameters
    ----------
    w1 : W
        W, scipy.sparse.csr.csr_matrix
        Potentially continuous weights matrix to be clipped. The clipped
        matrix wc will have at most the same elements as w1.
    w2 : W
        W, scipy.sparse.csr.csr_matrix
        Weights matrix to use as shell to clip w1. A binary copy of it is
        used internally; w2 itself is left untouched. Only non-zero
        elements in w2 will be kept non-zero in wc. NOTE: assumed to be
        of the same shape as w1
    outSP : boolean
        If True (default) return sparse version of the clipped W, if
        False, return W object of the clipped matrix
    silence_warnings : boolean
        Switch to turn off (default on) print statements
        for every observation with islands

    Returns
    -------
    wc : W
        W, scipy.sparse.csr.csr_matrix
        Clipped W object (sparse if outSP=True). It inherits
        ``id_order`` from w1 when w1 provides one.

    Examples
    --------
    >>> from pysal.lib.weights import lat2W

    First create a W object from a lattice using queen contiguity and
    row-standardize it (note that these weights will stay when we clip the
    object, but they will not necessarily represent a row-standardization
    anymore):

    >>> w1 = lat2W(3, 2, rook=False)
    >>> w1.transform = 'R'

    We will clip that geography assuming observations 0, 2, 3 and 4 belong to
    one group and 1, 5 belong to another group and we don't want both groups
    to interact with each other in our weights (i.e. w_ij = 0 if i and j in
    different groups). For that, we use the following method:

    >>> import pysal.lib
    >>> w2 = pysal.lib.weights.util.block_weights(['r1', 'r2', 'r1', 'r1', 'r1', 'r2'])

    To illustrate that w2 will only be considered as binary even when the
    object passed is not, we can row-standardize it

    >>> w2.transform = 'R'

    The clipped object ``wc`` will contain only the spatial queen
    relationships that occur within one group ('r1' or 'r2') but will have
    gotten rid of those that happen across groups

    >>> wcs = pysal.lib.weights.set_operations.w_clip(w1, w2, outSP=True)

    This will create a sparse object (recommended when n is large).

    >>> wcs.sparse.toarray()
    array([[0.        , 0.        , 0.33333333, 0.33333333, 0.        ,
            0.        ],
           [0.        , 0.        , 0.        , 0.        , 0.        ,
            0.        ],
           [0.2       , 0.        , 0.        , 0.2       , 0.2       ,
            0.        ],
           [0.2       , 0.        , 0.2       , 0.        , 0.2       ,
            0.        ],
           [0.        , 0.        , 0.33333333, 0.33333333, 0.        ,
            0.        ],
           [0.        , 0.        , 0.        , 0.        , 0.        ,
            0.        ]])

    If we wanted an original W object, we can control that with the argument
    ``outSP``:

    >>> wc = pysal.lib.weights.set_operations.w_clip(w1, w2, outSP=False)
    WARNING: there are 2 disconnected observations
    Island ids: [1, 5]
    >>> wc.full()[0]
    array([[0.        , 0.        , 0.33333333, 0.33333333, 0.        ,
            0.        ],
           [0.        , 0.        , 0.        , 0.        , 0.        ,
            0.        ],
           [0.2       , 0.        , 0.        , 0.2       , 0.2       ,
            0.        ],
           [0.2       , 0.        , 0.2       , 0.        , 0.2       ,
            0.        ],
           [0.        , 0.        , 0.33333333, 0.33333333, 0.        ,
            0.        ],
           [0.        , 0.        , 0.        , 0.        , 0.        ,
            0.        ]])

    You can check they are actually the same:

    >>> wcs.sparse.toarray() == wc.full()[0]
    array([[ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True],
           [ True,  True,  True,  True,  True,  True]])
    '''
    # Imported here rather than at module level, as in the original code.
    from .util import WSP2W

    # A bare csr matrix has no id_order attribute; an empty/missing order is
    # normalised to None. w1 itself is never modified.
    id_order = getattr(w1, 'id_order', None) or None

    if not isspmatrix_csr(w1):
        w1 = w1.sparse
    if not isspmatrix_csr(w2):
        w2 = w2.sparse

    # Build the binary shell on a private copy so the caller's matrix (or the
    # matrix handed out by w2.sparse) keeps its original values.
    shell = w2.copy()
    # Explicitly stored zeros are not "non-zero elements" and must not be
    # promoted to ones.
    shell.eliminate_zeros()
    shell.data = ones(shell.data.shape)

    wc = w1.multiply(shell)
    wc = WSP(wc, id_order=id_order)
    if not outSP:
        wc = WSP2W(wc, silence_warnings=silence_warnings)
    return wc