Contiguity from iterables
@ljwolf · Last active August 29, 2015
import numpy as np
import pysal as ps  # needs the wconstructor branch


def contiguity_from_GeoInterfaces(geoints, wtype='rook'):
    """
    Takes an iterable of objects exposing '__geo_interface__' (or coercible
    to objects that do) and returns a contiguity weight. Only makes sense
    for pure-polygon iterables.

    Works on pysal file objects (returned from pysal.open(path)), geopandas
    geometry columns, flat-text json fitting the geojson format, or lists
    of shapely shapes.

    Parameters
    ----------
    geoints : iterable of objects exposing '__geo_interface__'
    wtype   : kind of contiguity to calculate, 'QUEEN' or 'ROOK'

    Returns
    -------
    pysal W object, or raises NotImplementedError on dimensionality conflicts

    Example
    -------
    >>> import pysal as ps #from wconstructor branch
    >>> import geopandas as gpd
    >>> import shapely.geometry as geom
    >>> import numpy as np
    >>> data = gpd.read_file(ps.examples.get_path('NAT.shp'))
    >>> NAT_w = contiguity_from_GeoInterfaces(data.geometry, wtype='ROOK')
    >>> NAT_w.mean_neighbors
    5.8891410048622364
    >>> newgeoms = []
    >>> for idx in data.index:
    ...     p = np.random.random()
    ...     if p > .5:
    ...         newgeoms.append(geom.Point(np.random.randint(0,10,2)))
    ...     else:
    ...         newgeoms.append(data.geometry[idx])
    ...
    >>> new_W = contiguity_from_GeoInterfaces(newgeoms)
    NotImplementedError: Weights from objects with different dimensionality not supported
    """
    WTYPE = wtype.upper()
    if WTYPE not in ['QUEEN', 'ROOK']:
        raise ValueError("wtype must be 'QUEEN' or 'ROOK'")
    WTYPE = ['QUEEN', 'ROOK'].index(WTYPE) + 1  # QUEEN = 1, ROOK = 2
    if not np.all([hasattr(feature, '__geo_interface__') for feature in geoints]):
        try:
            # fall back to parsing as geojson; parse_geojson is defined in the notebook below
            feats = parse_geojson(geoints)['features']
            geoints = [ps.cg.asShape(feature['geometry']) for feature in feats]
        except:
            raise AttributeError("Iterable must admit a '__geo_interface__' attribute")
    geoints = [feature.__geo_interface__ for feature in geoints]
    if np.all([feature['type'] in ['Polygon', 'MultiPolygon'] for feature in geoints]):
        shpdict = {idx: ps.cg.asShape(poly) for idx, poly in enumerate(geoints)}
        pc = ps.cg.shapes.PolygonCollection(shpdict)
        neighs = ps.weights.Contiguity.ContiguityWeightsPolygons(pc, wtype=WTYPE)
    else:
        raise NotImplementedError('Weights from objects with different dimensionality not supported')
    return ps.W(neighs.w)
# import pysal from the wconstructor branch
def contiguity_from_GeoSeries(gdf, wtype='rook'):
    """
    Takes a GeoPandas-like DataFrame or GeoSeries and returns a contiguity
    weight. Only makes sense for pure-polygon dataframes.

    Parameters
    ----------
    gdf   : geopandas dataframe, or any pandas dataframe with shapely
            objects stored in a column named 'geometry'
    wtype : kind of contiguity to calculate, 'QUEEN' or 'ROOK'

    Returns
    -------
    pysal W object, or raises NotImplementedError on dimensionality conflicts

    Example
    -------
    >>> import pysal as ps #from wconstructor branch
    >>> import geopandas as gpd
    >>> import shapely.geometry as geom
    >>> import numpy as np
    >>> data = gpd.read_file(ps.examples.get_path('NAT.shp'))
    >>> NAT_w = contiguity_from_GeoSeries(data, wtype='ROOK')
    >>> NAT_w.mean_neighbors
    5.8891410048622364
    >>> newgeoms = []
    >>> for idx in data.index:
    ...     p = np.random.random()
    ...     if p > .5:
    ...         newgeoms.append(geom.Point(np.random.randint(0,10,2)))
    ...     else:
    ...         newgeoms.append(data.geometry[idx])
    ...
    >>> data['geometry'] = gpd.GeoSeries(newgeoms)
    >>> new_W = contiguity_from_GeoSeries(data)
    NotImplementedError: Weights from objects with different dimensionality not supported
    """
    WTYPE = wtype.upper()
    if WTYPE not in ['QUEEN', 'ROOK']:
        raise ValueError("wtype must be 'QUEEN' or 'ROOK'")
    WTYPE = ['QUEEN', 'ROOK'].index(WTYPE) + 1  # QUEEN = 1, ROOK = 2
    types = set(feature.type for feature in gdf.geometry)
    if types.issubset(set(['Polygon', 'MultiPolygon'])):
        shpdict = {idx: ps.cg.asShape(poly) for idx, poly in zip(gdf.index, gdf.geometry)}
        pc = ps.cg.shapes.PolygonCollection(shpdict)
        neighs = ps.weights.Contiguity.ContiguityWeightsPolygons(pc, wtype=WTYPE)
    else:
        raise NotImplementedError('Weights from objects with different dimensionality not supported')
    return ps.W(neighs.w)
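
# A minimal usage sketch under the same assumptions as the docstrings above
# (pysal's wconstructor branch, with the bundled columbus example shapefile):
if __name__ == '__main__':
    import geopandas as gpd
    path = ps.examples.get_path('columbus.shp')
    df = gpd.read_file(path)
    w_iter = contiguity_from_GeoInterfaces(df.geometry, wtype='queen')
    w_series = contiguity_from_GeoSeries(df, wtype='queen')
    # the two construction routes should agree for a default integer index
    print w_iter.neighbors == w_series.neighbors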
{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import json\n",
"import pysal as ps\n",
"import urllib\n",
"import numpy as np\n",
"from urlparse import urlparse\n",
"\n",
"#test cases for geointerfaces\n",
"import geopandas as gpd\n",
"import shapely.geometry as geom\n",
"import geojson as gj"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Just extending on the prior use of `contiguity_from_geojson`, I thought it'd be cool to extend it to arbitrary interfaces that admit geointerfaces. This includes:\n",
"\n",
"1. `pysal` file objects\n",
"2. `geopandas` dataframes/geoseries\n",
"3. json (local or remote, using `parse_geojson`)\n",
"4. iterables of `shapely` objects\n",
"\n",
"\n",
"This needs the `wconstructor` branch."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def parse_geojson(resource):\n",
" gj = urlparse(resource)\n",
" if not gj[1]:\n",
" # either a serialized object or a local file\n",
" try:\n",
" return json.loads(gj[2])\n",
" except:\n",
" with open(gj[2]) as f:\n",
" return json.load(f) \n",
" else:\n",
" # remote url\n",
" fs = urllib.urlopen(resource)\n",
" return json.load(fs)\n",
" return gj"
]
},
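{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sketch of the three input styles `parse_geojson` is meant to handle. The empty `FeatureCollection` is just a stand-in, and the commented paths are illustrative; the remote URL is the one used further down."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# 1. an already-serialized geojson string\n",
"serialized = json.dumps({'type': 'FeatureCollection', 'features': []})\n",
"print parse_geojson(serialized)['type']\n",
"# 2. a local file path goes through the open() branch, e.g.:\n",
"# parse_geojson('columbus.json')  # hypothetical local file\n",
"# 3. a remote url goes through urllib:\n",
"# parse_geojson('http://toae.org/pub/columbus.json')"
]
},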
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def contiguity_from_GeoInterfaces(geoints, wtype = 'rook'):\n",
" WTYPE = wtype.upper()\n",
" if WTYPE not in ['QUEEN', 'ROOK']:\n",
" raise ValueError(\"wtype must be 'QUEEN' or 'ROOK'\")\n",
" WTYPE =['QUEEN', 'ROOK'].index(WTYPE)+1\n",
" if not np.all([hasattr(feature, '__geo_interface__') for feature in geoints]):\n",
" try:\n",
" feats = parse_geojson(geoints)['features']\n",
" geoints = [ps.cg.asShape(feature['geometry']) for feature in feats]\n",
" except:\n",
" raise AttributeError(\"Iterable must admit a '__geo_interface__' method\")\n",
" geoints = [feature.__geo_interface__ for feature in geoints]\n",
" if np.all([feature['type'] in ['Polygon', 'MultiPolygon'] for feature in geoints]):\n",
" shpdict = {idx:ps.cg.asShape(poly) for idx, poly in enumerate(geoints)}\n",
" pc = ps.cg.shapes.PolygonCollection(shpdict)\n",
" neighs = ps.weights.Contiguity.ContiguityWeightsPolygons(pc)\n",
" else:\n",
" raise NotImplementedError('Contiguity Weights from objects with different dimensionality not supported')\n",
" return ps.W(neighs.w)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"path = ps.examples.get_path('columbus.shp')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"A = contiguity_from_GeoInterfaces(ps.open(path))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"B = contiguity_from_GeoInterfaces(gpd.read_file(path).geometry)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"C = contiguity_from_GeoInterfaces('http://toae.org/pub/columbus.json')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"D = contiguity_from_GeoInterfaces([geom.shape(poly) for poly in ps.open(path)])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A.neighbors == B.neighbors == C.neighbors == D.neighbors"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I dunno how much more arbitrary you can get. If something implements the geointerface or comes close to being able to be coerced into it, we can construct spatial weights for it with serge's new `wconstructor`. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Playing around with a pysal-geopandas API"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def lag_col(df, colname):\n",
" return gpd.GeoSeries(ps.lag_spatial(df.W, df[colname]))"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = gpd.read_file(ps.examples.get_path('columbus.shp'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that this is more arbitrary, it might make sense for `GeoPandas` dataframes to have some `W` attribute or method to construct `W` matrices. Here, I'll just assign."
]
},
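{
"cell_type": "markdown",
"metadata": {},
"source": [
"A rough sketch of such a method (the `build_W` name and the caching are hypothetical, not anything in `geopandas` or `pysal`):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def build_W(df, wtype='rook'):\n",
"    # hypothetical accessor: construct and cache a contiguity W on the frame\n",
"    if not hasattr(df, '_W'):\n",
"        df._W = contiguity_from_GeoInterfaces(df.geometry, wtype=wtype)\n",
"    return df._W"
]
},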
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"data.W = contiguity_from_GeoInterfaces(data.geometry, wtype='QUEEN')"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'AREA', u'COLUMBUS_', u'COLUMBUS_I', u'CP', u'CRIME', u'DISCBD', u'EW', u'HOVAL', u'INC', u'NEIG', u'NEIGNO', u'NSA', u'NSB', u'OPEN', u'PERIMETER', u'PLUMB', u'POLYID', u'THOUS', u'X', u'Y', u'geometry'], dtype='object')"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In general, it might be a good idea to have a `.lag_column` method. toying with that here."
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data['HOVAL_lag'] = lag_col(data, 'HOVAL')"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data.lag_col = lambda x: lag_col(data, x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now to test how to use pysal on geopandas objects. "
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"Mo = ps.Moran(data['HOVAL'].values, data.W)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.18009311431727287"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo.I"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"d = data['HOVAL']"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"Mo2 = ps.Moran(np.array(d), data.W)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo.I == Mo2.I"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo.p_norm == Mo2.p_norm"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"16367.794631703126"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo.z2ss"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"16367.794631703126"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo2.z2ss"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"BV = ps.Moran_BV(data['HOVAL'].values, data['CRIME'], data.W)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"LMo = ps.Moran_Local(data['HOVAL'].values, data.W)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So, all moran calculations can be done simply by using the `.values` casting."
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'AREA', u'COLUMBUS_', u'COLUMBUS_I', u'CP', u'CRIME', u'DISCBD', u'EW', u'HOVAL', u'INC', u'NEIG', u'NEIGNO', u'NSA', u'NSB', u'OPEN', u'PERIMETER', u'PLUMB', u'POLYID', u'THOUS', u'X', u'Y', u'geometry', u'HOVAL_lag'], dtype='object')"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"reg = ps.spreg.OLS(data['HOVAL'].values.reshape(49,1), \n",
" data[['INC', 'CRIME']].values, \n",
" data.W, spat_diag=True, moran=True,\n",
" name_y = 'HOVAL', name_x = ['INC', 'CRIME'])"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"REGRESSION\n",
"----------\n",
"SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES\n",
"-----------------------------------------\n",
"Data set : unknown\n",
"Weights matrix : unknown\n",
"Dependent Variable : HOVAL Number of Observations: 49\n",
"Mean dependent var : 38.4362 Number of Variables : 3\n",
"S.D. dependent var : 18.4661 Degrees of Freedom : 46\n",
"R-squared : 0.3495\n",
"Adjusted R-squared : 0.3212\n",
"Sum squared residual: 10647.015 F-statistic : 12.3582\n",
"Sigma-square : 231.457 Prob(F-statistic) : 5.064e-05\n",
"S.E. of regression : 15.214 Log likelihood : -201.368\n",
"Sigma-square ML : 217.286 Akaike info criterion : 408.735\n",
"S.E of regression ML: 14.7406 Schwarz criterion : 414.411\n",
"\n",
"------------------------------------------------------------------------------------\n",
" Variable Coefficient Std.Error t-Statistic Probability\n",
"------------------------------------------------------------------------------------\n",
" CONSTANT 46.4281827 13.1917570 3.5194844 0.0009867\n",
" CRIME -0.4848885 0.1826729 -2.6544086 0.0108745\n",
" INC 0.6289840 0.5359104 1.1736736 0.2465669\n",
"------------------------------------------------------------------------------------\n",
"\n",
"REGRESSION DIAGNOSTICS\n",
"MULTICOLLINEARITY CONDITION NUMBER 12.538\n",
"\n",
"TEST ON NORMALITY OF ERRORS\n",
"TEST DF VALUE PROB\n",
"Jarque-Bera 2 39.706 0.0000\n",
"\n",
"DIAGNOSTICS FOR HETEROSKEDASTICITY\n",
"RANDOM COEFFICIENTS\n",
"TEST DF VALUE PROB\n",
"Breusch-Pagan test 2 5.767 0.0559\n",
"Koenker-Bassett test 2 2.270 0.3214\n",
"\n",
"DIAGNOSTICS FOR SPATIAL DEPENDENCE\n",
"TEST MI/DF VALUE PROB\n",
"Moran's I (error) 0.1713 2.283 0.0224\n",
"Lagrange Multiplier (lag) 1 0.982 0.3218\n",
"Robust LM (lag) 1 1.094 0.2957\n",
"Lagrange Multiplier (error) 1 3.097 0.0784\n",
"Robust LM (error) 1 3.209 0.0732\n",
"Lagrange Multiplier (SARMA) 2 4.191 0.1230\n",
"\n",
"================================ END OF REPORT =====================================\n"
]
}
],
"source": [
"print reg.summary"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"The `spreg` module also can be used, but we run into [the shape issue](https://github.com/pysal/pysal/issues/461) again."
]
},
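{
"cell_type": "markdown",
"metadata": {},
"source": [
"Concretely, the mismatch is just the array shape; the `reshape` in the regression cell above is the workaround:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# pandas hands back 1-d arrays, but spreg wants explicit column vectors\n",
"print data['HOVAL'].values.shape                 # (49,)\n",
"print data['HOVAL'].values.reshape(-1, 1).shape  # (49, 1)"
]
},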
{
"cell_type": "code",
"execution_count": 137,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pci = gpd.pd.read_csv(ps.examples.get_path('usjoin.csv'))\n",
"states = gpd.read_file(ps.examples.get_path('us48.shp'))"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'Name', u'STATE_FIPS', u'1929', u'1930', u'1931', u'1932', u'1933', u'1934', u'1935', u'1936', u'1937', u'1938', u'1939', u'1940', u'1941', u'1942', u'1943', u'1944', u'1945', u'1946', u'1947', u'1948', u'1949', u'1950', u'1951', u'1952', u'1953', u'1954', u'1955', u'1956', u'1957', u'1958', u'1959', u'1960', u'1961', u'1962', u'1963', u'1964', u'1965', u'1966', u'1967', u'1968', u'1969', u'1970', u'1971', u'1972', u'1973', u'1974', u'1975', u'1976', u'1977', u'1978', u'1979', u'1980', u'1981', u'1982', u'1983', u'1984', u'1985', u'1986', u'1987', u'1988', u'1989', u'1990', u'1991', u'1992', u'1993', u'1994', u'1995', u'1996', u'1997', u'1998', u'1999', u'2000', u'2001', u'2002', u'2003', u'2004', u'2005', u'2006', u'2007', u'2008', u'2009'], dtype='object')"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pci.columns"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'AREA', u'PERIMETER', u'STATE_', u'STATE_ABBR', u'STATE_FIPS', u'STATE_ID', u'STATE_NAME', u'SUB_REGION', u'geometry'], dtype='object')"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"states.columns"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"new = gpd.pd.merge(states,pci, left_on='STATE_NAME', right_on='Name')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And, from here, you could just do whatever you need to do using `.values` and preexisting code to do any kind of markov model in `spatial_dynamics`, which means that a large fraction of use cases of the library are covered. "
]
}
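,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of a classic Markov chain on the merged frame, following the pattern in the pysal docs (assuming `ps.Quantiles` and `ps.Markov` behave as documented; quintiles are an arbitrary choice):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"years = [str(y) for y in range(1929, 2010)]\n",
"pci_vals = new[years].values               # (48, 81) array of incomes\n",
"rpci = pci_vals / pci_vals.mean(axis=0)    # incomes relative to the yearly mean\n",
"# classify each year into quintiles, then fit the chain on the class ids\n",
"q5 = np.array([ps.Quantiles(y, k=5).yb for y in rpci.T]).T\n",
"m = ps.Markov(q5)\n",
"print m.p                                  # 5x5 transition probability matrix"
]
}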
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10rc1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}