Contiguity from iterables
@ljwolf · Last active August 29, 2015
import numpy as np
import pysal as ps  # needs the wconstructor branch


def contiguity_from_GeoInterfaces(geoints, wtype='rook'):
    """
    Takes an iterable of objects exposing '__geo_interface__' (or coercible
    to objects that do) and returns a contiguity weight. Only makes sense
    for pure-polygon iterables.

    Works on pysal file objects (returned from pysal.open(path)), geopandas
    geometry columns, flat-text json fitting the geojson format, or lists
    of shapely shapes.

    Parameters
    ----------
    geoints : iterable of objects exposing '__geo_interface__'
    wtype   : kind of contiguity to calculate, 'QUEEN' or 'ROOK'

    Returns
    -------
    pysal W object, or raises NotImplementedError on dimensionality conflicts

    Example
    -------
    >>> import pysal as ps #from wconstructor branch
    >>> import geopandas as gpd
    >>> import shapely.geometry as geom
    >>> import numpy as np
    >>> data = gpd.read_file(ps.examples.get_path('NAT.shp'))
    >>> NAT_w = contiguity_from_GeoInterfaces(data.geometry, wtype='ROOK')
    >>> NAT_w.mean_neighbors
    5.8891410048622364
    >>> newgeoms = []
    >>> for idx in data.index:
    ...     p = np.random.random()
    ...     if p > .5:
    ...         newgeoms.append(geom.Point(np.random.randint(0,10,2)))
    ...     else:
    ...         newgeoms.append(data.geometry[idx])
    ...
    >>> new_W = contiguity_from_GeoInterfaces(newgeoms)
    NotImplementedError: Weights from objects with different dimensionality not supported
    """
    WTYPE = wtype.upper()
    if WTYPE not in ['QUEEN', 'ROOK']:
        raise ValueError("wtype must be 'QUEEN' or 'ROOK'")
    WTYPE = ['QUEEN', 'ROOK'].index(WTYPE) + 1  # QUEEN = 1, ROOK = 2
    if not np.all([hasattr(feature, '__geo_interface__') for feature in geoints]):
        try:
            # fall back to parsing as geojson; parse_geojson is defined in the notebook below
            feats = parse_geojson(geoints)['features']
            geoints = [ps.cg.asShape(feature['geometry']) for feature in feats]
        except:
            raise AttributeError("Iterable must admit a '__geo_interface__' attribute")
    geoints = [feature.__geo_interface__ for feature in geoints]
    if np.all([feature['type'] in ['Polygon', 'MultiPolygon'] for feature in geoints]):
        shpdict = {idx: ps.cg.asShape(poly) for idx, poly in enumerate(geoints)}
        pc = ps.cg.shapes.PolygonCollection(shpdict)
        neighs = ps.weights.Contiguity.ContiguityWeightsPolygons(pc, wtype=WTYPE)
    else:
        raise NotImplementedError('Weights from objects with different dimensionality not supported')
    return ps.W(neighs.w)
# import pysal from the wconstructor branch
def contiguity_from_GeoSeries(gdf, wtype='rook'):
    """
    Takes a GeoPandas-like DataFrame or GeoSeries and returns a contiguity
    weight. Only makes sense for pure-polygon dataframes.

    Parameters
    ----------
    gdf   : geopandas dataframe, or any pandas dataframe with shapely
            objects stored in a column named 'geometry'
    wtype : kind of contiguity to calculate, 'QUEEN' or 'ROOK'

    Returns
    -------
    pysal W object, or raises NotImplementedError on dimensionality conflicts

    Example
    -------
    >>> import pysal as ps #from wconstructor branch
    >>> import geopandas as gpd
    >>> import shapely.geometry as geom
    >>> import numpy as np
    >>> data = gpd.read_file(ps.examples.get_path('NAT.shp'))
    >>> NAT_w = contiguity_from_GeoSeries(data, wtype='ROOK')
    >>> NAT_w.mean_neighbors
    5.8891410048622364
    >>> newgeoms = []
    >>> for idx in data.index:
    ...     p = np.random.random()
    ...     if p > .5:
    ...         newgeoms.append(geom.Point(np.random.randint(0,10,2)))
    ...     else:
    ...         newgeoms.append(data.geometry[idx])
    ...
    >>> data['geometry'] = gpd.GeoSeries(newgeoms)
    >>> new_W = contiguity_from_GeoSeries(data)
    NotImplementedError: Weights from objects with different dimensionality not supported
    """
    WTYPE = wtype.upper()
    if WTYPE not in ['QUEEN', 'ROOK']:
        raise ValueError("wtype must be 'QUEEN' or 'ROOK'")
    WTYPE = ['QUEEN', 'ROOK'].index(WTYPE) + 1  # QUEEN = 1, ROOK = 2
    types = set(feature.type for feature in gdf.geometry)
    if types.issubset(set(['Polygon', 'MultiPolygon'])):
        shpdict = {idx: ps.cg.asShape(poly) for idx, poly in zip(gdf.index, gdf.geometry)}
        pc = ps.cg.shapes.PolygonCollection(shpdict)
        neighs = ps.weights.Contiguity.ContiguityWeightsPolygons(pc, wtype=WTYPE)
    else:
        raise NotImplementedError('Weights from objects with different dimensionality not supported')
    return ps.W(neighs.w)
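
# A minimal usage sketch under the same assumptions as the docstrings above
# (pysal's wconstructor branch, with the bundled columbus example shapefile):
if __name__ == '__main__':
    import geopandas as gpd
    path = ps.examples.get_path('columbus.shp')
    df = gpd.read_file(path)
    w_iter = contiguity_from_GeoInterfaces(df.geometry, wtype='queen')
    w_series = contiguity_from_GeoSeries(df, wtype='queen')
    # the two construction routes should agree for a default integer index
    print w_iter.neighbors == w_series.neighbors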
{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import json\n",
"import pysal as ps\n",
"import urllib\n",
"import numpy as np\n",
"from urlparse import urlparse\n",
"\n",
"#test cases for geointerfaces\n",
"import geopandas as gpd\n",
"import shapely.geometry as geom\n",
"import geojson as gj"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Just extending on the prior use of `contiguity_from_geojson`, I thought it'd be cool to extend it to arbitrary interfaces that admit geointerfaces. This includes:\n",
"\n",
"1. `pysal` file objects\n",
"2. `geopandas` dataframes/geoseries\n",
"3. json (local or remote, using `parse_geojson`)\n",
"4. iterables of `shapely` objects\n",
"\n",
"\n",
"This needs the `wconstructor` branch."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def parse_geojson(resource):\n",
" gj = urlparse(resource)\n",
" if not gj[1]:\n",
" # either a serialized object or a local file\n",
" try:\n",
" return json.loads(gj[2])\n",
" except:\n",
" with open(gj[2]) as f:\n",
" return json.load(f) \n",
" else:\n",
" # remote url\n",
" fs = urllib.urlopen(resource)\n",
" return json.load(fs)\n",
" return gj"
]
},
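{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sketch of the three input styles `parse_geojson` is meant to handle. The empty `FeatureCollection` is just a stand-in, and the commented paths are illustrative; the remote URL is the one used further down."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# 1. an already-serialized geojson string\n",
"serialized = json.dumps({'type': 'FeatureCollection', 'features': []})\n",
"print parse_geojson(serialized)['type']\n",
"# 2. a local file path goes through the open() branch, e.g.:\n",
"# parse_geojson('columbus.json')  # hypothetical local file\n",
"# 3. a remote url goes through urllib:\n",
"# parse_geojson('http://toae.org/pub/columbus.json')"
]
},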
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def contiguity_from_GeoInterfaces(geoints, wtype = 'rook'):\n",
" WTYPE = wtype.upper()\n",
" if WTYPE not in ['QUEEN', 'ROOK']:\n",
" raise ValueError(\"wtype must be 'QUEEN' or 'ROOK'\")\n",
" WTYPE =['QUEEN', 'ROOK'].index(WTYPE)+1\n",
" if not np.all([hasattr(feature, '__geo_interface__') for feature in geoints]):\n",
" try:\n",
" feats = parse_geojson(geoints)['features']\n",
" geoints = [ps.cg.asShape(feature['geometry']) for feature in feats]\n",
" except:\n",
" raise AttributeError(\"Iterable must admit a '__geo_interface__' method\")\n",
" geoints = [feature.__geo_interface__ for feature in geoints]\n",
" if np.all([feature['type'] in ['Polygon', 'MultiPolygon'] for feature in geoints]):\n",
" shpdict = {idx:ps.cg.asShape(poly) for idx, poly in enumerate(geoints)}\n",
" pc = ps.cg.shapes.PolygonCollection(shpdict)\n",
" neighs = ps.weights.Contiguity.ContiguityWeightsPolygons(pc)\n",
" else:\n",
" raise NotImplementedError('Contiguity Weights from objects with different dimensionality not supported')\n",
" return ps.W(neighs.w)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"path = ps.examples.get_path('columbus.shp')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"A = contiguity_from_GeoInterfaces(ps.open(path))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"B = contiguity_from_GeoInterfaces(gpd.read_file(path).geometry)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"C = contiguity_from_GeoInterfaces('http://toae.org/pub/columbus.json')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"D = contiguity_from_GeoInterfaces([geom.shape(poly) for poly in ps.open(path)])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A.neighbors == B.neighbors == C.neighbors == D.neighbors"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I dunno how much more arbitrary you can get. If something implements the geointerface or comes close to being able to be coerced into it, we can construct spatial weights for it with serge's new `wconstructor`. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Playing around with a pysal-geopandas API"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def lag_col(df, colname):\n",
" return gpd.GeoSeries(ps.lag_spatial(df.W, df[colname]))"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data = gpd.read_file(ps.examples.get_path('columbus.shp'))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that this is more arbitrary, it might make sense for `GeoPandas` dataframes to have some `W` attribute or method to construct `W` matrices. Here, I'll just assign."
]
},
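{
"cell_type": "markdown",
"metadata": {},
"source": [
"A rough sketch of such a method (the `build_W` name and the caching are hypothetical, not anything in `geopandas` or `pysal`):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def build_W(df, wtype='rook'):\n",
"    # hypothetical accessor: construct and cache a contiguity W on the frame\n",
"    if not hasattr(df, '_W'):\n",
"        df._W = contiguity_from_GeoInterfaces(df.geometry, wtype=wtype)\n",
"    return df._W"
]
},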
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"setting bounding box for FeatureCollection\n"
]
}
],
"source": [
"data.W = contiguity_from_GeoInterfaces(data.geometry, wtype='QUEEN')"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'AREA', u'COLUMBUS_', u'COLUMBUS_I', u'CP', u'CRIME', u'DISCBD', u'EW', u'HOVAL', u'INC', u'NEIG', u'NEIGNO', u'NSA', u'NSB', u'OPEN', u'PERIMETER', u'PLUMB', u'POLYID', u'THOUS', u'X', u'Y', u'geometry'], dtype='object')"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In general, it might be a good idea to have a `.lag_column` method. toying with that here."
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data['HOVAL_lag'] = lag_col(data, 'HOVAL')"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data.lag_col = lambda x: lag_col(data, x)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now to test how to use pysal on geopandas objects. "
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"Mo = ps.Moran(data['HOVAL'].values, data.W)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.18009311431727287"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo.I"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"d = data['HOVAL']"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"Mo2 = ps.Moran(np.array(d), data.W)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo.I == Mo2.I"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo.p_norm == Mo2.p_norm"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"16367.794631703126"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo.z2ss"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"16367.794631703126"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Mo2.z2ss"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"BV = ps.Moran_BV(data['HOVAL'].values, data['CRIME'], data.W)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"LMo = ps.Moran_Local(data['HOVAL'].values, data.W)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So, all moran calculations can be done simply by using the `.values` casting."
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'AREA', u'COLUMBUS_', u'COLUMBUS_I', u'CP', u'CRIME', u'DISCBD', u'EW', u'HOVAL', u'INC', u'NEIG', u'NEIGNO', u'NSA', u'NSB', u'OPEN', u'PERIMETER', u'PLUMB', u'POLYID', u'THOUS', u'X', u'Y', u'geometry', u'HOVAL_lag'], dtype='object')"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.columns"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"reg = ps.spreg.OLS(data['HOVAL'].values.reshape(49,1), \n",
" data[['INC', 'CRIME']].values, \n",
" data.W, spat_diag=True, moran=True,\n",
" name_y = 'HOVAL', name_x = ['INC', 'CRIME'])"
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"REGRESSION\n",
"----------\n",
"SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES\n",
"-----------------------------------------\n",
"Data set : unknown\n",
"Weights matrix : unknown\n",
"Dependent Variable : HOVAL Number of Observations: 49\n",
"Mean dependent var : 38.4362 Number of Variables : 3\n",
"S.D. dependent var : 18.4661 Degrees of Freedom : 46\n",
"R-squared : 0.3495\n",
"Adjusted R-squared : 0.3212\n",
"Sum squared residual: 10647.015 F-statistic : 12.3582\n",
"Sigma-square : 231.457 Prob(F-statistic) : 5.064e-05\n",
"S.E. of regression : 15.214 Log likelihood : -201.368\n",
"Sigma-square ML : 217.286 Akaike info criterion : 408.735\n",
"S.E of regression ML: 14.7406 Schwarz criterion : 414.411\n",
"\n",
"------------------------------------------------------------------------------------\n",
" Variable Coefficient Std.Error t-Statistic Probability\n",
"------------------------------------------------------------------------------------\n",
" CONSTANT 46.4281827 13.1917570 3.5194844 0.0009867\n",
" CRIME -0.4848885 0.1826729 -2.6544086 0.0108745\n",
" INC 0.6289840 0.5359104 1.1736736 0.2465669\n",
"------------------------------------------------------------------------------------\n",
"\n",
"REGRESSION DIAGNOSTICS\n",
"MULTICOLLINEARITY CONDITION NUMBER 12.538\n",
"\n",
"TEST ON NORMALITY OF ERRORS\n",
"TEST DF VALUE PROB\n",
"Jarque-Bera 2 39.706 0.0000\n",
"\n",
"DIAGNOSTICS FOR HETEROSKEDASTICITY\n",
"RANDOM COEFFICIENTS\n",
"TEST DF VALUE PROB\n",
"Breusch-Pagan test 2 5.767 0.0559\n",
"Koenker-Bassett test 2 2.270 0.3214\n",
"\n",
"DIAGNOSTICS FOR SPATIAL DEPENDENCE\n",
"TEST MI/DF VALUE PROB\n",
"Moran's I (error) 0.1713 2.283 0.0224\n",
"Lagrange Multiplier (lag) 1 0.982 0.3218\n",
"Robust LM (lag) 1 1.094 0.2957\n",
"Lagrange Multiplier (error) 1 3.097 0.0784\n",
"Robust LM (error) 1 3.209 0.0732\n",
"Lagrange Multiplier (SARMA) 2 4.191 0.1230\n",
"\n",
"================================ END OF REPORT =====================================\n"
]
}
],
"source": [
"print reg.summary"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"The `spreg` module also can be used, but we run into [the shape issue](https://github.com/pysal/pysal/issues/461) again."
]
},
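{
"cell_type": "markdown",
"metadata": {},
"source": [
"Concretely, the mismatch is just the array shape; the `reshape` in the regression cell above is the workaround:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# pandas hands back 1-d arrays, but spreg wants explicit column vectors\n",
"print data['HOVAL'].values.shape                 # (49,)\n",
"print data['HOVAL'].values.reshape(-1, 1).shape  # (49, 1)"
]
},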
{
"cell_type": "code",
"execution_count": 137,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"pci = gpd.pd.read_csv(ps.examples.get_path('usjoin.csv'))\n",
"states = gpd.read_file(ps.examples.get_path('us48.shp'))"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'Name', u'STATE_FIPS', u'1929', u'1930', u'1931', u'1932', u'1933', u'1934', u'1935', u'1936', u'1937', u'1938', u'1939', u'1940', u'1941', u'1942', u'1943', u'1944', u'1945', u'1946', u'1947', u'1948', u'1949', u'1950', u'1951', u'1952', u'1953', u'1954', u'1955', u'1956', u'1957', u'1958', u'1959', u'1960', u'1961', u'1962', u'1963', u'1964', u'1965', u'1966', u'1967', u'1968', u'1969', u'1970', u'1971', u'1972', u'1973', u'1974', u'1975', u'1976', u'1977', u'1978', u'1979', u'1980', u'1981', u'1982', u'1983', u'1984', u'1985', u'1986', u'1987', u'1988', u'1989', u'1990', u'1991', u'1992', u'1993', u'1994', u'1995', u'1996', u'1997', u'1998', u'1999', u'2000', u'2001', u'2002', u'2003', u'2004', u'2005', u'2006', u'2007', u'2008', u'2009'], dtype='object')"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pci.columns"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index([u'AREA', u'PERIMETER', u'STATE_', u'STATE_ABBR', u'STATE_FIPS', u'STATE_ID', u'STATE_NAME', u'SUB_REGION', u'geometry'], dtype='object')"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"states.columns"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"new = gpd.pd.merge(states,pci, left_on='STATE_NAME', right_on='Name')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And, from here, you could just do whatever you need to do using `.values` and preexisting code to do any kind of markov model in `spatial_dynamics`, which means that a large fraction of use cases of the library are covered. "
]
}
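,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of a classic Markov chain on the merged frame, following the pattern in the pysal docs (assuming `ps.Quantiles` and `ps.Markov` behave as documented; quintiles are an arbitrary choice):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"years = [str(y) for y in range(1929, 2010)]\n",
"pci_vals = new[years].values               # (48, 81) array of incomes\n",
"rpci = pci_vals / pci_vals.mean(axis=0)    # incomes relative to the yearly mean\n",
"# classify each year into quintiles, then fit the chain on the class ids\n",
"q5 = np.array([ps.Quantiles(y, k=5).yb for y in rpci.T]).T\n",
"m = ps.Markov(q5)\n",
"print m.p                                  # 5x5 transition probability matrix"
]
}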
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10rc1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}