Skip to content

Instantly share code, notes, and snippets.

@urschrei
Last active April 19, 2016 15:54
Show Gist options
  • Save urschrei/f76c6409e9b48e2d17402afaf64a313c to your computer and use it in GitHub Desktop.
Save urschrei/f76c6409e9b48e2d17402afaf64a313c to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from geopy.distance import great_circle\n",
"from geopy.distance import vincenty"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" <th>coords</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>52.239760</td>\n",
" <td>1.609915</td>\n",
" <td>(52.2397597274, 1.609915459)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>53.657153</td>\n",
" <td>-3.949987</td>\n",
" <td>(53.6571534399, -3.94998656551)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>54.724409</td>\n",
" <td>-1.078950</td>\n",
" <td>(54.7244094226, -1.0789501053)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>51.183825</td>\n",
" <td>0.963573</td>\n",
" <td>(51.1838249282, 0.963573094895)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>54.811744</td>\n",
" <td>-1.010206</td>\n",
" <td>(54.8117436496, -1.01020628852)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" lat lon coords\n",
"0 52.239760 1.609915 (52.2397597274, 1.609915459)\n",
"1 53.657153 -3.949987 (53.6571534399, -3.94998656551)\n",
"2 54.724409 -1.078950 (54.7244094226, -1.0789501053)\n",
"3 51.183825 0.963573 (51.1838249282, 0.963573094895)\n",
"4 54.811744 -1.010206 (54.8117436496, -1.01020628852)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# UK bounding box\n",
"N = 55.811741\n",
"E = 1.768960\n",
"S = 49.871159\n",
"W = -6.379880\n",
"\n",
"# 633 points give 633 ** 2 = 400,689 ordered pairs, a bit more than 400k\n",
"num_coords = 633\n",
"\n",
"# Uniform random points inside the bounding box. The explicit column list\n",
"# keeps the (lat, lon) column order independent of dict ordering.\n",
"df = pd.DataFrame({\n",
"    'lon': np.random.uniform(W, E, size=num_coords),\n",
"    'lat': np.random.uniform(S, N, size=num_coords)\n",
"    }, columns=['lat', 'lon'])\n",
"\n",
"# One (lat, lon) tuple per point. zip() is wrapped in list() because on\n",
"# Python 3 it returns an iterator, which cannot be assigned as a column.\n",
"df['coords'] = list(zip(df.lat, df.lon))\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Zero-filled frame with one row and one column per point in df\n",
"square = pd.DataFrame(\n",
"    data=np.zeros(shape=(len(df), len(df))),\n",
"    index=df.index,\n",
"    columns=df.index)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def get_distance(col):\n",
"    \"\"\"\n",
"    Return the distance from every point in `df` to one 'end' point.\n",
"\n",
"    Intended for `square.apply(get_distance, axis=1)`: `col` is then one\n",
"    row of `square`, and `col.name` is that row's index label. `square`\n",
"    shares its index with `df`, so the label selects the 'end' coordinate\n",
"    pair from `df.coords`.\n",
"\n",
"    geopy's vincenty() is applied to each entry of `df.coords` (passed as\n",
"    the first argument), with `end` as the second argument and the WGS-84\n",
"    ellipsoid. The result is a Series of geopy distance objects indexed\n",
"    like `df`.\n",
"    \"\"\"\n",
"    # A single .loc[row, column] lookup replaces the deprecated .ix chain\n",
"    end = df.loc[col.name, 'coords']\n",
"    return df['coords'].apply(vincenty, args=(end,), ellipsoid='WGS-84')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This is not efficient – it runs in around 15 wall-clock seconds on my 3.4GHz iMac.\n",
"There's probably a SciPy pairwise-distance function that does a much better job."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>623</th>\n",
" <th>624</th>\n",
" <th>625</th>\n",
" <th>626</th>\n",
" <th>627</th>\n",
" <th>628</th>\n",
" <th>629</th>\n",
" <th>630</th>\n",
" <th>631</th>\n",
" <th>632</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0 km</td>\n",
" <td>405.50348899 km</td>\n",
" <td>329.084912497 km</td>\n",
" <td>125.690948608 km</td>\n",
" <td>334.809546214 km</td>\n",
" <td>363.977284175 km</td>\n",
" <td>382.757346144 km</td>\n",
" <td>535.936672603 km</td>\n",
" <td>453.455691916 km</td>\n",
" <td>308.493357486 km</td>\n",
" <td>...</td>\n",
" <td>400.087508977 km</td>\n",
" <td>111.975894721 km</td>\n",
" <td>439.43400121 km</td>\n",
" <td>180.24140288 km</td>\n",
" <td>411.642775795 km</td>\n",
" <td>407.386946563 km</td>\n",
" <td>483.234335798 km</td>\n",
" <td>375.739768332 km</td>\n",
" <td>194.674392977 km</td>\n",
" <td>301.45740391 km</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>405.50348899 km</td>\n",
" <td>0.0 km</td>\n",
" <td>221.861288259 km</td>\n",
" <td>432.830979123 km</td>\n",
" <td>230.757869772 km</td>\n",
" <td>158.095284662 km</td>\n",
" <td>110.043557538 km</td>\n",
" <td>147.351736592 km</td>\n",
" <td>148.457521906 km</td>\n",
" <td>267.013330437 km</td>\n",
" <td>...</td>\n",
" <td>144.457624755 km</td>\n",
" <td>454.323538103 km</td>\n",
" <td>312.934572811 km</td>\n",
" <td>277.655527769 km</td>\n",
" <td>82.5152289072 km</td>\n",
" <td>99.3284506038 km</td>\n",
" <td>109.490313755 km</td>\n",
" <td>230.269722838 km</td>\n",
" <td>286.774422855 km</td>\n",
" <td>194.711412469 km</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>329.084912497 km</td>\n",
" <td>221.861288259 km</td>\n",
" <td>0.0 km</td>\n",
" <td>417.196927799 km</td>\n",
" <td>10.6814527 km</td>\n",
" <td>70.863645424 km</td>\n",
" <td>298.323147682 km</td>\n",
" <td>273.910722975 km</td>\n",
" <td>157.066357114 km</td>\n",
" <td>49.2445247693 km</td>\n",
" <td>...</td>\n",
" <td>335.812017502 km</td>\n",
" <td>423.039491047 km</td>\n",
" <td>483.025940498 km</td>\n",
" <td>149.04116848 km</td>\n",
" <td>291.780083538 km</td>\n",
" <td>144.491807851 km</td>\n",
" <td>222.095480532 km</td>\n",
" <td>391.732838315 km</td>\n",
" <td>138.26555647 km</td>\n",
" <td>317.903302763 km</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>125.690948608 km</td>\n",
" <td>432.830979123 km</td>\n",
" <td>417.196927799 km</td>\n",
" <td>0.0 km</td>\n",
" <td>424.885071457 km</td>\n",
" <td>435.546234701 km</td>\n",
" <td>377.839985968 km</td>\n",
" <td>576.231623356 km</td>\n",
" <td>515.485469459 km</td>\n",
" <td>407.42856463 km</td>\n",
" <td>...</td>\n",
" <td>383.454043729 km</td>\n",
" <td>33.7054490333 km</td>\n",
" <td>372.798143239 km</td>\n",
" <td>277.081424576 km</td>\n",
" <td>414.272250674 km</td>\n",
" <td>462.044510708 km</td>\n",
" <td>527.921396963 km</td>\n",
" <td>331.220487884 km</td>\n",
" <td>296.642676261 km</td>\n",
" <td>277.089868144 km</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>334.809546214 km</td>\n",
" <td>230.757869772 km</td>\n",
" <td>10.6814527 km</td>\n",
" <td>424.885071457 km</td>\n",
" <td>0.0 km</td>\n",
" <td>77.361013525 km</td>\n",
" <td>308.571537322 km</td>\n",
" <td>278.28526467 km</td>\n",
" <td>159.934984822 km</td>\n",
" <td>44.921193572 km</td>\n",
" <td>...</td>\n",
" <td>346.110366859 km</td>\n",
" <td>430.122218324 km</td>\n",
" <td>493.684280642 km</td>\n",
" <td>154.571809629 km</td>\n",
" <td>301.534166914 km</td>\n",
" <td>150.960361859 km</td>\n",
" <td>227.157332278 km</td>\n",
" <td>402.391764394 km</td>\n",
" <td>142.486100572 km</td>\n",
" <td>328.572031833 km</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 633 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 \\\n",
"0 0.0 km 405.50348899 km 329.084912497 km 125.690948608 km \n",
"1 405.50348899 km 0.0 km 221.861288259 km 432.830979123 km \n",
"2 329.084912497 km 221.861288259 km 0.0 km 417.196927799 km \n",
"3 125.690948608 km 432.830979123 km 417.196927799 km 0.0 km \n",
"4 334.809546214 km 230.757869772 km 10.6814527 km 424.885071457 km \n",
"\n",
" 4 5 6 7 \\\n",
"0 334.809546214 km 363.977284175 km 382.757346144 km 535.936672603 km \n",
"1 230.757869772 km 158.095284662 km 110.043557538 km 147.351736592 km \n",
"2 10.6814527 km 70.863645424 km 298.323147682 km 273.910722975 km \n",
"3 424.885071457 km 435.546234701 km 377.839985968 km 576.231623356 km \n",
"4 0.0 km 77.361013525 km 308.571537322 km 278.28526467 km \n",
"\n",
" 8 9 ... 623 \\\n",
"0 453.455691916 km 308.493357486 km ... 400.087508977 km \n",
"1 148.457521906 km 267.013330437 km ... 144.457624755 km \n",
"2 157.066357114 km 49.2445247693 km ... 335.812017502 km \n",
"3 515.485469459 km 407.42856463 km ... 383.454043729 km \n",
"4 159.934984822 km 44.921193572 km ... 346.110366859 km \n",
"\n",
" 624 625 626 627 \\\n",
"0 111.975894721 km 439.43400121 km 180.24140288 km 411.642775795 km \n",
"1 454.323538103 km 312.934572811 km 277.655527769 km 82.5152289072 km \n",
"2 423.039491047 km 483.025940498 km 149.04116848 km 291.780083538 km \n",
"3 33.7054490333 km 372.798143239 km 277.081424576 km 414.272250674 km \n",
"4 430.122218324 km 493.684280642 km 154.571809629 km 301.534166914 km \n",
"\n",
" 628 629 630 631 \\\n",
"0 407.386946563 km 483.234335798 km 375.739768332 km 194.674392977 km \n",
"1 99.3284506038 km 109.490313755 km 230.269722838 km 286.774422855 km \n",
"2 144.491807851 km 222.095480532 km 391.732838315 km 138.26555647 km \n",
"3 462.044510708 km 527.921396963 km 331.220487884 km 296.642676261 km \n",
"4 150.960361859 km 227.157332278 km 402.391764394 km 142.486100572 km \n",
"\n",
" 632 \n",
"0 301.45740391 km \n",
"1 194.711412469 km \n",
"2 317.903302763 km \n",
"3 277.089868144 km \n",
"4 328.572031833 km \n",
"\n",
"[5 rows x 633 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run get_distance once per row of square, then transpose the result\n",
"distances = square.apply(get_distance, axis=1).transpose()\n",
"distances.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def units(input_instance):\n",
"    \"\"\"Return the `meters` attribute of `input_instance`.\"\"\"\n",
"    meters = input_instance.meters\n",
"    return meters"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>623</th>\n",
" <th>624</th>\n",
" <th>625</th>\n",
" <th>626</th>\n",
" <th>627</th>\n",
" <th>628</th>\n",
" <th>629</th>\n",
" <th>630</th>\n",
" <th>631</th>\n",
" <th>632</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.000000</td>\n",
" <td>405503.488990</td>\n",
" <td>329084.912497</td>\n",
" <td>125690.948608</td>\n",
" <td>334809.546214</td>\n",
" <td>363977.284175</td>\n",
" <td>382757.346144</td>\n",
" <td>535936.672603</td>\n",
" <td>453455.691916</td>\n",
" <td>308493.357486</td>\n",
" <td>...</td>\n",
" <td>400087.508977</td>\n",
" <td>111975.894721</td>\n",
" <td>439434.001210</td>\n",
" <td>180241.402880</td>\n",
" <td>411642.775795</td>\n",
" <td>407386.946563</td>\n",
" <td>483234.335798</td>\n",
" <td>375739.768332</td>\n",
" <td>194674.392977</td>\n",
" <td>301457.403910</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>405503.488990</td>\n",
" <td>0.000000</td>\n",
" <td>221861.288259</td>\n",
" <td>432830.979123</td>\n",
" <td>230757.869772</td>\n",
" <td>158095.284662</td>\n",
" <td>110043.557538</td>\n",
" <td>147351.736592</td>\n",
" <td>148457.521906</td>\n",
" <td>267013.330437</td>\n",
" <td>...</td>\n",
" <td>144457.624755</td>\n",
" <td>454323.538103</td>\n",
" <td>312934.572811</td>\n",
" <td>277655.527769</td>\n",
" <td>82515.228907</td>\n",
" <td>99328.450604</td>\n",
" <td>109490.313755</td>\n",
" <td>230269.722838</td>\n",
" <td>286774.422855</td>\n",
" <td>194711.412469</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>329084.912497</td>\n",
" <td>221861.288259</td>\n",
" <td>0.000000</td>\n",
" <td>417196.927799</td>\n",
" <td>10681.452700</td>\n",
" <td>70863.645424</td>\n",
" <td>298323.147682</td>\n",
" <td>273910.722975</td>\n",
" <td>157066.357114</td>\n",
" <td>49244.524769</td>\n",
" <td>...</td>\n",
" <td>335812.017502</td>\n",
" <td>423039.491047</td>\n",
" <td>483025.940498</td>\n",
" <td>149041.168480</td>\n",
" <td>291780.083538</td>\n",
" <td>144491.807851</td>\n",
" <td>222095.480532</td>\n",
" <td>391732.838315</td>\n",
" <td>138265.556470</td>\n",
" <td>317903.302763</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>125690.948608</td>\n",
" <td>432830.979123</td>\n",
" <td>417196.927799</td>\n",
" <td>0.000000</td>\n",
" <td>424885.071457</td>\n",
" <td>435546.234701</td>\n",
" <td>377839.985968</td>\n",
" <td>576231.623356</td>\n",
" <td>515485.469459</td>\n",
" <td>407428.564630</td>\n",
" <td>...</td>\n",
" <td>383454.043729</td>\n",
" <td>33705.449033</td>\n",
" <td>372798.143239</td>\n",
" <td>277081.424576</td>\n",
" <td>414272.250674</td>\n",
" <td>462044.510708</td>\n",
" <td>527921.396963</td>\n",
" <td>331220.487884</td>\n",
" <td>296642.676261</td>\n",
" <td>277089.868144</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>334809.546214</td>\n",
" <td>230757.869772</td>\n",
" <td>10681.452700</td>\n",
" <td>424885.071457</td>\n",
" <td>0.000000</td>\n",
" <td>77361.013525</td>\n",
" <td>308571.537322</td>\n",
" <td>278285.264670</td>\n",
" <td>159934.984822</td>\n",
" <td>44921.193572</td>\n",
" <td>...</td>\n",
" <td>346110.366859</td>\n",
" <td>430122.218324</td>\n",
" <td>493684.280642</td>\n",
" <td>154571.809629</td>\n",
" <td>301534.166914</td>\n",
" <td>150960.361859</td>\n",
" <td>227157.332278</td>\n",
" <td>402391.764394</td>\n",
" <td>142486.100572</td>\n",
" <td>328572.031833</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 633 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 \\\n",
"0 0.000000 405503.488990 329084.912497 125690.948608 334809.546214 \n",
"1 405503.488990 0.000000 221861.288259 432830.979123 230757.869772 \n",
"2 329084.912497 221861.288259 0.000000 417196.927799 10681.452700 \n",
"3 125690.948608 432830.979123 417196.927799 0.000000 424885.071457 \n",
"4 334809.546214 230757.869772 10681.452700 424885.071457 0.000000 \n",
"\n",
" 5 6 7 8 9 \\\n",
"0 363977.284175 382757.346144 535936.672603 453455.691916 308493.357486 \n",
"1 158095.284662 110043.557538 147351.736592 148457.521906 267013.330437 \n",
"2 70863.645424 298323.147682 273910.722975 157066.357114 49244.524769 \n",
"3 435546.234701 377839.985968 576231.623356 515485.469459 407428.564630 \n",
"4 77361.013525 308571.537322 278285.264670 159934.984822 44921.193572 \n",
"\n",
" ... 623 624 625 626 \\\n",
"0 ... 400087.508977 111975.894721 439434.001210 180241.402880 \n",
"1 ... 144457.624755 454323.538103 312934.572811 277655.527769 \n",
"2 ... 335812.017502 423039.491047 483025.940498 149041.168480 \n",
"3 ... 383454.043729 33705.449033 372798.143239 277081.424576 \n",
"4 ... 346110.366859 430122.218324 493684.280642 154571.809629 \n",
"\n",
" 627 628 629 630 631 \\\n",
"0 411642.775795 407386.946563 483234.335798 375739.768332 194674.392977 \n",
"1 82515.228907 99328.450604 109490.313755 230269.722838 286774.422855 \n",
"2 291780.083538 144491.807851 222095.480532 391732.838315 138265.556470 \n",
"3 414272.250674 462044.510708 527921.396963 331220.487884 296642.676261 \n",
"4 301534.166914 150960.361859 227157.332278 402391.764394 142486.100572 \n",
"\n",
" 632 \n",
"0 301457.403910 \n",
"1 194711.412469 \n",
"2 317903.302763 \n",
"3 277089.868144 \n",
"4 328572.031833 \n",
"\n",
"[5 rows x 633 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Apply units() to every cell, replacing each distance object by its .meters value\n",
"distances_meters = distances.applymap(func=units)\n",
"distances_meters.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"432830.97912272112"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Label-based scalar lookup: row label 1, column label 3\n",
"distances_meters.at[1, 3]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment