Last active
February 21, 2018 18:48
-
-
Save sfsheath/e89e674dda2a065143ffc138c3774a3c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# gist -u https://gist.github.com/e89e674dda2a065143ffc138c3774a3c" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import gzip # can uncompress gzipped files, useful for accessing the pleiades data\n", | |
"import io # useful routines for input/output\n", | |
"import pandas as pd # for working with \"rows/columns\" oriented data\n", | |
"import urllib.request # for loading documents using http\n", | |
"\n", | |
"%matplotlib inline\n", | |
"\n", | |
"import matplotlib # plotting\n", | |
"import matplotlib.pyplot as plt\n", | |
"\n", | |
"pd.options.display.max_columns = 999" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Fetch both source tables over HTTP; pd.read_csv accepts a URL directly.\n", | |
"ramphs, chronogrps = [\n", | |
"    pd.read_csv(csv_url)\n", | |
"    for csv_url in (\n", | |
"        \"http://sebastianheath.com/roman-amphitheaters/roman-amphitheaters.csv\",\n", | |
"        \"http://sebastianheath.com/roman-amphitheaters/chronogrps.csv\",\n", | |
"    )\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Load the gzipped Pleiades dump. The key expression is\n", | |
"# \"io.BytesIO(gzip.decompress(response.read()))\", which delivers (so to speak)\n", | |
"# an uncompressed csv to 'pd.read_csv'.\n", | |
"# The 'with' block closes the HTTP response as soon as its bytes have been read,\n", | |
"# so re-running the cell does not leave connections open.\n", | |
"with urllib.request.urlopen(\"http://atlantides.org/downloads/pleiades/dumps/pleiades-places-latest.csv.gz\") as response:\n", | |
"    pleiades = pd.read_csv(io.BytesIO(gzip.decompress(response.read())))\n", | |
"\n", | |
"# One note: because it takes a long time to load the pleiades data,\n", | |
"# avoid running this cell again unless necessary" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html\n", | |
"# Join ramphs to chronogrps, derive 'path' by stripping the Pleiades host prefix,\n", | |
"# then join to the Pleiades dump on 'path'.\n", | |
"# 'path' is computed with a callable so it comes from the rows of the merged frame\n", | |
"# itself. Passing 'ramphs.pleiades' directly is aligned by index label, which pairs\n", | |
"# the wrong rows whenever the first merge drops, repeats or reorders rows.\n", | |
"combined = (\n", | |
"    ramphs.merge(chronogrps)\n", | |
"    .assign(path = lambda merged: merged.pleiades.str.replace(\"https://pleiades.stoa.org\",\"\"))\n", | |
"    .merge(pleiades, on = 'path', suffixes = ('','_pleiades'))\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Peek at the first rows of a handful of columns from the joined frame.\n", | |
"combined.loc[:, ['id','startdate','minDate','title_pleiades']].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Keep the Pleiades rows whose featureTypes text contains \"amphitheatre\".\n", | |
"# na = False counts a missing featureTypes value as a non-match; without it the\n", | |
"# mask can contain NaN, which pandas refuses to use for boolean indexing.\n", | |
"pamphs = pleiades[pleiades.featureTypes.str.contains(\"amphitheatre\", na = False)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Row count of the Pleiades amphitheatre subset.\n", | |
"pamphs.shape[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Row count of the roman-amphitheaters table, for comparison with pamphs.\n", | |
"ramphs.shape[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Strip the Pleiades host prefix so 'path' can serve as the join key\n", | |
"# against the 'path' column of the Pleiades frames.\n", | |
"ramphs['path'] = ramphs['pleiades'].str.replace(\"https://pleiades.stoa.org\",\"\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Left join: every pamphs row is kept, with ramphs columns attached where 'path' matches.\n", | |
"pd.merge(pamphs, ramphs, how = 'left', on = 'path')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.duplicated.html\n", | |
"# Show every joined row whose 'path' occurs more than once (keep = False flags all\n", | |
"# copies, not only the repeats). The callable given to .loc receives the merged\n", | |
"# frame, so the left merge is evaluated once instead of twice.\n", | |
"pamphs.merge(ramphs, on = 'path', how = 'left') \\\n", | |
"    .loc[lambda joined: joined.path.duplicated(keep = False)]\n", | |
"\n", | |
"# [['id_y','featureTypes','minDate','maxDate','chronogroup']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Small hand-made tables for practising joins: sites, periods, and s_to_p,\n", | |
"# which pairs site ids (s_id) with period ids (p_id).\n", | |
"sites = pd.DataFrame(\n", | |
"    [('s01', 'settlement'),\n", | |
"     ('s02', 'cave'),\n", | |
"     ('s03', 'temple')],\n", | |
"    columns = ['id', 'type'])\n", | |
"\n", | |
"periods = pd.DataFrame(\n", | |
"    [('early', 100, 250),\n", | |
"     ('middle', 250, 725),\n", | |
"     ('late', 725, 850),\n", | |
"     ('classic', 350, 550)],\n", | |
"    columns = ['id', 'startdate', 'enddate'])\n", | |
"\n", | |
"s_to_p = pd.DataFrame(\n", | |
"    [('s01', 'early'),\n", | |
"     ('s01', 'middle'),\n", | |
"     ('s02', 'late'),\n", | |
"     ('s03', 'classic')],\n", | |
"    columns = ['s_id', 'p_id'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# let's work on these steps\n", | |
"# 1) list the site types associated with periods \n", | |
"# hint: use s_to_p as left hand df (& keep it simple for now)\n", | |
"# 2) with that dataframe join start & end dates for the periods\n", | |
"# 3) for every site type, find max and min start dates\n", | |
"\n", | |
"\n", | |
"#['startdate']['temple']['min']" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment