Skip to content

Instantly share code, notes, and snippets.

@onyxfish
Last active August 29, 2015 02:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save onyxfish/6f1c1a63a571e953b32f to your computer and use it in GitHub Desktop.
Save onyxfish/6f1c1a63a571e953b32f to your computer and use it in GitHub Desktop.
First agate ipython experiment
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv\n",
"\n",
"import agate\n",
"\n",
"text_type = agate.TextType()\n",
"number_type = agate.NumberType()\n",
"boolean_type = agate.BooleanType()\n",
"\n",
"columns = (\n",
" ('last_name', text_type),\n",
" ('first_name', text_type),\n",
" ('age', number_type),\n",
" ('race', text_type),\n",
" ('state', text_type),\n",
" ('tags', text_type),\n",
" ('crime', text_type),\n",
" ('sentence', text_type),\n",
" ('convicted', number_type),\n",
" ('exonerated', number_type),\n",
" ('dna', boolean_type),\n",
" ('dna_essential', text_type),\n",
" ('mistaken_witness', boolean_type),\n",
" ('false_confession', boolean_type),\n",
" ('perjury', boolean_type),\n",
" ('false_evidence', boolean_type),\n",
" ('official_misconduct', boolean_type),\n",
" ('inadequate_defense', boolean_type),\n",
")\n",
"\n",
"with open('examples/realdata/exonerations-20150828.csv') as f:\n",
" # Create a csv reader\n",
" reader = csv.reader(f)\n",
"\n",
" # Skip the header row\n",
" next(reader)\n",
"\n",
" # Create the table\n",
" exonerations = agate.Table(reader, columns)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False confessions: 211\n"
]
}
],
"source": [
"num_false_confessions = exonerations.columns['false_confession'].aggregate(agate.Count(True))\n",
"\n",
"print('False confessions: %i' % num_false_confessions)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Median age at time of arrest: 26\n"
]
}
],
"source": [
"with_age = exonerations.where(lambda row: row['age'] is not None)\n",
"\n",
"median_age = with_age.columns['age'].aggregate(agate.Median())\n",
"\n",
"print('Median age at time of arrest: %i' % median_age)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|------------+------------+-----+-----------+-------+---------+---------+------|\n",
"| last_name | first_name | age | race | state | tags | crime | ... |\n",
"|------------+------------+-----+-----------+-------+---------+---------+------|\n",
"| Murray | Lacresha | 11 | Black | TX | CV, F | Murder | ... |\n",
"| Adams | Johnathan | 12 | Caucasian | GA | CV, P | Murder | ... |\n",
"| Harris | Anthony | 12 | Black | OH | CV | Murder | ... |\n",
"| Edmonds | Tyler | 13 | Caucasian | MS | | Murder | ... |\n",
"| Handley | Zachary | 13 | Caucasian | PA | A, CV | Arson | ... |\n",
"| Jimenez | Thaddeus | 13 | Hispanic | IL | | Murder | ... |\n",
"| Pacek | Jerry | 13 | Caucasian | PA | | Murder | ... |\n",
"| Barr | Jonathan | 14 | Black | IL | CDC, CV | Murder | ... |\n",
"| Brim | Dominique | 14 | Black | MI | F | Assault | ... |\n",
"| Brown | Timothy | 14 | Black | FL | | Murder | ... |\n",
"|------------+------------+-----+-----------+-------+---------+---------+------|\n"
]
}
],
"source": [
"sorted_by_age = exonerations.order_by('age')\n",
"youngest_ten = sorted_by_age.limit(10)\n",
"\n",
"print(youngest_ten.format(max_columns=7))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|--------+-------+-------------------------|\n",
"| group | count | median_years_in_prison |\n",
"|--------+-------+-------------------------|\n",
"| DC | 15 | 27 |\n",
"| NE | 9 | 20 |\n",
"| ID | 2 | 19 |\n",
"| VT | 1 | 18 |\n",
"| LA | 45 | 16 |\n",
"| ... | ... | ... |\n",
"|--------+-------+-------------------------|\n"
]
}
],
"source": [
"with_years_in_prison = exonerations.compute([\n",
" ('years_in_prison', agate.Change('convicted', 'exonerated'))\n",
"])\n",
"\n",
"state_totals = with_years_in_prison.group_by('state')\n",
"\n",
"medians = state_totals.aggregate([\n",
" ('years_in_prison', agate.Median(), 'median_years_in_prison')\n",
"])\n",
"\n",
"sorted_medians = medians.order_by('median_years_in_prison', reverse=True)\n",
"\n",
"print(sorted_medians.format(max_rows=5))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment