Created
January 24, 2015 03:34
-
-
Save dangunter/85a0d7e012be828f5ec2 to your computer and use it in GitHub Desktop.
SEED mapping example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import json\n", | |
"import csv" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create the file we would use as a source" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"MAP = {\n", | |
" u'Property Id': u'pm_property_id',\n", | |
" u'Property Name': u'property_name',\n", | |
" u'Address 1': u'address_line_1',\n", | |
" u'Address 2': u'address_line_2',\n", | |
" u'City': u'city',\n", | |
" u'County': u'district',\n", | |
" u'City': u'city',\n", | |
" u'Custom Property ID 1 - ID': u'custom_id_1',\n", | |
" u'Postal Code': u'postal_code',\n", | |
" u'State/Province': u'state_province',\n", | |
" u'Property Floor Area (Buildings and Parking) (ft2)': \n", | |
" u'gross_floor_area',\n", | |
" u'Year Built': u'year_built',\n", | |
" u'Year Ending': u'year_ending',\n", | |
" u'Energy Alerts': u'energy_alerts',\n", | |
" u'ENERGY STAR Score': u'energy_score',\n", | |
" u'Site EUI (kBtu/ft2)': u'site_eui',\n", | |
" u'Source EUI (kBtu/ft2)': u'source_eui',\n", | |
" u'Weather Normalized Site EUI (kBtu/ft2)': u'site_eui_weather_normalized',\n", | |
" u'Weather Normalized Source EUI (kBtu/ft2)':\n", | |
" u'source_eui_weather_normalized',\n", | |
" u'Generation Date': u'generation_date',\n", | |
" u'Release Date': u'release_date',\n", | |
"}\n", | |
"mstr = json.dumps(MAP)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Embed version number (1.0.0) in filename" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"json.dump(MAP,open(\"pmgr_1-0-0.json\",\"w\"), indent=2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Create the (fake) data file.\n", | |
"This is just needed for this example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"sample_filename = \"pmgr-sample-data.csv\"\n", | |
"writer = csv.writer(open(sample_filename, \"w\"))\n", | |
"writer.writerow(('Property Id', 'Address 1', \"Extra1\", \"Extra2\"))\n", | |
"for row in range(10):\n", | |
" writer.writerow((\"mv1.{:d}\".format(row),\"mv2.{:d}\".format(row),\n", | |
" \"ev1.{:d}\".format(row), \"ev2.{:d}\".format(row)))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 24 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Perform mapping.\n", | |
"First, load mapping file (could also be a blob in a DB, etc.)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"version = (1,0,0)\n", | |
"fname = \"pmgr_{}-{}-{}.json\".format(*version)\n", | |
"mapping = json.load(open(fname))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 25 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Then load a data file.\n", | |
"This could be through an upload, of course, but we will use the one we just created above" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"datafile = open(sample_filename)\n", | |
"data = csv.reader(datafile)\n", | |
"columns = data.next()\n", | |
"mapped, is_extra = [], []\n", | |
"for col in columns:\n", | |
" if col in mapping:\n", | |
" mapped.append(mapping[col])\n", | |
" is_extra.append(False)\n", | |
" else:\n", | |
" mapped.append(col)\n", | |
" is_extra.append(True)\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 41 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Show the mapping" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for orig, m, extra in zip(columns, mapped, is_extra):\n", | |
" print(\"{:<16} => {:<16} {:>8}\".format(orig, m, ('yes', 'no')[extra]))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Property Id => pm_property_id yes\n", | |
"Address 1 => address_line_1 yes\n", | |
"Extra1 => Extra1 no\n", | |
"Extra2 => Extra2 no\n" | |
] | |
} | |
], | |
"prompt_number": 42 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Use the mapping" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"maplen = len(mapped)\n", | |
"for row in data:\n", | |
" values = {mapped[i]: row[i] for i in range(maplen)}\n", | |
" print(\"Row: {}\".format(values))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Row: {'Extra1': 'ev1.0', 'Extra2': 'ev2.0', u'pm_property_id': 'mv1.0', u'address_line_1': 'mv2.0'}\n", | |
"Row: {'Extra1': 'ev1.1', 'Extra2': 'ev2.1', u'pm_property_id': 'mv1.1', u'address_line_1': 'mv2.1'}\n", | |
"Row: {'Extra1': 'ev1.2', 'Extra2': 'ev2.2', u'pm_property_id': 'mv1.2', u'address_line_1': 'mv2.2'}\n", | |
"Row: {'Extra1': 'ev1.3', 'Extra2': 'ev2.3', u'pm_property_id': 'mv1.3', u'address_line_1': 'mv2.3'}\n", | |
"Row: {'Extra1': 'ev1.4', 'Extra2': 'ev2.4', u'pm_property_id': 'mv1.4', u'address_line_1': 'mv2.4'}\n", | |
"Row: {'Extra1': 'ev1.5', 'Extra2': 'ev2.5', u'pm_property_id': 'mv1.5', u'address_line_1': 'mv2.5'}\n", | |
"Row: {'Extra1': 'ev1.6', 'Extra2': 'ev2.6', u'pm_property_id': 'mv1.6', u'address_line_1': 'mv2.6'}\n", | |
"Row: {'Extra1': 'ev1.7', 'Extra2': 'ev2.7', u'pm_property_id': 'mv1.7', u'address_line_1': 'mv2.7'}\n", | |
"Row: {'Extra1': 'ev1.8', 'Extra2': 'ev2.8', u'pm_property_id': 'mv1.8', u'address_line_1': 'mv2.8'}\n", | |
"Row: {'Extra1': 'ev1.9', 'Extra2': 'ev2.9', u'pm_property_id': 'mv1.9', u'address_line_1': 'mv2.9'}\n" | |
] | |
} | |
], | |
"prompt_number": 43 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment