@kikiliu
Last active August 29, 2015 13:58
Extract_Property_Info_fromAPI
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"from pandas import Series, DataFrame\n",
"\n",
"import settings\n",
"\n",
"from itertools import islice\n",
"\n",
"import requests\n",
"from lxml import etree"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"assert settings.apikey is not None\n",
"print settings.zwsid\n",
"print settings.apikey"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"hidden\n",
"hidden\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import urlparse\n",
"import urllib\n",
"from IPython.core.display import HTML"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from urllib2 import urlopen\n",
"from urlparse import urljoin\n",
"from bs4 import BeautifulSoup"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def set_url_parameter(url, name, value):\n",
" \"\"\"Take an input URL template and key-value string, one pair each call\"\"\"\n",
"\n",
" uri = urlparse.urlparse(url)\n",
" \n",
" #convert pr.query from string to dict \n",
" query_dict = urlparse.parse_qs(uri.query)#Parse a query string given as a string argument\n",
" query_dict[name]= value\n",
"\n",
" #e.g. http://stackoverflow.com/a/10233141/7782 \n",
" #convert a mapping object or a sequence (doseq=True) of two-element tuples to a \u201cpercent-encoded\u201d string\n",
" #suitable to pass to urlopen() as the optional data argument.\n",
" query = urllib.urlencode(query_dict, doseq=True)\n",
" uri = (uri.scheme, uri.netloc, uri.path, uri.params, query, uri.fragment)#avoid magic number[4]\n",
" \n",
" #Construct a URL from a tuple as returned by urlparse()\n",
" return urlparse.urlunparse(uri)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
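{
"cell_type": "code",
"collapsed": false,
"input": [
"#Quick check of set_url_parameter (illustrative only; example.com is a placeholder, not an API used below).\n",
"#Each call fills in one query-string parameter of a URL template.\n",
"set_url_parameter('http://www.example.com/search?q=&page=1', 'q', 'oakland')"
],
"language": "python",
"metadata": {},
"outputs": []
},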
{
"cell_type": "code",
"collapsed": false,
"input": [
"oak_zip = [\"94601\", \"94602\", \"94605\", \"94606\", \"94607\", \"94610\", \n",
" \"94611\", \"94612\", \"94618\", \"94619\", \"94615\", \"94621\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#http://www.zillow.com/howto/api/GetDemographics.htm\n",
"def getDemographics_xml(zipcode):\n",
" url_template_zillow = 'http://www.zillow.com/webservice/GetDemographics.htm?zws-id=&zip='\n",
" url = set_url_parameter(url_template_zillow, 'zws-id', settings.zwsid)\n",
" url = set_url_parameter(url, 'zip', zipcode)\n",
" response = requests.get(url).content \n",
"\n",
" return etree.fromstring(response)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
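{
"cell_type": "code",
"collapsed": false,
"input": [
"#Illustrative sketch: fetch the demographics XML for one ZIP code and print it\n",
"#to see which attribute paths are available (assumes a valid zws-id in settings).\n",
"print etree.tostring(getDemographics_xml('94610'), pretty_print=True)"
],
"language": "python",
"metadata": {},
"outputs": []
},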
{
"cell_type": "code",
"collapsed": false,
"input": [
"#http://developer.trulia.com/docs/read/TruliaStats/getZipCodeStats\n",
"def getZipCodeStats_xml(zipcode,startDate,endDate):\n",
" url_template_trulia = 'http://api.trulia.com/webservices.php?library=TruliaStats&function=getZipCodeStats&zipCode=&startDate=&endDate=&apikey='\n",
" url = set_url_parameter(url_template_trulia, 'apikey', settings.apikey)\n",
" url = set_url_parameter(url, 'zipCode', zipcode)\n",
" url = set_url_parameter(url, 'startDate', startDate)\n",
" url = set_url_parameter(url, 'endDate', endDate)\n",
" response = requests.get(url).content \n",
" \n",
" return etree.fromstring(response)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def getDeepSearchResults_xml(addr, citystatezip):\n",
" url_template_zillow = 'http://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=&address=&citystatezip='\n",
" url = set_url_parameter(url_template_zillow, 'zws-id', settings.zwsid)\n",
" url = set_url_parameter(url, 'address', addr)\n",
" url = set_url_parameter(url, 'citystatezip', citystatezip)\n",
" response = requests.get(url).content\n",
" \n",
" return etree.fromstring(response)\n",
"# return url\n",
"\n",
"# getDeepSearchResults_xml(\"2114 Bigelow Ave\", \"Seattle, WA\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def parse_value(path, xml_doc):\n",
" '''Take xml doc retrieved and parse certain value according to the path'''\n",
" value_list = []\n",
" value = xml_doc.xpath(path)\n",
" if value != []:\n",
" for item in value:\n",
" value_list.append(item.text)\n",
" else:\n",
" value_list.append(0)\n",
" return value_list"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
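{
"cell_type": "code",
"collapsed": false,
"input": [
"#Self-contained check of parse_value against a hand-written XML snippet\n",
"#(illustrative only; the tags below are made up, not from any API response).\n",
"sample_doc = etree.fromstring('<response><zip><value>100</value><value>200</value></zip></response>')\n",
"print parse_value('//zip/value', sample_doc)   #-> ['100', '200']\n",
"print parse_value('//zip/missing', sample_doc) #-> [0] when the path matches nothing"
],
"language": "python",
"metadata": {},
"outputs": []
},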
{
"cell_type": "code",
"collapsed": false,
"input": [
"#http://www.zillow.com/howto/api/GetDeepSearchResults.htm\n",
"def getDeepSearchResults(addr, citystatezip):\n",
" '''Take in address and city+state combination and/or ZIP code, output '''\n",
" \n",
" lastSoldDate_path = \"//response//lastSoldDate\"\n",
" lastSoldPrice_path = \"//response//lastSoldPrice\"\n",
" latitude_path = \"//response//address/latitude\"\n",
" longitude_path = \"//response//address/longitude\"\n",
" \n",
" doc = getDeepSearchResults_xml(addr, citystatezip)\n",
" lastSoldDate = parse_value(lastSoldDate_path, doc)[0]\n",
" lastSoldPrice = parse_value(lastSoldPrice_path, doc)[0]\n",
" latitude = parse_value(latitude_path, doc)[0]\n",
" longitude = parse_value(longitude_path, doc)[0]\n",
" \n",
" return Series([lastSoldDate, lastSoldPrice, latitude, longitude])\n",
"\n",
"getDeepSearchResults(\"2114 Bigelow Ave\", \"Seattle, WA\") "
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"0 11/26/2008\n",
"1 1025000\n",
"2 47.637933\n",
"3 -122.347938\n",
"dtype: object"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def download(url):\n",
" response = urlopen(url) \n",
" data = response.read()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
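{
"cell_type": "code",
"collapsed": false,
"input": [
"#BeautifulSoup is imported above but not used yet; a minimal sketch of pairing it\n",
"#with download() to inspect a page (the URL here is a placeholder, not an endpoint\n",
"#used elsewhere in this notebook).\n",
"soup = BeautifulSoup(download('http://www.trulia.com/'))\n",
"print soup.title"
],
"language": "python",
"metadata": {},
"outputs": []
},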
{
"cell_type": "code",
"collapsed": false,
"input": [
"{zipcode:[1,2] for zipcode in oak_zip}"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 19,
"text": [
"{'94601': [1, 2],\n",
" '94602': [1, 2],\n",
" '94605': [1, 2],\n",
" '94606': [1, 2],\n",
" '94607': [1, 2],\n",
" '94610': [1, 2],\n",
" '94611': [1, 2],\n",
" '94612': [1, 2],\n",
" '94615': [1, 2],\n",
" '94618': [1, 2],\n",
" '94619': [1, 2],\n",
" '94621': [1, 2]}"
]
}
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def getDemographics_df(ziplist):\n",
" Index_path = \"//page[name='Affordability']//attribute[name='Zillow Home Value Index']/values/zip/value\"\n",
" SFH_path = \"//page[name='Affordability']//attribute[name='Median Single Family Home Value']/values/zip/value\"\n",
" Condo_path = \"//page[name='Affordability']//attribute[name='Median Condo Value']/values/zip/value\"\n",
" SalePrice_path = \"//page[name='Affordability']//attribute[name='Median Sale Price']/values/zip/value\"\n",
" Persqft_path = \"//page[name='Affordability']//attribute[name='Median Value Per Sq Ft']/values/zip/value\" \n",
" One_year_path = \"//page[name='Affordability']//attribute[name='1-Yr. Change']/values/zip/value\"\n",
" demographics = {zipcode:[] for zipcode in ziplist}\n",
" for zipcode in ziplist:\n",
" doc = getDemographics_xml(zipcode)\n",
" demographics[zipcode].append(parse_value(Index_path, doc))\n",
" demographics[zipcode].append(parse_value(Persqft_path, doc))\n",
" return DataFrame.from_dict(demographics, 'index')\n",
"# DataFrame([[zipcode, home_value, median_per_sqft] for zipcode, value in demographics for home_value, median_per_sqft in value])\n",
"\n",
"getDemographics_df(oak_zip)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>94606</th>\n",
" <td> [366100]</td>\n",
" <td> [250]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94607</th>\n",
" <td> [355100]</td>\n",
" <td> [355]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94605</th>\n",
" <td> [365000]</td>\n",
" <td> [260]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94602</th>\n",
" <td> [632000]</td>\n",
" <td> [330]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94610</th>\n",
" <td> [801300]</td>\n",
" <td> [360]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94601</th>\n",
" <td> [281300]</td>\n",
" <td> [235]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94612</th>\n",
" <td> [406300]</td>\n",
" <td> [311]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94619</th>\n",
" <td> [466700]</td>\n",
" <td> [354]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94618</th>\n",
" <td> [936100]</td>\n",
" <td> [506]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94621</th>\n",
" <td> [200000]</td>\n",
" <td> [180]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94611</th>\n",
" <td> [867800]</td>\n",
" <td> [413]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94615</th>\n",
" <td> [0]</td>\n",
" <td> [0]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>12 rows \u00d7 2 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 35,
"text": [
" 0 1\n",
"94606 [366100] [250]\n",
"94607 [355100] [355]\n",
"94605 [365000] [260]\n",
"94602 [632000] [330]\n",
"94610 [801300] [360]\n",
"94601 [281300] [235]\n",
"94612 [406300] [311]\n",
"94619 [466700] [354]\n",
"94618 [936100] [506]\n",
"94621 [200000] [180]\n",
"94611 [867800] [413]\n",
"94615 [0] [0]\n",
"\n",
"[12 rows x 2 columns]"
]
}
],
"prompt_number": 35
},
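{
"cell_type": "code",
"collapsed": false,
"input": [
"#Optional follow-up (a sketch, not run above): give the two numeric columns\n",
"#descriptive names so the DataFrame is self-explanatory; demo_df is a new name\n",
"#introduced here for illustration.\n",
"demo_df = getDemographics_df(oak_zip)\n",
"demo_df.columns = ['zillow_home_value_index', 'median_value_per_sqft']\n",
"demo_df"
],
"language": "python",
"metadata": {},
"outputs": []
},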
{
"cell_type": "code",
"collapsed": false,
"input": [
"def getZipCodeStats_df(ziplist,startDate,endDate):\n",
" \n",
" MedianListing_path = \"//listingStats//subcategory[type='All Properties']/medianListingPrice\"\n",
" WeekEndingDate_path = \"//listingStats/listingStat/weekEndingDate\"\n",
" ZipCodeStats = {zipcode:[] for zipcode in ziplist}\n",
" for zipcode in ziplist:\n",
" doc = getZipCodeStats_xml(zipcode,startDate,endDate)\n",
" ZipCodeStats[zipcode].append(parse_value(MedianListing_path, doc))\n",
" ZipCodeStats[zipcode].append(parse_value(WeekEndingDate_path, doc))\n",
" return DataFrame.from_dict(ZipCodeStats, 'index')\n",
"\n",
" \n",
"getZipCodeStats_df(oak_zip, '2014-01-01', '2014-02-01')"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>94606</th>\n",
" <td> [215757, 217457, 223400]</td>\n",
" <td> [2014-02-22, 2014-03-01, 2014-03-08]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94607</th>\n",
" <td> [0]</td>\n",
" <td> [0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94605</th>\n",
" <td> [140650, 139179, 138350]</td>\n",
" <td> [2014-02-22, 2014-03-01, 2014-03-08]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94602</th>\n",
" <td> [0]</td>\n",
" <td> [0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94610</th>\n",
" <td> [158471, 167043, 154300]</td>\n",
" <td> [2014-02-22, 2014-03-01, 2014-03-08]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94601</th>\n",
" <td> [567613, 547134, 535358, 538907, 531871, 50071...</td>\n",
" <td> [2014-01-11, 2014-01-18, 2014-01-25, 2014-02-0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94612</th>\n",
" <td> [0]</td>\n",
" <td> [0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94619</th>\n",
" <td> [0]</td>\n",
" <td> [0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94618</th>\n",
" <td> [179843, 177736, 174843, 174871, 178086, 17947...</td>\n",
" <td> [2013-12-14, 2013-12-21, 2013-12-28, 2014-01-0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94621</th>\n",
" <td> [0]</td>\n",
" <td> [0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94611</th>\n",
" <td> [377029, 382250, 398286, 395000, 391536, 38885...</td>\n",
" <td> [2013-12-14, 2013-12-21, 2013-12-28, 2014-01-0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94615</th>\n",
" <td> [0]</td>\n",
" <td> [0]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>12 rows \u00d7 2 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 34,
"text": [
" 0 \\\n",
"94606 [215757, 217457, 223400] \n",
"94607 [0] \n",
"94605 [140650, 139179, 138350] \n",
"94602 [0] \n",
"94610 [158471, 167043, 154300] \n",
"94601 [567613, 547134, 535358, 538907, 531871, 50071... \n",
"94612 [0] \n",
"94619 [0] \n",
"94618 [179843, 177736, 174843, 174871, 178086, 17947... \n",
"94621 [0] \n",
"94611 [377029, 382250, 398286, 395000, 391536, 38885... \n",
"94615 [0] \n",
"\n",
" 1 \n",
"94606 [2014-02-22, 2014-03-01, 2014-03-08] \n",
"94607 [0] \n",
"94605 [2014-02-22, 2014-03-01, 2014-03-08] \n",
"94602 [0] \n",
"94610 [2014-02-22, 2014-03-01, 2014-03-08] \n",
"94601 [2014-01-11, 2014-01-18, 2014-01-25, 2014-02-0... \n",
"94612 [0] \n",
"94619 [0] \n",
"94618 [2013-12-14, 2013-12-21, 2013-12-28, 2014-01-0... \n",
"94621 [0] \n",
"94611 [2013-12-14, 2013-12-21, 2013-12-28, 2014-01-0... \n",
"94615 [0] \n",
"\n",
"[12 rows x 2 columns]"
]
}
],
"prompt_number": 34
}
],
"metadata": {}
}
]
}