Created
August 13, 2018 14:13
-
-
Save dvas0004/21d8c7ef7bb0651bf766b56434ad011b to your computer and use it in GitHub Desktop.
Tallinn Real Estate - Scraping data using python and visualization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Only Python 3.6 is supported.
# sudo pipenv --python=3.6 install requests requests_html numpy
import requests | |
import numpy as np | |
from requests_html import HTMLSession | |
class KVBuilder:
    """Collect apartment prices from the kinnisvaraportaal-kv-ee.postimees.ee
    portal and render them as coloured circles on a Leaflet map.

    Typical use: call :meth:`get_area_objects` once per bounding box, then
    :meth:`get_html` to obtain a self-contained HTML page.

    Attributes are documented in ``__init__``.
    """

    # Detail page for a single listing; ``{}`` is the object id.
    _OBJECT_URL = 'http://kinnisvaraportaal-kv-ee.postimees.ee/?act=search.objectinfo&object_id={}'

    # Map search endpoint. Placeholders, in order: rooms_min, rooms_max,
    # nelat, nelng, swlat, swlng.
    _DISCOVERY_URL = 'http://kinnisvaraportaal-kv-ee.postimees.ee/?act=search.objectcoords&last_deal_type=1&company_id=&page=1&orderby=ob&page_size=10000&deal_type=1&dt_select=1&county=1&search_type=new&parish=1061&rooms_min={}&rooms_max={}&price_min=&price_max=&nr_of_people=&area_min=&area_max=&floor_min=&floor_max=&energy_certs=&keyword=&cluster=true&nelat={}&nelng={}&swlat={}&swlng={}&zoom=15'

    # Static part of the page up to (and including) the tile layer setup.
    # The per-listing circle statements are appended after it.
    _PAGE_HEAD = '''
<html>
<head>
<title>TLN Real Estate</title>
<meta name="viewport" content="initial-scale=1.0">
<meta charset="utf-8">
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.3.3/dist/leaflet.css"
integrity="sha512-Rksm5RenBEKSKFjgI3a41vrjkw4EVPlJ3+OiI65vTjIdo9brlAacEuKOiQ5OFh7cOI1bkDwLqdLw3Zg0cRJAAQ=="
crossorigin=""/>
<script src="https://unpkg.com/leaflet@1.3.3/dist/leaflet.js"
integrity="sha512-tAGcCfR4Sc5ZP5ZoVz0quoZDYX5aCtEm/eu1KhSLj2c9eFrylXZknQYmxUssFaVJKvvc0dJQixhGjG2yXWiV9Q=="
crossorigin=""></script>
<style>
#map {
height: 100%;
}
/* Optional: Makes the sample page fill the window. */
html, body {
height: 100%;
margin: 0;
padding: 0;
}
</style>
<body>
<div id="map"></div>
<script>
function perc2color(perc) {
var r, g, b = 0;
if(perc < 50) {
r = 255;
g = Math.round(5.1 * perc);
}
else {
g = 255;
r = Math.round(510 - 5.10 * perc);
}
var h = r * 0x10000 + g * 0x100 + b * 0x1;
return '#' + ('000000' + h.toString(16)).slice(-6);
}
var mymap = L.map('map').setView([59.437291, 24.745194], 12);
L.tileLayer('https://api.tiles.mapbox.com/v4/{id}/{z}/{x}/{y}.png?access_token=pk.eyJ1IjoiZHZhczAwMDQiLCJhIjoiY2prczdrMDRmMTg4ejNxbG1ndXFqYjZ3biJ9.BFxa0UpSh3dHg2pmDZSDYA', {
attribution: 'Map data © <a href="https://www.openstreetmap.org/">OpenStreetMap</a> contributors, <a href="https://creativecommons.org/licenses/by-sa/2.0/">CC-BY-SA</a>, Imagery © <a href="https://www.mapbox.com/">Mapbox</a>',
maxZoom: 18,
id: 'mapbox.streets',
accessToken: 'pk.eyJ1IjoiZHZhczAwMDQiLCJhIjoiY2prczdrMDRmMTg4ejNxbG1ndXFqYjZ3biJ9.BFxa0UpSh3dHg2pmDZSDYA'
}).addTo(mymap);
'''

    # Closes the tags opened by ``_PAGE_HEAD``.
    _PAGE_TAIL = '''
</script>
</body>
</html>
'''

    def __init__(self):
        # URL of the most recent discovery request (kept for inspection).
        self.discovery_url = ''
        # requests_html session used to fetch and parse listing pages.
        self.session = HTMLSession()
        # One dict per plotted point: {'lng': ..., 'lat': ..., 'price': ...}.
        self.data_objects = []
        # Largest 'price' recorded so far; used to scale the colours.
        self.max_price = 0

    @staticmethod
    def _parse_price(text):
        """Parse the leading number of a price string into an ``int``.

        The digits are expected as groups separated by non-breaking spaces
        (``'120\\xa0000\\xa0...'``). All leading all-digit groups are joined,
        so the result is correct regardless of how many groups the price
        has. Anything from the first non-digit group onward is ignored.

        Raises:
            ValueError: if ``text`` does not start with a digit group.
        """
        groups = []
        for piece in text.split('\xa0'):
            piece = piece.strip()
            if not piece.isdigit():
                break
            groups.append(piece)
        if not groups:
            raise ValueError('cannot parse price from {!r}'.format(text))
        return int(''.join(groups))

    def get_object_details(self, object_id):
        """Fetch one listing page and return its price divided by its size.

        Args:
            object_id: listing identifier, substituted into the detail URL.

        Returns:
            float: absolute price / absolute size (presumably EUR per
            square metre -- the units are not visible from this code).

        Raises:
            ValueError: if the size or price element is missing from the
                page, or its text cannot be parsed as a number.
            ZeroDivisionError: if the parsed size is 0.
        """
        r = self.session.get(self._OBJECT_URL.format(object_id))
        size_element = r.html.find('span.sep', first=True)
        price_element = r.html.find('p.object-price strong', first=True)
        # find(..., first=True) yields None when nothing matches; fail with a
        # message that names the listing instead of an AttributeError.
        if size_element is None or price_element is None:
            raise ValueError(
                'object {}: size or price element not found'.format(object_id))
        absolute_size = int(size_element.text.split('\xa0')[0].strip('|'))
        absolute_price = self._parse_price(price_element.text)
        return float(absolute_price) / float(absolute_size)

    def _record(self, lat, lng, price):
        """Store one map point, keep ``max_price`` current, and echo it."""
        if price > self.max_price:
            self.max_price = price
        result = {
            'lng': lng,
            'lat': lat,
            'price': price
        }
        self.data_objects.append(result)
        print(result)

    def _add_cluster_marker(self, marker):
        """Record a dict-style marker using the median price of its listings.

        Markers carrying neither ``object_ids`` nor ``object_id`` are skipped.
        """
        lng = marker['1']
        lat = marker['0']
        if 'object_ids' in marker:
            objects = marker['object_ids'].split('.')
        elif 'object_id' in marker:
            objects = marker['object_id'].split('.')
        else:
            return
        relative_prices = [
            self.get_object_details(apartment) for apartment in objects]
        self._record(lat, lng, np.median(relative_prices))

    def get_area_objects(self, nelat, nelng, swlat, swlng, rooms):
        """Query the portal for a bounding box and record every marker found.

        Args:
            nelat, nelng: north-east corner of the bounding box.
            swlat, swlng: south-west corner of the bounding box.
            rooms: room count, used as both the minimum and the maximum.

        The response is handled in two shapes: a dict with a ``'markers'``
        list of dict markers (several listings may share one marker, in
        which case their median price is used), or a plain sequence of
        ``[lat, lng, object_id]`` entries. Results are appended to
        ``self.data_objects`` and ``self.max_price`` is updated for both
        shapes.
        """
        self.discovery_url = self._DISCOVERY_URL.format(
            rooms, rooms, nelat, nelng, swlat, swlng)
        kv_request = requests.get(self.discovery_url)
        kv_json_response = kv_request.json()
        print(kv_json_response)
        if isinstance(kv_json_response, dict):
            for marker in kv_json_response['markers']:
                # Best effort: one unparsable listing must not abort the
                # whole area, so report the problem and move on.
                try:
                    self._add_cluster_marker(marker)
                except Exception as e:
                    print(e)
        else:
            for marker in kv_json_response:
                try:
                    lat = marker[0]
                    lng = marker[1]
                    apartment = marker[2]
                    self._record(lat, lng, self.get_object_details(apartment))
                except Exception as e:
                    print(e)

    def get_html(self):
        """Return a complete HTML page plotting every recorded point.

        Each point becomes a Leaflet circle whose colour is derived from its
        price relative to the highest price (the highest price maps to 0,
        lower prices towards 100) and whose popup shows the price.

        The scale uses the larger of ``self.max_price`` and the highest
        price present in ``self.data_objects``, so a stale ``max_price``
        cannot cause a division by zero or a negative percentage.
        """
        prices = [point['price'] for point in self.data_objects]
        top_price = max([self.max_price] + prices)
        parts = [self._PAGE_HEAD]
        for counter, data_point in enumerate(self.data_objects):
            price = data_point['price']
            if top_price > 0:
                perc = (((price / top_price) * 100) - 100) * -1
            else:
                # No positive price to scale against.
                perc = 0.0
            colour = 'perc2color(' + str(perc) + ')'
            parts.append(
                'var circle' + str(counter)
                + '= L.circle([' + str(data_point['lat']) + ', '
                + str(data_point['lng']) + '], { color: ' + colour
                + ', fillColor: ' + colour
                + ', fillOpacity: 0.5,radius: 10}).addTo(mymap);')
            parts.append('''
circle{}.bindPopup('{}')
'''.format(counter, price))
        parts.append(self._PAGE_TAIL)
        return ''.join(parts)
kv = KVBuilder()

# Bounding boxes of the Tallinn districts to scan, one per row:
# (north-east lat, north-east lng, south-west lat, south-west lng).
_AREAS = (
    # Rocca al Mare
    ('59.45522849665097', '24.67078002286371', '59.424680606517576', '24.566238244299257'),
    # Haabersti
    ('59.42307949118309', '24.66723950696405', '59.40779447737271', '24.614968617681825'),
    # Mustamäe
    ('59.410502611228274', '24.703073819403016', '59.395211919711365', '24.65080293012079'),
    # Kristiine
    ('59.42755451066967', '24.730067570928895', '59.412271517239816', '24.67779668164667'),
    # Kassisaba
    ('59.43475678748747', '24.746488054518068', '59.42711777891242', '24.720352609876954'),
    # Pelgulinn
    ('59.441852989733874', '24.733390257580368', '59.43421558315144', '24.707254812939254'),
    # Kalamaja
    ('59.44942323101878', '24.749011442883102', '59.441787533574114', '24.72287599824199'),
    # Vanalinn
    ('59.44256209001308', '24.761220858318893', '59.43492484351895', '24.73508541367778'),
    # Kesklinn
    ('59.43536130404822', '24.76802294038066', '59.42772243194037', '24.741887495739547'),
    # Kadriorg
    ('59.44156934546018', '24.78900854371318', '59.433931874841804', '24.762873099072067'),
    # Pirita
    ('59.45800730477103', '24.839511295335114', '59.44273806263683', '24.787240406052888'),
)

# Every district is queried for 3-room listings, in the order listed above.
for _nelat, _nelng, _swlat, _swlng in _AREAS:
    kv.get_area_objects(_nelat, _nelng, _swlat, _swlng, '3')

# Emit the finished map page on stdout.
print(kv.get_html())
# TODO
# save html to file
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment