Skip to content

Instantly share code, notes, and snippets.

@dvas0004
Created August 13, 2018 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dvas0004/21d8c7ef7bb0651bf766b56434ad011b to your computer and use it in GitHub Desktop.
Save dvas0004/21d8c7ef7bb0651bf766b56434ad011b to your computer and use it in GitHub Desktop.
Tallinn Real Estate - Scraping data using python and visualization
# Only Python 3.6 is supported.
# Install the dependencies with:
#   sudo pipenv --python=3.6 install requests requests_html numpy
import requests
import numpy as np
from requests_html import HTMLSession
class KVBuilder:
    """Collect apartment prices for map areas and render them as a Leaflet page.

    Typical use: call :meth:`get_area_objects` once per bounding box, then
    :meth:`get_html` to obtain a self-contained HTML document in which every
    collected point is drawn as a circle coloured by its price relative to
    the most expensive point.

    Attributes are plain instance fields:
      * ``discovery_url`` - URL of the most recent area query.
      * ``session``       - ``HTMLSession`` used to fetch listing pages.
      * ``data_objects``  - list of ``{'lng', 'lat', 'price'}`` dicts.
      * ``max_price``     - highest ``price`` recorded so far (0 if none).
    """

    # Static part of the generated page that precedes the per-point
    # JavaScript. It ends inside an open <script> element.
    _HTML_HEADER = '''
<html>
<head>
<title>TLN Real Estate</title>
<meta name="viewport" content="initial-scale=1.0">
<meta charset="utf-8">
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.3.3/dist/leaflet.css"
integrity="sha512-Rksm5RenBEKSKFjgI3a41vrjkw4EVPlJ3+OiI65vTjIdo9brlAacEuKOiQ5OFh7cOI1bkDwLqdLw3Zg0cRJAAQ=="
crossorigin=""/>
<script src="https://unpkg.com/leaflet@1.3.3/dist/leaflet.js"
integrity="sha512-tAGcCfR4Sc5ZP5ZoVz0quoZDYX5aCtEm/eu1KhSLj2c9eFrylXZknQYmxUssFaVJKvvc0dJQixhGjG2yXWiV9Q=="
crossorigin=""></script>
<style>
#map {
height: 100%;
}
/* Optional: Makes the sample page fill the window. */
html, body {
height: 100%;
margin: 0;
padding: 0;
}
</style>
</head>
<body>
<div id="map"></div>
<script>
function perc2color(perc) {
var r, g, b = 0;
if(perc < 50) {
r = 255;
g = Math.round(5.1 * perc);
}
else {
g = 255;
r = Math.round(510 - 5.10 * perc);
}
var h = r * 0x10000 + g * 0x100 + b * 0x1;
return '#' + ('000000' + h.toString(16)).slice(-6);
}
var mymap = L.map('map').setView([59.437291, 24.745194], 12);
L.tileLayer('https://api.tiles.mapbox.com/v4/{id}/{z}/{x}/{y}.png?access_token=pk.eyJ1IjoiZHZhczAwMDQiLCJhIjoiY2prczdrMDRmMTg4ejNxbG1ndXFqYjZ3biJ9.BFxa0UpSh3dHg2pmDZSDYA', {
attribution: 'Map data &copy; <a href="https://www.openstreetmap.org/">OpenStreetMap</a> contributors, <a href="https://creativecommons.org/licenses/by-sa/2.0/">CC-BY-SA</a>, Imagery © <a href="https://www.mapbox.com/">Mapbox</a>',
maxZoom: 18,
id: 'mapbox.streets',
accessToken: 'pk.eyJ1IjoiZHZhczAwMDQiLCJhIjoiY2prczdrMDRmMTg4ejNxbG1ndXFqYjZ3biJ9.BFxa0UpSh3dHg2pmDZSDYA'
}).addTo(mymap);
'''

    # Closes the <script>, <body> and <html> elements opened by the header.
    _HTML_FOOTER = '''
</script>
</body>
</html>
'''

    def __init__(self):
        self.discovery_url = ''
        self.session = HTMLSession()
        self.data_objects = []
        self.max_price = 0

    def get_object_details(self, object_id):
        """Fetch one listing page and return its price divided by its size.

        Args:
            object_id: Listing identifier, interpolated into the query string.

        Returns:
            ``float`` - the listing's absolute price divided by its absolute
            size (presumably EUR per square metre; units are not visible
            here - TODO confirm).

        Raises:
            AttributeError: if either expected element is missing from the page.
            ValueError: if the extracted size or price text is not an integer.
            ZeroDivisionError: if the parsed size is 0.
            Any exception raised by the underlying HTTP request.
        """
        # NOTE(review): no timeout is passed, so a stalled server blocks forever.
        r = self.session.get('http://kinnisvaraportaal-kv-ee.postimees.ee/?act=search.objectinfo&object_id={}'.format(object_id))
        # The size is the text before the first non-breaking space, with any
        # surrounding '|' characters removed.
        absolute_size = int(r.html.find('span.sep', first=True).text.split('\xa0')[0].strip('|'))
        # The price is assembled from the first two non-breaking-space
        # separated groups only.
        # NOTE(review): this assumes exactly two digit groups (e.g. "125 000");
        # a price with one group or with three or more would not parse as
        # intended - verify against the live page format.
        absolute_price = int(''.join(r.html.find('p.object-price strong', first=True).text.split('\xa0')[0:2]))
        relative_price = float(absolute_price) / float(absolute_size)
        return relative_price

    def _add_data_point(self, lat, lng, price):
        """Record one map point and keep ``max_price`` in sync with it."""
        if price > self.max_price:
            self.max_price = price
        result = {
            'lng': lng,
            'lat': lat,
            'price': price
        }
        self.data_objects.append(result)
        print(result)

    def get_area_objects(self, nelat, nelng, swlat, swlng, rooms):
        """Query one bounding box and append a data point per returned marker.

        The response is handled in two shapes:
          * a dict with a ``'markers'`` list, where each marker carries its
            coordinates under the keys ``'0'``/``'1'`` and one or more
            dot-separated listing ids under ``'object_ids'`` or
            ``'object_id'``; the marker's price is the median over its
            listings;
          * any other iterable, whose items are indexed as
            ``[lat, lng, listing_id]``.

        A marker that raises while being processed is reported with ``print``
        and skipped, so one bad listing does not abort the whole area.

        Args:
            nelat, nelng: North-east corner of the bounding box.
            swlat, swlng: South-west corner of the bounding box.
            rooms: Room count, used as both the minimum and the maximum.

        Returns:
            None. Results accumulate in ``self.data_objects`` and
            ``self.max_price``.
        """
        self.discovery_url = 'http://kinnisvaraportaal-kv-ee.postimees.ee/?act=search.objectcoords&last_deal_type=1&company_id=&page=1&orderby=ob&page_size=10000&deal_type=1&dt_select=1&county=1&search_type=new&parish=1061&rooms_min={}&rooms_max={}&price_min=&price_max=&nr_of_people=&area_min=&area_max=&floor_min=&floor_max=&energy_certs=&keyword=&cluster=true&nelat={}&nelng={}&swlat={}&swlng={}&zoom=15'.format(rooms, rooms, nelat, nelng, swlat, swlng)
        # NOTE(review): no timeout is passed, so a stalled server blocks forever.
        kv_request = requests.get(self.discovery_url)
        kv_json_response = kv_request.json()
        print(kv_json_response)

        if isinstance(kv_json_response, dict):
            for marker in kv_json_response['markers']:
                try:
                    lng = marker['1']
                    lat = marker['0']
                    if 'object_ids' in marker:
                        objects = marker['object_ids'].split('.')
                    elif 'object_id' in marker:
                        objects = marker['object_id'].split('.')
                    else:
                        continue
                    relative_prices = [
                        self.get_object_details(apartment)
                        for apartment in objects
                    ]
                    self._add_data_point(lat, lng, np.median(relative_prices))
                except Exception as e:
                    # Best effort: report the failure and move to the next marker.
                    print(e)
                    continue
        else:
            for marker in kv_json_response:
                try:
                    lat = marker[0]
                    lng = marker[1]
                    apartment = marker[2]
                    relative_price = self.get_object_details(apartment)
                    # Going through the helper also updates max_price; it was
                    # previously left untouched on this path, which skewed the
                    # colour scale and could divide by zero in get_html().
                    self._add_data_point(lat, lng, relative_price)
                except Exception as e:
                    # Best effort: report the failure and move to the next marker.
                    print(e)
                    continue

    def get_html(self):
        """Return the full HTML page with one Leaflet circle per data point.

        Each circle is coloured by ``perc2color(p)`` where
        ``p = (price / max * 100 - 100) * -1``, i.e. a value near 0 for the
        most expensive point and approaching 100 as the price approaches 0.
        Clicking a circle shows its price in a popup.

        The scale maximum is the larger of ``self.max_price`` and the highest
        price actually present in ``self.data_objects``, so the page is still
        correct if ``max_price`` was not kept up to date. If that maximum is
        not positive, every point gets ``p = 0.0`` instead of dividing by zero.

        Returns:
            ``str`` - the HTML document.
        """
        max_price = max(
            [self.max_price] + [point['price'] for point in self.data_objects]
        )

        parts = [self._HTML_HEADER]
        for counter, data_point in enumerate(self.data_objects):
            price = data_point['price']
            if max_price > 0:
                percent = (((price / max_price) * 100) - 100) * -1
            else:
                percent = 0.0
            colour = 'perc2color(' + str(percent) + ')'
            parts.append(
                'var circle' + str(counter)
                + '= L.circle([' + str(data_point['lat']) + ', ' + str(data_point['lng'])
                + '], { color: ' + colour
                + ', fillColor: ' + colour
                + ', fillOpacity: 0.5,radius: 10}).addTo(mymap);'
            )
            parts.append('''
circle{}.bindPopup('{}')
'''.format(counter, price))
        parts.append(self._HTML_FOOTER)
        return ''.join(parts)
kv = KVBuilder()

# Bounding boxes of the Tallinn districts to scrape, one row per district, in
# the order they are queried. Each row is
# (north-east lat, north-east lng, south-west lat, south-west lng).
DISTRICT_BOUNDS = (
    # Rocca al Mare
    ('59.45522849665097', '24.67078002286371', '59.424680606517576', '24.566238244299257'),
    # Haabersti
    ('59.42307949118309', '24.66723950696405', '59.40779447737271', '24.614968617681825'),
    # Mustamäe
    ('59.410502611228274', '24.703073819403016', '59.395211919711365', '24.65080293012079'),
    # Kristiine
    ('59.42755451066967', '24.730067570928895', '59.412271517239816', '24.67779668164667'),
    # Kassisaba
    ('59.43475678748747', '24.746488054518068', '59.42711777891242', '24.720352609876954'),
    # Pelgulinn
    ('59.441852989733874', '24.733390257580368', '59.43421558315144', '24.707254812939254'),
    # Kalamaja
    ('59.44942323101878', '24.749011442883102', '59.441787533574114', '24.72287599824199'),
    # Vanalinn
    ('59.44256209001308', '24.761220858318893', '59.43492484351895', '24.73508541367778'),
    # Kesklinn
    ('59.43536130404822', '24.76802294038066', '59.42772243194037', '24.741887495739547'),
    # Kadriorg
    ('59.44156934546018', '24.78900854371318', '59.433931874841804', '24.762873099072067'),
    # Pirita
    ('59.45800730477103', '24.839511295335114', '59.44273806263683', '24.787240406052888'),
)

# Every district is queried with the room count fixed to '3'.
for ne_lat, ne_lng, sw_lat, sw_lng in DISTRICT_BOUNDS:
    kv.get_area_objects(ne_lat, ne_lng, sw_lat, sw_lng, '3')

# Emit the finished map page on standard output.
print(kv.get_html())
# TODO: save the generated HTML to a file instead of only printing it.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment