Created
January 14, 2018 20:54
-
-
Save Exchizz/1a0ae16d8ff503db09dd001d6fd44f0b to your computer and use it in GitHub Desktop.
Project "Please notify me when a new apartment is for sale"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# Project "Please notify me when a new apartment is for sale".
#
# Downloads the listing page at `quote_page`, extracts one record per row of
# the 'table-ads' table, prints the difference against the records saved by
# the previous run, and then saves the current records to `tmp_filename`.
#
# NOTE: this is a Python 2 script (it imports urllib2). The print calls below
# use the single-argument parenthesised form, which behaves the same on
# Python 2 and Python 3.

from json import load, dump
from os import path
from urllib2 import urlopen

from bs4 import BeautifulSoup
from jsondiff import diff

# Specify the url
quote_page = 'https://www.boligdeal.dk/odense-andelsbolig/odensec'

# Specify temp file (holds the records seen on the previous run, as JSON)
tmp_filename = "/tmp/tmp.txt"

# Query the website and parse the result; always release the connection,
# even if reading or parsing fails.
page = urlopen(quote_page)
try:
    soup = BeautifulSoup(page.read(), "lxml")
finally:
    page.close()

# Find table containing the good stuff
table = soup.find('table', attrs={'class': 'table-ads'})
if table is None:
    # Bail out before touching the state file so a broken/changed page does
    # not overwrite the last known-good listing with nothing.
    raise SystemExit("No 'table-ads' table found at %s" % quote_page)

data = []
for row in table.find_all('tr'):
    # The first cell is skipped; rows without any remaining cells
    # (e.g. rows made only of header cells) carry no listing.
    cols = row.find_all('td')[1:]
    if not cols:
        continue

    # Get link to apartment
    link = row.find('a', attrs={'class': 'image'})['href']

    # Get description, headline and details
    desc = row.find('h4').text.strip()
    headline = row.find('h3').find('a').string
    details = ', '.join([elm.text.strip() for elm in cols])

    data.append({'link': link, 'headline': headline,
                 'desc': desc, 'details': details})

file_exists = path.exists(tmp_filename)
mode = "r+" if file_exists else "w+"
with open(tmp_filename, mode) as json_file:
    if file_exists:
        try:
            prev_data = load(json_file)
        except ValueError:
            # The saved state is not valid JSON; report it and rebuild the
            # file from the current listing instead of failing on every run.
            print("%s was unreadable, recreating it" % tmp_filename)
        else:
            # Diff from the previous listing to the current one so that new
            # entries show up as insertions carrying their full content.
            json_diff = diff(prev_data, data)
            # Compare against {} explicitly: the diff is not always a dict,
            # and any non-{} result (even an empty list) should be reported.
            if json_diff != {}:
                print(json_diff)
        # Rewind and drop the old contents; without truncate() a shorter new
        # document would leave stale trailing bytes and corrupt the file.
        json_file.seek(0)
        json_file.truncate()
    else:
        print("%s created" % tmp_filename)
    dump(data, json_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment