Skip to content

Instantly share code, notes, and snippets.

@Exchizz
Created January 14, 2018 20:54
Show Gist options
  • Save Exchizz/1a0ae16d8ff503db09dd001d6fd44f0b to your computer and use it in GitHub Desktop.
Save Exchizz/1a0ae16d8ff503db09dd001d6fd44f0b to your computer and use it in GitHub Desktop.
Project "Please notify me when a new apartment is for sale"
#!/usr/bin/python
from urllib2 import urlopen
from jsondiff import diff
from bs4 import BeautifulSoup
from json import load,dump
from os import path
# Specify the url
quote_page = 'https://www.boligdeal.dk/odense-andelsbolig/odensec'
# Specify temp file
tmp_filename = "/tmp/tmp.txt"


def scrape_listings(url):
    """Download *url* and return the advertised apartments as a list of dicts.

    Each dict carries the keys 'link', 'headline', 'desc' and 'details',
    extracted from one row of the page's ``table-ads`` table.

    Raises RuntimeError when the page contains no such table, so that the
    caller stops before overwriting the previous snapshot.
    """
    # Query the website and parse the result
    page = urlopen(url)
    try:
        soup = BeautifulSoup(page.read(), "lxml")
    finally:
        # The response object is not needed once the markup is in memory.
        page.close()

    # Find table containing the good stuff
    table = soup.find('table', attrs={'class': 'table-ads'})
    if table is None:
        raise RuntimeError("no 'table-ads' table found at %s" % url)

    data = []
    for row in table.find_all('tr'):
        # The first cell is skipped; rows with nothing left after that
        # (presumably header rows -- verify against the page) are ignored.
        cols = row.find_all('td')[1:]
        if not cols:
            continue
        # Get link to apartment
        link = row.find('a', attrs={'class': 'image'})['href']
        # Get description, headline and details
        desc = row.find('h4').text.strip()
        headline = row.find('h3').find('a').string
        details = ', '.join([elm.text.strip() for elm in cols])
        data.append({'link': link, 'headline': headline,
                     'desc': desc, 'details': details})
    return data


def load_snapshot(filename):
    """Return the listings stored in *filename*, or None if none are usable.

    None is returned both when the file does not exist and when its content
    is not valid JSON; in the latter case the caller simply rewrites it.
    """
    if not path.exists(filename):
        return None
    with open(filename) as json_file:
        try:
            return load(json_file)
        except ValueError:
            # A snapshot that cannot be decoded must not block every future
            # run; treat it like a missing file.
            return None


def save_snapshot(filename, data):
    """Write *data* to *filename* as JSON, replacing any previous content."""
    # "w" truncates first, so a shorter listing never leaves the tail of an
    # older, longer snapshot behind (which would make the file invalid JSON).
    with open(filename, "w") as json_file:
        dump(data, json_file)


def main():
    """Scrape the listings, print what changed since last run, save them."""
    data = scrape_listings(quote_page)
    prev_data = load_snapshot(tmp_filename)
    if prev_data is None:
        print("%s created" % tmp_filename)
    else:
        # diff(old, new): additions on the site show up as insertions.
        json_diff = diff(prev_data, data)
        # Compare against {} explicitly: jsondiff may return a non-dict
        # (e.g. a whole replacement list), which must still be reported.
        if json_diff != {}:
            print(json_diff)
    save_snapshot(tmp_filename, data)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment