Exchizz/BoligScraper

## BoligScraper
#!/usr/bin/python

from urllib2 import urlopen
from jsondiff import diff
from bs4 import BeautifulSoup
from json import load,dump
from os import path

# URL of the listing page to scrape
quote_page = 'https://www.boligdeal.dk/odense-andelsbolig/odensec'

# File holding the JSON snapshot of the listings from the previous run
tmp_filename = "/tmp/tmp.txt"

# Query the website and parse the result; close the response explicitly
# once it has been read so the connection is not left open.
page = urlopen(quote_page)
try:
    soup = BeautifulSoup(page.read(), "lxml")
finally:
    page.close()

# Find the table containing the listings
table = soup.find('table', attrs={'class': 'table-ads'})
if table is None:
    # find() yields None when nothing matches; stop with a readable message
    # instead of an AttributeError on the find_all() call below.
    raise SystemExit("No 'table-ads' table found at %s" % quote_page)
rows = table.find_all('tr')

# Build one dict per listing row: link, headline, description and details
data = []
for row in rows:
    # The first <td> is skipped; a row with no remaining cells carries no
    # listing details and is ignored.
    cols = row.find_all('td')[1:]
    if not cols:
        continue

    # Get link to the apartment
    link = row.find('a', attrs={'class': 'image'})['href']

    # Get description, headline and details
    desc = row.find('h4').text.strip()
    headline = row.find('h3').find('a').string
    details = ', '.join([elm.text.strip() for elm in cols])

    data.append({'link': link, 'headline': headline, 'desc': desc, 'details': details})


# Compare the freshly scraped listings with the snapshot written by the
# previous run, print any difference, then store the new snapshot.
file_exists = path.exists(tmp_filename)
# "r+" lets the old snapshot be read before it is overwritten;
# "w+" creates the file on the first run.
mode = "r+" if file_exists else "w+"
with open(tmp_filename, mode) as json_file:
    if file_exists:
        try:
            prev_data = load(json_file)
        except ValueError:
            # A snapshot that is not valid JSON would otherwise make every
            # later run fail here; treat it as empty so it gets replaced.
            prev_data = []
        # NOTE(review): jsondiff's diff(a, b) presumably describes a -> b, so
        # this reports new -> old; confirm the intended direction.
        json_diff = diff(data, prev_data)
        if json_diff != {}:
            # Parenthesised single argument prints the same under the
            # Python 2 print statement.
            print(json_diff)
    else:
        print("%s created" % tmp_filename)
    # Rewind and drop the old contents: without truncate() a shorter new
    # snapshot would leave stale bytes behind and corrupt the JSON.
    json_file.seek(0)
    json_file.truncate()
    dump(data, json_file)
	#!/usr/bin/python

	from urllib2 import urlopen
	from jsondiff import diff
	from bs4 import BeautifulSoup
	from json import load,dump
	from os import path

	# URL of the listing page to scrape
	quote_page = 'https://www.boligdeal.dk/odense-andelsbolig/odensec'

	# File holding the JSON snapshot of the listings from the previous run
	tmp_filename = "/tmp/tmp.txt"

	# Query the website and parse the result; close the response explicitly
	# once it has been read so the connection is not left open.
	page = urlopen(quote_page)
	try:
	    soup = BeautifulSoup(page.read(), "lxml")
	finally:
	    page.close()

	# Find the table containing the listings
	table = soup.find('table', attrs={'class': 'table-ads'})
	if table is None:
	    # find() yields None when nothing matches; stop with a readable message
	    # instead of an AttributeError on the find_all() call below.
	    raise SystemExit("No 'table-ads' table found at %s" % quote_page)
	rows = table.find_all('tr')

	# Build one dict per listing row: link, headline, description and details
	data = []
	for row in rows:
	    # The first <td> is skipped; a row with no remaining cells carries no
	    # listing details and is ignored.
	    cols = row.find_all('td')[1:]
	    if not cols:
	        continue

	    # Get link to the apartment
	    link = row.find('a', attrs={'class': 'image'})['href']

	    # Get description, headline and details
	    desc = row.find('h4').text.strip()
	    headline = row.find('h3').find('a').string
	    details = ', '.join([elm.text.strip() for elm in cols])

	    data.append({'link': link, 'headline': headline, 'desc': desc, 'details': details})


	# Compare the freshly scraped listings with the snapshot written by the
	# previous run, print any difference, then store the new snapshot.
	file_exists = path.exists(tmp_filename)
	# "r+" lets the old snapshot be read before it is overwritten;
	# "w+" creates the file on the first run.
	mode = "r+" if file_exists else "w+"
	with open(tmp_filename, mode) as json_file:
	    if file_exists:
	        try:
	            prev_data = load(json_file)
	        except ValueError:
	            # A snapshot that is not valid JSON would otherwise make every
	            # later run fail here; treat it as empty so it gets replaced.
	            prev_data = []
	        # NOTE(review): jsondiff's diff(a, b) presumably describes a -> b, so
	        # this reports new -> old; confirm the intended direction.
	        json_diff = diff(data, prev_data)
	        if json_diff != {}:
	            # Parenthesised single argument prints the same under the
	            # Python 2 print statement.
	            print(json_diff)
	    else:
	        print("%s created" % tmp_filename)
	    # Rewind and drop the old contents: without truncate() a shorter new
	    # snapshot would leave stale bytes behind and corrupt the JSON.
	    json_file.seek(0)
	    json_file.truncate()
	    dump(data, json_file)