# Gist by @mikejcorey (b254091a2e64f230fb4aa9aa9daa2b11), created April 6, 2017 17:17.
# A non-working ejendomstorvet scrape.
import time
import json
import requests
# One shared session is used for every request below; it sends a
# desktop-Firefox User-Agent header with each of them.
session = requests.Session()
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
})
# Page number placed in the initial search payload.
page_num = 1

# Search filter payload, copied from a request observed in the Network tab
# of the Firefox web inspector. Values are kept exactly as captured
# (including numbers that the site sends as strings).
params = {
    "SearchId": "2f7ca243-b841-49a9-b245-78f470b9ac9a",
    "Lang": None,
    "Controller": "Search",
    "View": "Index",
    "ViewType": "List",
    "Categories": "None",
    "ItemType": "OwnUse",
    "ItemTypeFilter": "None",

    "GeoType": "",
    "GeoId": "",
    "GeoDisplayName": "",
    "GeoArea": None,
    "GeoMunicipality": None,
    "GeoPostal": None,

    # From/To pairs are sent as strings, as in the captured request.
    "FloorAreaFrom": "0",
    "FloorAreaTo": "2000",
    "RentalPriceFrom": "0",
    "RentalPriceTo": "1000000",
    "RentalPriceFloorAreaFrom": "0",
    "RentalPriceFloorAreaTo": "2000",
    "PriceFrom": "0",
    "PriceTo": "100000000",
    "BaseAreaFrom": "0",
    "BaseAreaTo": "10000",

    "ItemsPerPage": 12,
    "PageNumber": page_num,
    "SortOrder": "Created_desc",
    "IsResetting": False,
    "ReturnFrom": "-10",
    "ReturnTo": "20",
    "IsInitialSearch": True,
    "IsEditing": False,
    "FullTextSearch": None,
    "MultiSearch": False,

    # NOTE(review): "SW" carries the larger X/Y values and "NE" the smaller
    # ones, which looks swapped; kept as captured -- confirm against the site.
    "MapBounds": {
        "SW": {"X": 16.21582, "Y": 57.77452},
        "NE": {"X": 7.69043, "Y": 54.53383},
    },
    "Polygon": [],
    "Rectangle": {
        "NW": None,
        "NE": None,
        "SE": None,
        "SW": None,
    },
    "Circle": {
        "Radius": 0,
        "Center": None,
    },

    "GeoList": None,
    "SupplierId": None,
    "SearchKeywords": [],
    "Keywords": [],
    "ViewRenderSearchModule": True,
    "ViewRenderImageSlider": False,
    "NumberOfPages": 629,
}
# Load the listing page first and show the cookies the server returned.
index = session.get('https://www.ejendomstorvet.dk/ledigelokaler')
print(index.cookies)

# Subsequent requests carry the listing page as their Referer.
session.headers['Referer'] = 'https://www.ejendomstorvet.dk/ledigelokaler'

# NOTE(review): presumably this registers cookie consent for the session --
# confirm against the site; the response cookies are printed for inspection.
csa_accept = session.get('https://www.ejendomstorvet.dk/utility/csaccept?create=true')
print(csa_accept.cookies)

# Pages 1..629, matching the "NumberOfPages" value in the initial payload.
for page in range(1, 630):
    params['PageNumber'] = page
    print(json.dumps(params))

    # First reset the search by posting the filter for this page.
    r = session.post('https://www.ejendomstorvet.dk/search/update?itemtype=OwnUse', json=params)
    # The Response attribute is `status_code`; `status` does not exist and
    # raised AttributeError on the first iteration.
    print(r.status_code)
    # Fail with a clear HTTP error instead of an opaque JSON/KeyError below.
    r.raise_for_status()

    # Decode the body once and reuse it (it was previously parsed three times).
    payload = r.json()
    print(payload)
    print(type(payload))

    # Carry the server-returned filter forward as the payload for the next page.
    params = payload['filter']

    # Now get the results for the search that was just updated.
    results = session.get('https://www.ejendomstorvet.dk/search/result')
    results.raise_for_status()
    results_json = results.json()
    for prop in results_json['PropertyResultList']:
        print(prop['RefUrl'])

    # Pause for 2 seconds to avoid overloading the server. Can very likely be
    # turned down to 0.5 seconds, but good to test first.
    time.sleep(2)
# (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.")