Skip to content

Instantly share code, notes, and snippets.

@max-kuk
Created July 20, 2021 10:06
Show Gist options
  • Save max-kuk/8037447a425d3bb53e58a374397839d7 to your computer and use it in GitHub Desktop.
Save max-kuk/8037447a425d3bb53e58a374397839d7 to your computer and use it in GitHub Desktop.
# Copyright 2020 by Maksim Kukushkin, The Information Systems Institute, University of Leipzig.
# Weather crawler to collect weather data from Wunderground.com
# This file is part of the tools used
# for master thesis "Flight price prediction and Dynamic Pricing in European low-cost airlines"
import csv
import json
import time
from calendar import monthrange
from collections import OrderedDict
from datetime import date
from datetime import datetime, timedelta
import requests
from pytz import timezone
def utc_to_local(utc_dt, tmz):
    """Convert a UTC datetime into an aware datetime in another time zone.

    Args:
        utc_dt: The moment to convert. A naive datetime is interpreted as
            UTC; an aware datetime is converted from the zone it carries.
        tmz: Time zone name accepted by ``pytz.timezone``,
            e.g. ``'Europe/Berlin'``.

    Returns:
        A timezone-aware datetime denoting the same instant in ``tmz``.
    """
    tmz_local = timezone(tmz)
    if utc_dt.tzinfo is None:
        # localize() only attaches a zone to a naive value; it does not shift
        # the clock. The naive input is therefore tagged as UTC first, and
        # the real conversion is done by astimezone() below.
        utc_dt = timezone("UTC").localize(utc_dt)
    return utc_dt.astimezone(tmz_local)
# Reference dates kept from the original script.
# NOTE(review): nothing below reads these two values -- the crawled period is
# defined by ``start`` / ``end``. Confirm which range is the intended one.
start_date = date(2017, 10, 4)  # change your dates if you wish
end_date = date(2020, 1, 1)

# Crawl window: every calendar month that contains a day in [start, end).
start, end = [datetime.strptime(_, "%Y-%m-%d") for _ in ("2017-01-01", "2020-01-01")]

# Walk the window month by month instead of day by day.
month_labels = []
if end > start:
    final_day = end - timedelta(days=1)  # ``end`` itself is excluded
    cursor_year, cursor_month = start.year, start.month
    while (cursor_year, cursor_month) <= (final_day.year, final_day.month):
        month_labels.append("{:02d}-{}".format(cursor_month, cursor_year))
        if cursor_month == 12:
            cursor_year, cursor_month = cursor_year + 1, 1
        else:
            cursor_month += 1

# Ordered, de-duplicated "MM-YYYY" labels (kept as an odict key view so the
# diagnostic print below looks the same as before).
months = OrderedDict.fromkeys(month_labels).keys()
print(months)

# One [first_day, last_day] pair per month, both formatted as YYYYMMDD strings
# ready to be substituted into the request URL.
dates = []
for label in months:
    month, year = label.split("-")
    last_day = monthrange(int(year), int(month))[1]
    dates.append([year + month + "01", year + month + str(last_day)])

# Berlin 'EDDT:9:DE' // find the location code on wunderground.com
# Barcelona 'LEBL:9:ES'
print("Number of month: {}".format(len(dates)))
# Historical-observations endpoint for one station. The location code in the
# path ('LEBL:9:ES') selects the station; the two placeholders take the first
# and last day of the requested month as YYYYMMDD strings.
# NOTE(review): the API key is embedded in source -- consider reading it from
# the environment instead of committing it.
URL_TEMPLATE = (
    "https://api.weather.com/v1/location/LEBL:9:ES/observations/historical.json"
    "?apiKey=6532d6454b8aa370768e63d6ba5a832e&units=m&startDate={}&endDate={}"
)
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled connection hangs forever
PAUSE_BETWEEN_REQUESTS = 10  # seconds to wait after each monthly request

# newline='' is what the csv module requires of files it writes to; without it
# every row is followed by a blank line on Windows.
with open('weather_BCN.csv', mode='w', newline='') as weather_file:
    weather_writer = csv.writer(weather_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    weather_writer.writerow(['date', 'temp'])

    for d in dates:
        url = URL_TEMPLATE.format(d[0], d[1])
        print(url)

        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()

        weather_data = response.json().get('observations') or []
        if not weather_data:
            print("No observations returned for {} - {}".format(d[0], d[1]))

        for w in weather_data:
            # Epoch seconds rendered as a UTC wall-clock string.
            # NOTE(review): the row timestamp comes from 'expire_time_gmt';
            # confirm this is the intended field for the observation time.
            weather_writer.writerow([
                time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(w['expire_time_gmt'])),
                w['temp'],
            ])

        # Pause between monthly requests to avoid hammering the API.
        time.sleep(PAUSE_BETWEEN_REQUESTS)

print("Done!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment