Skip to content

Instantly share code, notes, and snippets.

@kisst
Created March 26, 2023 11:38
Show Gist options
  • Save kisst/7c61db996a2702a0e4e39149c7f465fe to your computer and use it in GitHub Desktop.
Save kisst/7c61db996a2702a0e4e39149c7f465fe to your computer and use it in GitHub Desktop.
timeanddate weather history scraper
#!/usr/bin/env python3
"""
Fetch weather data from timeanddate
"""
import re
import json
from datetime import date
import html
import pprint
import requests
from dateutil.rrule import rrule, DAILY
# Endpoint of the request, assembled from its three parts.
PROT = "https://"
DOMAIN = "www.timeanddate.com"
URI = "/scripts/cityajax.php"
BASE_URL = f"{PROT}{DOMAIN}{URI}"

# Placeholder value: put the location code to query here.
LOCATION = "@<insertlocationcode>"

# Query parameters that are the same for every request.
base_parm = dict(n=LOCATION, mode="historic", json=1)

# Bounds of the date range to download.
date_from = date(year=2023, month=2, day=1)
date_to = date(year=2023, month=2, day=3)

# Header line of the comma separated output, one name per printed column.
_HEADER_COLUMNS = (
    "year",
    "month",
    "day",
    "hour",
    "temp[°C]",
    "desc",
    "wind[km/h]",
    "humidity[%]",
    "pressure[mbar]",
    "visibilty[km]",
)
print(", ".join(_HEADER_COLUMNS))
# Matches the unquoted single-letter keys (h, s, c) of the response, but only
# directly after "{" or ",". Anchoring on that punctuation keeps the
# substitution away from text inside string values such as "https:".
_BARE_KEY = re.compile(r"([{,]\s*)([hsc]):")


def normalise_payload(payload):
    """Convert one decoded response body into text that json.loads accepts.

    HTML entities are unescaped, non-breaking spaces become plain spaces and
    the bare ``h``/``s``/``c`` keys are wrapped in double quotes.

    Args:
        payload: the response body as a str.

    Returns:
        The rewritten text; it is not parsed or validated here.
    """
    text = html.unescape(payload)
    # replace html space with a normal space
    text = text.replace("\xa0", " ")
    # add the missing quotes around the keys to make it valid json
    return _BARE_KEY.sub(r'\1"\2":', text)


def extract_rows(raw_json):
    """Yield one tuple of output fields per datapoint found in *raw_json*.

    Each tuple is (clock, temp, text, windspeed, humidity, pressure,
    visibilty), taken from cells 0, 2, 3, 4, 6, 7 and 8 of the datapoint's
    "c" list with the trailing markup or unit cut off.

    This is a generator, so rows parsed before a malformed datapoint are
    still delivered to the caller.

    Raises:
        ValueError: *raw_json* is not valid JSON (json.JSONDecodeError).
        KeyError, IndexError, TypeError: a datapoint lacks the expected cells.
    """
    for datapoint in json.loads(raw_json):
        cells = datapoint["c"]
        yield (
            cells[0]["h"].split("<")[0],
            cells[2]["h"].split(" ")[0],
            cells[3]["h"].split(".")[0],
            cells[4]["h"].split(" ")[0],
            cells[6]["h"].split("%")[0],
            cells[7]["h"].split(" ")[0],
            cells[8]["h"].split(" ")[0],
        )


def main():
    """Download every day of the configured range and print it as CSV rows.

    One request is made per day produced by rrule between date_from and
    date_to. A day whose response cannot be parsed is reported between
    "# ERROR" / "# ERROR END" marker lines and the run continues.
    """
    for dt in rrule(DAILY, dtstart=date_from, until=date_to):
        # Build a fresh dict so the shared base_parm template is not mutated.
        parm = {
            **base_parm,
            "hd": dt.strftime("%Y%m%d"),
            "month": dt.strftime("%m"),
            "year": dt.strftime("%Y"),
        }
        # grab the data
        datapoints = requests.get(BASE_URL, params=parm, timeout=10)
        # decode it as utf8 and repair it into valid json text
        raw_json = normalise_payload(datapoints.content.decode("utf-8"))
        try:
            for row in extract_rows(raw_json):
                print(
                    dt.strftime("%Y"),
                    dt.strftime("%m"),
                    dt.strftime("%d"),
                    *row,
                    sep=", ",
                )
        except Exception as exc:
            # Best effort: dump the offending payload and move on to the
            # next day instead of aborting the whole run.
            print("# ERROR")
            print(exc)
            print(raw_json)
            print("# ERROR END")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment