Skip to content

Instantly share code, notes, and snippets.

@valdergallo
Created June 1, 2022 13:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save valdergallo/ed42886d4263d87c71cea60e18fb7573 to your computer and use it in GitHub Desktop.
Download Time Zone information from Wikipedia
import requests
from lxml.html import document_fromstring
from django.core.exceptions import ValidationError
from pprint import pprint
def _parse_wiki_data(content):
    """Extract time zone records from the parsed Wikipedia page.

    The table is located with a fixed absolute XPath, then every row of that
    table is read cell by cell so the four values of a record always come
    from the same row.

    Args:
        content: Parsed HTML document exposing ``.xpath()`` (the caller
            passes the result of ``lxml.html.document_fromstring``).

    Returns:
        A list of dicts, one per table row that has a name, with the keys
        ``tz_name``, ``tz_time_utc_std``, ``tz_time_utc_dst`` and
        ``tz_abbreviation``. Values are stripped strings; a cell with no
        text yields an empty string.

    Raises:
        IndexError: If the table XPath matches nothing. This is the same
            exception type the previous ``.pop()`` on an empty result raised.
    """
    # NOTE(review): this absolute path is tied to the page layout at the time
    # of writing; re-check it against the live page if the lookup fails.
    tables = content.xpath("/html/body/div[3]/div[3]/div[5]/div[1]/table[1]")
    if not tables:
        raise IndexError("time zone table not found in the Wikipedia page")
    table = tables[0]

    # Output key -> 1-based <td> position inside a row.
    # Presumably the name, standard/DST UTC offset and abbreviation columns
    # of the Wikipedia table -- confirm against the live page.
    columns = (
        ("tz_name", 2),
        ("tz_time_utc_std", 5),
        ("tz_time_utc_dst", 6),
        ("tz_abbreviation", 7),
    )
    last_position = max(position for _, position in columns)

    raw_data = []
    # ".//tr" is relative to the table; a leading "//" would be evaluated
    # against the whole document and pick up rows from every other table.
    for row in table.xpath(".//tr"):
        cells = row.xpath("./td")
        if len(cells) < last_position:
            # Header rows (<th> only) and rows too short to hold a record.
            continue

        record = {}
        for key, position in columns:
            cell = cells[position - 1]
            # Prefer the link text, as before; fall back to the whole cell so
            # a cell without a link no longer shifts the following rows.
            text = "".join(cell.xpath("./a//text()")).strip()
            if not text:
                text = "".join(cell.xpath(".//text()")).strip()
            record[key] = text

        if record["tz_name"]:
            raw_data.append(record)

    return raw_data
def _load_iana_timezone_database(timeout=30):
    """Download the Wikipedia list of tz database time zones and parse it.

    Args:
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 30.

    Returns:
        The value returned by ``_parse_wiki_data`` for the downloaded page.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status (raised by ``raise_for_status``).
    """
    wiki_database_url = "https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(wiki_database_url, timeout=timeout)
    # Fail here on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()
    content = document_fromstring(response.content)
    return _parse_wiki_data(content)
if __name__ == "__main__":
    # Script entry point: download the table and pretty-print the zone names
    # between two 80-character rules.
    separator = "-" * 80

    print("Download Time Zone names from Wikipedia")
    print("Loading ...")

    records = _load_iana_timezone_database()
    # Only the name of each record is shown.
    iana_timezones = [record["tz_name"] for record in records]

    print(separator)
    pprint(iana_timezones)
    print(separator)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment