Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save StellarStoic/3c19567b28dfb2ee1d5c748c5cfa0a1d to your computer and use it in GitHub Desktop.
Save StellarStoic/3c19567b28dfb2ee1d5c748c5cfa0a1d to your computer and use it in GitHub Desktop.
slo_control_scrape
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import pytz
import re
# Scrape the NOTAM summary page and print, for each of the next few days,
# the NOTAMs whose validity period (fields B and C) overlaps that day.

# Page that is scraped for NOTAM entries.
NOTAM_URL = "https://www.sloveniacontrol.si/Strani/Summary-C.aspx"
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30
# Zone used for "today" and for timestamps without a recognised abbreviation.
LOCAL_TZ_NAME = 'Europe/Ljubljana'
# Number of consecutive days, starting today, that are reported.
DAYS_AHEAD = 7
# Timezone abbreviations accepted after a timestamp, mapped to tz database names.
# NOTE(review): timestamps carrying any other abbreviation (e.g. CEST) are not
# recognised and the NOTAM is skipped -- confirm against the live page.
TZ_MAPPING = {'EST': 'US/Eastern', 'CET': 'Europe/Paris', 'UTC': 'UTC'}
# Literal accepted in place of an end date.
PERMANENT = 'PERM'
# A NOTAM whose end is PERM is treated as ending this long after its start.
PERM_WINDOW = timedelta(hours=48)
# "DD.MM.YYYY", optionally followed by " HH:MM", optionally followed by one of
# the abbreviations in TZ_MAPPING (built from the mapping so the two cannot drift).
DATE_PATTERN = re.compile(
    r'(?P<date>\d{2}\.\d{2}\.\d{4})'
    r'(?: (?P<time>\d{2}:\d{2})'
    r'(?: (?P<tz>' + '|'.join(map(re.escape, TZ_MAPPING)) + r'))?)?'
)


def field_value(text, prefix):
    """Return the text following *prefix* (e.g. ``'B)'``), stripped.

    Returns None when *text* does not start with *prefix*.
    """
    text = text.strip()
    if not text.startswith(prefix):
        return None
    return text[len(prefix):].strip()


def parse_notam_timestamp(text):
    """Split a NOTAM date string into its naive datetime and tz abbreviation.

    Returns a ``(naive_datetime, tz_abbreviation_or_None)`` tuple, or None when
    *text* is not a date in the expected format (this includes ``'PERM'`` and
    impossible calendar dates such as ``31.02.2023``).
    """
    match = DATE_PATTERN.fullmatch(text)
    if match is None:
        return None
    date_part, time_part, tz_abbr = match.group('date', 'time', 'tz')
    try:
        if time_part is not None:
            naive = datetime.strptime(f"{date_part} {time_part}", "%d.%m.%Y %H:%M")
        else:
            naive = datetime.strptime(date_part, "%d.%m.%Y")
    except ValueError:
        # The digits matched the pattern but do not form a real date/time.
        return None
    return naive, tz_abbr


def localize_timestamp(naive, tz_abbr):
    """Attach the zone for *tz_abbr* (or the local zone if unknown/None) to *naive*."""
    zone = pytz.timezone(TZ_MAPPING.get(tz_abbr, LOCAL_TZ_NAME))
    return zone.localize(naive)


def is_active_during(start, end, window_start, window_end):
    """Return True if the period [start, end] overlaps [window_start, window_end).

    A missing *start* or *end* (None) never counts as active.
    """
    if start is None or end is None:
        return False
    return start < window_end and end >= window_start


def extract_validity(paragraphs):
    """Return the ``(start, end)`` aware datetimes found in a NOTAM's <p> tags.

    Either value is None when the corresponding field is absent or unparsable.
    When several paragraphs carry a field, the last parsable one wins.
    """
    start = None
    end = None
    end_is_permanent = False
    for paragraph in paragraphs:
        start_tag = paragraph.find(class_='kzps-notam-item-b')
        if start_tag is not None:
            value = field_value(start_tag.get_text(), 'B)')
            parsed = parse_notam_timestamp(value) if value is not None else None
            if parsed is not None:
                start = localize_timestamp(*parsed)

        end_tag = paragraph.find(class_='kzps-notam-item-c')
        if end_tag is not None:
            value = field_value(end_tag.get_text(), 'C)')
            if value == PERMANENT:
                end, end_is_permanent = None, True
            elif value is not None:
                parsed = parse_notam_timestamp(value)
                if parsed is not None:
                    end = localize_timestamp(*parsed)
                    end_is_permanent = False

    # Resolve PERM only once the start is known, so the result does not depend
    # on the order of the fields and a missing start cannot raise TypeError.
    if end_is_permanent and start is not None:
        end = start + PERM_WINDOW
    return start, end


def build_day_windows(now, zone):
    """Return ``[window_start, window_end, notams]`` lists for the reported days.

    Each window runs to the following local midnight. The first one starts at
    *now* rather than at midnight, so NOTAMs that already ended today are left out.
    """
    windows = []
    for offset in range(DAYS_AHEAD):
        day = now.date() + timedelta(days=offset)
        following = day + timedelta(days=1)
        midnight = zone.localize(datetime(day.year, day.month, day.day))
        next_midnight = zone.localize(
            datetime(following.year, following.month, following.day)
        )
        windows.append([now if offset == 0 else midnight, next_midnight, []])
    return windows


def main():
    """Fetch the NOTAM page and print the NOTAMs active on each reported day."""
    try:
        response = requests.get(NOTAM_URL, timeout=REQUEST_TIMEOUT)
        # An error page would otherwise be reported as "no NOTAMs found".
        response.raise_for_status()
    except requests.RequestException as exc:
        raise SystemExit(f"Could not fetch {NOTAM_URL}: {exc}") from exc

    soup = BeautifulSoup(response.text, 'html.parser')
    notam_data_elements = soup.find_all(class_="kzps-notam-item")
    if not notam_data_elements:
        print("No NOTAM data elements found.")
        return

    local_zone = pytz.timezone(LOCAL_TZ_NAME)
    today = datetime.now(local_zone)
    windows = build_day_windows(today, local_zone)

    start_date = None
    end_date = None
    for notam_data in notam_data_elements:
        heading = notam_data.find('h1')
        notam_number = heading.get_text() if heading is not None else "unknown"
        notam_details = notam_data.find_all('p')
        start_date, end_date = extract_validity(notam_details)
        for window_start, window_end, notams in windows:
            if is_active_during(start_date, end_date, window_start, window_end):
                notams.append((notam_number, notam_details))

    for window_start, _window_end, notams in windows:
        print("\n\n" + window_start.strftime('%A:') + "\n")
        if notams:
            for notam_number, notam_details in notams:
                print("----------------------------------------")
                print(f"NOTAM Number: {notam_number}")
                for detail in notam_details:
                    print(detail.get_text())
        else:
            print("No NOTAMs. Yay!")

    # Diagnostic output: validity of the last NOTAM on the page, plus "now".
    print(f"\nStart date: {start_date.strftime('%d.%m.%Y %H:%M')} {start_date.tzinfo}" if start_date else "Start date: None")
    print(f"End date: {end_date.strftime('%d.%m.%Y %H:%M')} {end_date.tzinfo}" if end_date else "End date: None")
    print(f"Today's date: {today.strftime('%d.%m.%Y %H:%M')}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment