Skip to content

Instantly share code, notes, and snippets.

@OneGneissGuy
Last active January 4, 2018 20:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save OneGneissGuy/ee9e7fd5937be47a413777c22b517fe0 to your computer and use it in GitHub Desktop.
Save OneGneissGuy/ee9e7fd5937be47a413777c22b517fe0 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
"""
Webscraping local air quality index forecast from https://sparetheair.com/
@author: saraceno
@email: jfsaraceno@gmail.com
@github: onegneissguy
code adapted from https://medium.com/python-pandemonium/6-things-to-develop-an-efficient-web-scraper-in-python-1dffa688793c
"""
def pull_aqi(tag_id="todayAQIBar", document=None):
    """Return the first element whose HTML ``id`` attribute equals *tag_id*.

    Args:
        tag_id: Value of the ``id`` attribute to look up.
        document: Parsed page to search. Any object exposing a
            BeautifulSoup-style ``find(id=...)`` method works. When omitted,
            the module-level ``soup`` object is searched, as before.

    Returns:
        The first matching element.

    Raises:
        IndexError: If no element with that id exists. The exception type is
            kept from the original list-indexing implementation so existing
            callers keep working, but the message now names the missing id.
    """
    source = soup if document is None else document
    # find() stops at the first match instead of collecting every match.
    tag = source.find(id=tag_id)
    if tag is None:
        raise IndexError(
            "no element with id {!r} found in the page".format(tag_id)
        )
    return tag
def process_aqi(result):
    """Split an AQI element's text into its numeric value and description.

    Args:
        result: Object with a ``text`` attribute whose first
            whitespace-separated token is the integer AQI value and whose
            remaining tokens describe the air quality category, e.g.
            ``"101 Unhealthy for Sensitive Groups"``.

    Returns:
        A ``(value, description)`` tuple: the AQI as an ``int`` and the
        description words joined by single spaces.

    Raises:
        ValueError: If the text is empty or does not start with an integer.
    """
    # split() with no argument collapses any run of spaces/newlines, so
    # markup whitespace between the number and the label cannot leak into
    # the tokens.
    words = result.text.split()
    if not words:
        raise ValueError("AQI element contains no text")
    aqi_value = int(words[0])
    # Keep at most the last four description words, and never the leading
    # number itself (short labels such as "Good" used to come back as
    # "45 Good").
    aqi_text = ' '.join(words[1:][-4:])
    return aqi_value, aqi_text
from bs4 import BeautifulSoup
import requests
# Page that is fetched and scraped for the AQI forecast.
url = 'http://www.sparetheair.com'
# Browser-style user agent sent with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
}
# Element ids looked up in the page for today's and tomorrow's AQI.
today_tag = "todayAQIBar"
tomorrow_tag = "tomorrowAQIBar"


def _forecast_message(today, tomorrow):
    """Return the sentence comparing tomorrow's AQI with today's.

    Both arguments are ``(value, description)`` tuples as returned by
    ``process_aqi``. A lower AQI value is reported as an improvement. The
    description quoted in the sentence is always tomorrow's, since the
    sentence talks about the forecast.
    """
    if tomorrow[0] < today[0]:
        template = 'The air quality will improve tomorrow and is forecasted to be {}'
    elif tomorrow[0] > today[0]:
        template = 'The air quality will be worse tomorrow and is forecasted to be {}'
    else:
        template = 'The air quality will remain the same and is forecasted to be {}'
    return template.format(tomorrow[1].lower())


try:
    page = requests.get(url, headers=headers, timeout=5)
    # Turn 4xx/5xx responses into requests.HTTPError (handled below as a
    # RequestException) instead of trying to parse an error page.
    page.raise_for_status()
    # `soup` stays a module-level name because pull_aqi() reads it as a global.
    soup = BeautifulSoup(page.content, 'html.parser')
    today = process_aqi(pull_aqi(tag_id=today_tag))
    tomorrow = process_aqi(pull_aqi(tag_id=tomorrow_tag))
    print(_forecast_message(today, tomorrow))
except requests.ConnectionError as e:
    print("OOPS!! Connection Error. Make sure you are connected to Internet. Technical Details given below.\n")
    print(str(e))
except requests.Timeout as e:
    print("OOPS!! Timeout Error")
    print(str(e))
except requests.RequestException as e:
    print("OOPS!! General Error")
    print(str(e))
except (IndexError, ValueError) as e:
    # Raised by pull_aqi()/process_aqi() when the expected elements are
    # missing or their text does not start with a number.
    print("OOPS!! Could not read the AQI forecast from the page")
    print(str(e))
except KeyboardInterrupt:
    print("Someone closed the program")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment