Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save audhiaprilliant/566fa40983abf8f74a51e883e337a09f to your computer and use it in GitHub Desktop.
Save audhiaprilliant/566fa40983abf8f74a51e883e337a09f to your computer and use it in GitHub Desktop.
Apache Airflow as Job Orchestration
# Function to get the daily aggregated data
def get_daily_summary(**kwargs):
    """Scrape the daily COVID summary figures and return them as one CSV row.

    The page is obtained from ``get_url()`` and the timestamp from
    ``get_current_date()`` (both defined elsewhere in this file). The
    first number found in each of the four summary sub-boxes is taken,
    with thousands separators (``,``) removed.

    Args:
        **kwargs: Accepted and ignored (presumably the context passed in
            by the scheduler that calls this task -- confirm with caller).

    Returns:
        str: ``"date,time,confirmed,treated,deceased,recovered"``.

    Raises:
        ValueError: If the summary box, one of its sub-boxes, or a number
            inside a sub-box cannot be found in the page.
    """
    soup = get_url()
    date, time = get_current_date()

    boxes = soup.find_all('div', class_='covid__box')
    if not boxes:
        raise ValueError("no 'covid__box' element found in the page")
    # NOTE(review): the figures are read from the last matching box, which
    # is what the original loop ended up with -- confirm that the page only
    # ever carries one such box.
    box = boxes[-1]

    # A digit followed by any run of non-whitespace. Zero-length runs are
    # allowed so that single-digit figures (e.g. "0") are matched too.
    pattern_summary = re.compile(r'\d\S*')

    # Order matters: confirmed, under treatment, deceased, recovered.
    css_classes = (
        'covid__box2 -cases',
        'covid__box2 -odp',
        'covid__box2 -gone',
        'covid__box2 -health',
    )

    figures = []
    for css_class in css_classes:
        elem = box.find('div', class_=css_class)
        if elem is None:
            raise ValueError("element %r not found in summary box" % css_class)
        match = pattern_summary.search(elem.text)
        if match is None:
            raise ValueError("no number found in element %r" % css_class)
        # Strip the thousands separator so the value is a plain integer string.
        figures.append(match.group().replace(',', ''))

    daily_update = ','.join([date, time] + figures)
    return daily_update
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment