Skip to content

Instantly share code, notes, and snippets.

@aarong1
Created January 19, 2022 22:42
Show Gist options
  • Save aarong1/c82e574608a4946a4f1b1bc9a804f894 to your computer and use it in GitHub Desktop.
Save aarong1/c82e574608a4946a4f1b1bc9a804f894 to your computer and use it in GitHub Desktop.
python web scraper urllib.request
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 15 16:36:30 2021
@author: dsuser
"""
import datetime
import os
import urllib.request

# Show the launch directory, switch to the project folder so the downloaded
# workbook is written there, then show the new location for confirmation.
print(os.getcwd())
os.chdir("C:/Users/Public/Desktop/TraceData/hscni-analytics-data/R/risk_assess")
print(os.getcwd())

# Reference dates: today, a one-day backwards offset, and yesterday.
x = datetime.date.today()
x_d = datetime.timedelta(days=-1)
x1 = x + x_d
def formatDate(x):
    """Return *x* formatted as a six-character ``ddmmyy`` string.

    Day, month and two-digit year are each zero-padded to two characters,
    e.g. 5 March 2021 becomes ``"050321"``. The result is used further down
    the script as the date suffix of the workbook URLs.

    Args:
        x: A ``datetime.date`` (or ``datetime.datetime``) to format.

    Returns:
        The date as a ``ddmmyy`` string.
    """
    # Diagnostic output kept from the original script.
    print(x.day)
    print(x.month)
    print(x.year)
    # strftime pads every field. The earlier hand-rolled padding never used
    # the padded day and raised UnboundLocalError for months 10-12.
    return x.strftime("%d%m%y")
# Imported here so this section is self-contained; URLError is the failure
# type raised by urlretrieve for HTTP and connection problems.
import urllib.error

todayFormated = formatDate(x)
print('today (ddmmyy): ', todayFormated)
yesterdayFormated = formatDate(x1)
print('yesterday: ' + yesterdayFormated)

# The two candidate workbook URLs differ only in their date suffix.
newfileName = "https://www.health-ni.gov.uk/sites/default/files/publications/health/doh-dd-" + todayFormated + ".xlsx"
dls = "https://www.health-ni.gov.uk/sites/default/files/publications/health/doh-dd-" + yesterdayFormated + ".xlsx"

# Try today's workbook first and fall back to yesterday's when the request
# fails. Only download errors trigger the fallback: HTTPError (e.g. a 404
# for a file that is not published yet) is a subclass of URLError. Ctrl-C
# and programming errors are no longer swallowed by a bare except.
try:
    urllib.request.urlretrieve(newfileName, "DoH_latest.xlsx")
except urllib.error.URLError:
    # If this second attempt also fails the error propagates to the caller.
    urllib.request.urlretrieve(dls, "DoH_latest.xlsx")
    print('todays file not available yet, defaulting to yesterdays')
else:
    print('todays file is released and loaded from doH')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment