Skip to content

Instantly share code, notes, and snippets.

@mhweber
Created January 21, 2022 15:33
Show Gist options
  • Save mhweber/9cc3e8d72d69eb35f5cc43d245637bfa to your computer and use it in GitHub Desktop.
Save mhweber/9cc3e8d72d69eb35f5cc43d245637bfa to your computer and use it in GitHub Desktop.
List and download StreamCat files
from bs4 import BeautifulSoup
import requests
# Address of the HTML directory listing that listFD scans for links.
url = "https://gaftp.epa.gov/epadatacommons/ORD/NHDPlusLandscapeAttributes/StreamCat/HydroRegions"
# Suffix passed to listFD to keep only links whose href ends with it.
ext = 'zip'
def listFD(url, ext=''):
    """Return the URLs of the links on an HTML page whose href ends with *ext*.

    The page at *url* is fetched, its raw HTML is printed, and every
    ``<a>`` tag carrying an ``href`` attribute is inspected.

    Args:
        url: Address of the HTML page to scan. It is also used as the base
            against which relative links are resolved.
        ext: Suffix the ``href`` must end with. The default ``''`` matches
            every link.

    Returns:
        A list of URL strings, in the order the links appear on the page.
    """
    from urllib.parse import urljoin

    # NOTE(review): verify=False disables TLS certificate validation;
    # confirm it is still required for this host before relying on it.
    page = requests.get(url, verify=False).text
    # Echo the raw HTML so the caller can inspect what the server returned.
    print(page)
    soup = BeautifulSoup(page, 'html.parser')

    # Exactly one trailing slash, so relative hrefs resolve *inside* the
    # directory instead of replacing its last path segment.
    base = url.rstrip('/') + '/'

    # href=True skips anchors with no href attribute; calling .endswith on
    # their missing (None) value would raise AttributeError.
    return [
        urljoin(base, node['href'])
        for node in soup.find_all('a', href=True)
        if node['href'].endswith(ext)
    ]
# Print the URL of every matching file found in the listing.
for file in listFD(url, ext):
    print(file)

# Download one particular table taken from the listing printed above.
url = 'https://gaftp.epa.gov/epadatacommons/ORD/NHDPlusLandscapeAttributes/StreamCat/HydroRegions/wdrw_LD_Region18.zip'
# NOTE(review): verify=False disables TLS certificate validation;
# confirm it is still required for this host.
r = requests.get(url, allow_redirects=True, verify=False)
# Fail loudly instead of saving an HTTP error page under a .zip name.
r.raise_for_status()
# The context manager guarantees the archive is flushed and closed.
with open('E:/WorkingData/wdrw_LD_Region18.zip', 'wb') as out_file:
    out_file.write(r.content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment