Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
grabbing cambridge airphotos
from bs4 import BeautifulSoup
import csv
import requests
# Collect image links from the Cambridge Air Photos "earthworks" theme pages.
# For every gallery <div> on each page, the href of each <a class="lightbox">
# is prefixed with the site root and written to output.txt, one URL per line.
pages = []
for i in range(1, 2):  # range(1, 2) yields only page 1; widen it to scrape more
    url = 'https://www.cambridgeairphotos.com/themes/earthworks/page' + str(i) + '.html'
    pages.append(url)

# The context manager guarantees output.txt is flushed and closed, so any
# later code that reads the file sees the complete list of links.
with open("output.txt", "w") as output_file:
    for item in pages:
        # A timeout keeps a stalled server from hanging the script forever.
        page = requests.get(item, timeout=30)
        if page.status_code != 200:
            # Do not parse an error page as if it were a gallery.
            print("skipping", item, "- HTTP status", page.status_code)
            continue
        soup = BeautifulSoup(page.content, "html.parser")
        divs = soup.find_all('div', attrs={"class": "cucapgallery naturalwidth compressed"})
        for div in divs:
            for link in div.find_all('a', attrs={"class": "lightbox"}):
                fulllink = link.get('href')
                if not fulllink:
                    # An anchor without an href would otherwise put None into
                    # the write call and raise TypeError.
                    continue
                # NOTE(review): assumes href is site-relative (starts with "/");
                # confirm against the live markup.
                output_file.write("https://www.cambridgeairphotos.com" + fulllink + "\n")
import csv
import requests
import re
# Download every image listed in output.txt into the current directory.
# Each row's first CSV column is treated as the image URL; the local filename
# is the last path segment of that URL.
with open('output.txt', newline='') as csvfile:
    csvrows = csv.reader(csvfile, delimiter=',', quotechar='"')
    for row in csvrows:
        # Blank lines produce empty rows; skip them instead of raising IndexError.
        if not row or not row[0].strip():
            continue
        url = row[0].strip()

        # Use the final path segment as the filename. The previous
        # re.sub('["https://...thumbnails"]', ...) was a regex character class:
        # it removed every individual character of that set from the whole
        # URL, garbling the name rather than stripping the prefix.
        filename = url.split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]
        if not filename:
            continue
        if '.' not in filename:
            # Only add an extension when the URL segment has none, to avoid
            # names such as "photo.jpg.jpg".
            filename += ".jpg"

        print(url)
        # Context managers release the connection and close the image file
        # even if a write fails part-way through.
        with requests.get(url, stream=True, timeout=30) as result:
            if result.status_code == 200:
                with open(filename, "wb") as image_file:
                    # iter_content applies any gzip/deflate content decoding,
                    # which reading result.raw directly does not.
                    for chunk in result.iter_content(chunk_size=65536):
                        image_file.write(chunk)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.