Skip to content

Instantly share code, notes, and snippets.

@risingsunomi
Created May 17, 2022 06:33
Show Gist options
  • Save risingsunomi/9e8e2563b1fae2526720d1f37a85a636 to your computer and use it in GitHub Desktop.
Save risingsunomi/9e8e2563b1fae2526720d1f37a85a636 to your computer and use it in GitHub Desktop.
Scrape offender images from FBI Wanted collection
#!/usr/bin/env python3
import os
import re
import uuid
from io import BytesIO

import requests
from bs4 import BeautifulSoup
from PIL import Image
site = 'https://www.fbi.gov/feeds/wanted-collection'
response = requests.get(site)
soup = BeautifulSoup(response.text, 'html.parser')
a_tags = soup.find_all('a')
a_urls = [a['href'] for a in a_tags]
for url in a_urls:
wanted_link = re.search(r'https\:\/\/www.fbi.gov\/wanted\/[\w\W]+', url)
if wanted_link:
res2 = requests.get(wanted_link.group(0))
soup2 = BeautifulSoup(res2.text, 'html.parser')
img_url = wanted_link.group(0)+"/@@images/image/preview"
print("Downloading {}....".format(img_url))
with open("./data/{}.png".format(str(uuid.uuid4()).replace("-", "")), 'wb') as f:
res3 = requests.get(img_url)
f.write(res3.content)
@risingsunomi
Copy link
Author

The filename of each downloaded image is a randomly generated UUID.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment