Skip to content

Instantly share code, notes, and snippets.

@mkfink
Last active January 22, 2020 16:55
Show Gist options
  • Save mkfink/aac99312cca2377629d3548d96cfc469 to your computer and use it in GitHub Desktop.
Save mkfink/aac99312cca2377629d3548d96cfc469 to your computer and use it in GitHub Desktop.
scrape and generate images of equipment labels from wiki page
""" Scrape the wiki page of equipment labels to generate images of
each label at the resolution required by the label printer
"""
import json
import string
from io import BytesIO
from time import sleep

import requests
from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# --- Script configuration ---------------------------------------------------
# Wiki page that holds the table of equipment labels.
page_url = "https://www.i3detroit.org/wiki/Equipment_Labels_List"
# Final (width, height) in pixels that every saved label is resized to.
printer_resolution = (1109, 696)
# Seconds to let the page load before taking the screenshot.
load_delay = 3
# Characters allowed in an output file name; anything else is dropped.
fn_valid_chars = "".join(("-_.() ", string.ascii_letters, string.digits))

# --- Chrome webdriver options -----------------------------------------------
op = Options()
for chrome_flag in (
    # Labels on the wiki are at 1/2 printer resolution, so render at 2x.
    "--force-device-scale-factor=2.0",
    "start-maximized",
    '--headless',
):
    op.add_argument(chrome_flag)
# Download the wiki page and locate the table of labels.
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the label list.
response = requests.get(page_url, timeout=30)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, 'html.parser')

# The label list is taken to be the first <table> on the page.
tables = soup.find_all('table')
if not tables:
    raise SystemExit("No table found at " + page_url)
table = tables[0]

# Every <tr>/<th> descendant of the table is a candidate label row.
rows = table.find_all(['th', 'tr'])
# Element location/size are multiplied by this factor to get screenshot
# pixel coordinates. It must match the --force-device-scale-factor value
# given to Chrome.
device_scale = 2

# Each row of the table is one label
for row in rows:
    # Rows that carry no label title (e.g. header rows/cells) are skipped
    # instead of crashing on a missing element.
    title = row.find(id='title')
    title_text = title.find('text') if title is not None else None
    if title_text is None or not title_text.contents:
        continue
    name = str(title_text.contents[0])
    filename = ''.join(c for c in name if c in fn_valid_chars) + '.png'

    # Render the label html in a fresh browser instance.
    driver = webdriver.Chrome(options=op)
    try:
        # Hand the markup over as a script argument rather than splicing it
        # into a JavaScript string literal: quotes, backslashes or newlines
        # in the label can then never break the script.
        driver.execute_script("document.write(arguments[0])", str(row))

        # Get the size and location of the label in the browser
        element = driver.find_element(By.TAG_NAME, 'svg')
        location = element.location
        size = element.size

        # Make sure the label fully loads. QR code can be slow
        sleep(load_delay)
        png = driver.get_screenshot_as_png()
    finally:
        # quit() (not close()) also shuts down the chromedriver process, and
        # running it in `finally` avoids leaking browsers when a row fails.
        driver.quit()

    # Crop the screenshot to the label and resize to the exact printer
    # resolution.
    left = location['x'] * device_scale
    top = location['y'] * device_scale
    right = left + size['width'] * device_scale
    bottom = top + size['height'] * device_scale
    im = Image.open(BytesIO(png))
    im = im.crop((left, top, right, bottom))
    im = im.resize(printer_resolution, Image.BICUBIC)
    im.save(filename)
    print("Captured image for " + name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment