-
-
Save fabtho/13e4a2e7cfbfde671b8fa81bbe9359fb to your computer and use it in GitHub Desktop.
#!/usr/bin/python | |
from selenium import webdriver | |
from PIL import Image | |
from cStringIO import StringIO | |
verbose = 1 | |
browser = webdriver.Firefox() | |
browser.get('http://stackoverflow.com/questions/37906704/taking-a-whole-page-screenshot-with-selenium-marionette-in-python') | |
# from here http://stackoverflow.com/questions/1145850/how-to-get-height-of-entire-document-with-javascript | |
js = 'return Math.max( document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);' | |
scrollheight = browser.execute_script(js) | |
if verbose > 0: | |
print scrollheight | |
slices = [] | |
offset = 0 | |
while offset < scrollheight: | |
if verbose > 0: | |
print offset | |
browser.execute_script("window.scrollTo(0, %s);" % offset) | |
img = Image.open(StringIO(browser.get_screenshot_as_png())) | |
offset += img.size[1] | |
slices.append(img) | |
if verbose > 0: | |
browser.get_screenshot_as_file('%s/screen_%s.png' % ('/tmp', offset)) | |
print scrollheight | |
screenshot = Image.new('RGB', (slices[0].size[0], offset)) | |
offset = 0 | |
for img in slices: | |
screenshot.paste(img, (0, offset)) | |
offset += img.size[1] | |
screenshot.save('/tmp/test.png') |
ok, make sense, did change it in the code (untested)
Hello fabtho. I based on your code and made some modification on it.
def full_screenshot(driver, save_path):
# initiate value
save_path = save_path + '.png' if save_path[-4::] != '.png' else save_path
img_li = [] # to store image fragment
offset = 0 # where to start
# js to get height
height = driver.execute_script('return Math.max('
'document.documentElement.clientHeight, window.innerHeight);')
# js to get the maximum scroll height
# Ref--> https://stackoverflow.com/questions/17688595/finding-the-maximum-scroll-position-of-a-page
max_window_height = driver.execute_script('return Math.max('
'document.body.scrollHeight, '
'document.body.offsetHeight, '
'document.documentElement.clientHeight, '
'document.documentElement.scrollHeight, '
'document.documentElement.offsetHeight);')
# looping from top to bottom, append to img list
# Ref--> https://gist.github.com/fabtho/13e4a2e7cfbfde671b8fa81bbe9359fb
while offset < max_window_height:
# Scroll to height
driver.execute_script(f'window.scrollTo(0, {offset});')
img = Image.open(BytesIO((driver.get_screenshot_as_png())))
img_li.append(img)
offset += height
# Stitch image into one
# Set up the full screen frame
img_frame_height = sum([img_frag.size[1] for img_frag in img_li])
img_frame = Image.new('RGB', (img_li[0].size[0], img_frame_height))
offset = 0
for img_frag in img_li:
img_frame.paste(img_frag, (0, offset))
offset += img_frag.size[1]
img_frame.save(save_path)
👍
`def full_screenshot(driver, save_path):
# initiate value
save_path = save_path + '.png' if save_path[-4::] != '.png' else save_path
img_li = [] # to store image fragment
offset = 0 # where to start
# js to get height
height = driver.execute_script('return Math.max('
'document.documentElement.clientHeight, window.innerHeight);')
# js to get the maximum scroll height
# Ref--> https://stackoverflow.com/questions/17688595/finding-the-maximum-scroll-position-of-a-page
max_window_height = driver.execute_script('return Math.max('
'document.body.scrollHeight, '
'document.body.offsetHeight, '
'document.documentElement.clientHeight, '
'document.documentElement.scrollHeight, '
'document.documentElement.offsetHeight);')
# looping from top to bottom, append to img list
# Ref--> https://gist.github.com/fabtho/13e4a2e7cfbfde671b8fa81bbe9359fb
while offset < max_window_height:
# Scroll to height
driver.execute_script(f'window.scrollTo(0, {offset});')
img = Image.open(BytesIO((driver.get_screenshot_as_png())))
img_li.append(img)
offset += height
# Stitch image into one
# Set up the full screen frame
box = (0, height - height * (max_window_height / height - max_window_height // height), img_li[-1].size[0], img_li[-1].size[1])
img_li[-1] = img_li[-1].crop(box)
img_frame_height = sum([img_frag.size[1] for img_frag in img_li])
img_frame = Image.new('RGB', (img_li[0].size[0], img_frame_height))
offset = 0
for img_frag in img_li:
img_frame.paste(img_frag, (0, offset))
offset += img_frag.size[1]
img_frame.save(save_path)`
some changes,
small upgrade if scrolling duplicates information
Ok , i find a very good way to do it ; )
height = driver.execute_script("return document.body.scrollHeight")
driver.set_window_size(900,height+100)
driver.save_screenshot("image.png")
this work great in headless mode.
Thanks fabtho, it worked well and I have implemented through chrome driver
It still creates some duplicated content, mainly for mobile.
It still creates some duplicated content, mainly for mobile.
height = driver.execute_script("return document.body.scrollHeight")
driver.set_window_size(900,height+100)
driver.save_screenshot("image.png")
try this.
work great in headless mode.
@yassinz the code you posted does not work for mobile at all. It only captures the visible area, I tried that initially.
options = webdriver.ChromeOptions()
mobile_emulation = { "deviceName": "iPhone X" }
options.add_experimental_option("mobileEmulation", mobile_emulation)
driver = webdriver.Remote(
command_executor=constants.WEBDRIVER_HUB_URL,
desired_capabilities=DesiredCapabilities.CHROME,
options=options
)
This fixes the duplication for the last image.
@staticmethod
def full_screenshot_with_scroll(driver, save_path):
# initiate value
save_path = save_path.with_suffix("png") if not save_path.match("*.png") else save_path
img_li = [] # to store image fragment
offset = 0 # where to start
# js to get height
height = driver.execute_script("return Math.max(" "document.documentElement.clientHeight, window.innerHeight);")
# js to get the maximum scroll height
# Ref--> https://stackoverflow.com/questions/17688595/finding-the-maximum-scroll-position-of-a-page
max_window_height = driver.execute_script(
"return Math.max("
"document.body.scrollHeight, "
"document.body.offsetHeight, "
"document.documentElement.clientHeight, "
"document.documentElement.scrollHeight, "
"document.documentElement.offsetHeight);"
)
# looping from top to bottom, append to img list
# Ref--> https://gist.github.com/fabtho/13e4a2e7cfbfde671b8fa81bbe9359fb
while offset < max_window_height:
# Scroll to height
driver.execute_script(f"window.scrollTo(0, {offset});")
img = Image.open(BytesIO((driver.get_screenshot_as_png())))
img_li.append(img)
offset += height
# In case it is not a perfect fit, the last image contains extra at the top.
# Crop the screenshot at the top of last image.
extra_height = offset - max_window_height
if extra_height > 0 and len(img_li) > 1:
pixel_ratio = driver.execute_script("return window.devicePixelRatio;")
extra_height *= pixel_ratio
last_image = img_li[-1]
width, height = last_image.size
box = (0, extra_height, width, height)
img_li[-1] = last_image.crop(box)
# Stitch image into one
# Set up the full screen frame
img_frame_height = sum([img_frag.size[1] for img_frag in img_li])
img_frame = Image.new("RGB", (img_li[0].size[0], img_frame_height))
offset = 0
for img_frag in img_li:
img_frame.paste(img_frag, (0, offset))
offset += img_frag.size[1]
img_frame.save(save_path)
+1
I'm getting SystemError: tile cannot extend outside image
in specific websites. Someone know how to fix it?
fixed imports & added a line to remove persistent element such as navbar when scrolling:
#!/usr/bin/python
from selenium import webdriver
from PIL import Image
from io import BytesIO
from pathlib import Path
def full_screenshot_with_scroll(driver, save_path:Path):
# initiate value
save_path = save_path.with_suffix(".png") if not save_path.match("*.png") else save_path
img_li = [] # to store image fragment
offset = 0 # where to start
# js to get height
height = driver.execute_script("return Math.max(" "document.documentElement.clientHeight, window.innerHeight);")
# js to get the maximum scroll height
# Ref--> https://stackoverflow.com/questions/17688595/finding-the-maximum-scroll-position-of-a-page
max_window_height = driver.execute_script(
"return Math.max("
"document.body.scrollHeight, "
"document.body.offsetHeight, "
"document.documentElement.clientHeight, "
"document.documentElement.scrollHeight, "
"document.documentElement.offsetHeight);"
)
# looping from top to bottom, append to img list
# Ref--> https://gist.github.com/fabtho/13e4a2e7cfbfde671b8fa81bbe9359fb
while offset < max_window_height:
# Scroll to height
driver.execute_script(f"window.scrollTo(0, {offset});")
# === uncomment the line and edit id to hide persistent elements when scrolling ===
# driver.execute_script("document.getElementById('navbar').innerHTML = '';")
img = Image.open(BytesIO((driver.get_screenshot_as_png())))
img_li.append(img)
offset += height
# In case it is not a perfect fit, the last image contains extra at the top.
# Crop the screenshot at the top of last image.
extra_height = offset - max_window_height
if extra_height > 0 and len(img_li) > 1:
pixel_ratio = driver.execute_script("return window.devicePixelRatio;")
extra_height *= pixel_ratio
last_image = img_li[-1]
width, height = last_image.size
box = (0, extra_height, width, height)
img_li[-1] = last_image.crop(box)
# Stitch image into one
# Set up the full screen frame
img_frame_height = sum([img_frag.size[1] for img_frag in img_li])
img_frame = Image.new("RGB", (img_li[0].size[0], img_frame_height))
offset = 0
for img_frag in img_li:
img_frame.paste(img_frag, (0, offset))
offset += img_frag.size[1]
img_frame.save(save_path)
@trojblue unfortunately, the code still does not work as intended (atleast for me).
I tried to make a screenshot of this -> https://stackoverflow.com/questions/41721734/take-screenshot-of-full-page-with-selenium-python-with-chromedriver but the problem is that it cuts a portion of the end of a page.
I am currently trying to fix it, if I manage to fix it, I will provide the fix. (But it would be awesome if someone more competent tried to fix it)
P.S - if you take a close look, you can see on the image a few pixels comming from that black footer, which indicates there should be another text in between the footer and the page (footer is cropped as well)
For anyone else who ends up here: in Selenium 4+ there's a save_full_page_screenshot()
function for this that only works with Firefox. Give it a try, but don't expect miracles. In my case the page rendered strangely. 😢
line 35 should be:
screenshot = Image.new('RGB', (slices[0].size[0], offset))