Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Robin-Lord/2ec7b716ebdfaaa9a3dfa007d080066f to your computer and use it in GitHub Desktop.
Save Robin-Lord/2ec7b716ebdfaaa9a3dfa007d080066f to your computer and use it in GitHub Desktop.
Some quick code to find and extract all of your Invision files using the "download zip" command for each project
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Import our libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import selenium\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.action_chains import ActionChains\n",
"\n",
"\n",
"import time\n",
"import os\n",
"import datetime\n",
"\n",
"\n",
"import re\n",
"import copy"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Defining a couple functions"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Function to open up firefox\n",
"def setup_firefox(driver_location = None):\n",
" driver_args = {}\n",
" if driver_location != None:\n",
" driver_args[\"executable_path\"] = driver_location\n",
" \n",
" profile = webdriver.FirefoxProfile()\n",
" profile.set_preference(\"browser.helperApps.neverAsk.saveToDisk\", \"text/plain, application/octet-stream, application/binary, text/csv, application/csv, application/excel, text/comma-separated-values, text/xml, application/xml\")\n",
" profile.set_preference(\"browser.download.folderList\", 2)\n",
" profile.set_preference(\"browser.download.manager.showWhenStarting\", False)\n",
" profile.set_preference(\"browser.helperApps.alwaysAsk.force\",False)\n",
" \n",
" driver_args[\"firefox_profile\"] = profile\n",
"\n",
" driver = webdriver.Firefox(**driver_args)\n",
" \n",
" return driver"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# This is trying to minimise how often we're opening new windows\n",
"# They're such a pain in Selenium\n",
"\n",
"def make_button_safe(button):\n",
" try:\n",
" driver.execute_script(\"arguments[0].target='_self';\", button)\n",
" except:\n",
" \"is safe\"\n",
" \n",
" button.click()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Giving details of where to look and setting up Selenium\n",
"\n",
"If you have Firefox geckodriver already in your Path variable you can change just delete the driver_location argument"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Setting locations for our Selenium driver, and starting Firefox\n",
"\n",
"driver_location = r\"\"\"C:\\Users\\Robin\\Dropbox (Distilled LLC)\\Notebooks\\Selenium drivers\\geckodriver.exe\"\"\"\n",
"driver = setup_firefox(driver_location)\n",
"original_window = driver.window_handles"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Go to the Invision homepage\n",
"driver.get(\"\"\"https://projects.invisionapp.com\"\"\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# <font color='red'>Action:</font> Now manually log in - I CBA to automate that"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Extracting all the urls of your projects"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def extract_urls():\n",
" # Assuming project list is paginated\n",
" has_next = True\n",
"\n",
" # Creating a list of project urls to populate\n",
" total_proj_urls = []\n",
"\n",
" # While we have another page to look at\n",
" while has_next:\n",
"\n",
" # Find all projects\n",
" project_links = driver.find_elements_by_css_selector(\"\"\".projects__project-click-area\"\"\")\n",
"\n",
" # Extract the urls from all of the project links\n",
" proj_urls = [proj_link.get_attribute(\"href\") for proj_link in project_links]\n",
"\n",
" # Add all the found urls to our total list\n",
" total_proj_urls = total_proj_urls + proj_urls\n",
"\n",
" # Try to click the next page\n",
" try:\n",
" # Wait a little bit to be gentle with the site\n",
" time.sleep(5)\n",
" next_button = driver.find_element_by_css_selector(\"li[class=next]\")\n",
" next_button.click()\n",
" except:\n",
" # If we can't click the next button we assume there ISN'T a next button\n",
" # so we stop \n",
" has_next = False\n",
"\n",
" # Print out all the project links in case they are helpful later\n",
" print (\"Project links:\")\n",
" print (total_proj_urls)\n",
" \n",
"extract_urls()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create some copies of our list, just in case\n",
"working_list = copy.deepcopy(total_proj_urls)\n",
"backup_list = copy.deepcopy(total_proj_urls)\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Going through each project and downloading Zip file"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def export_projects(working_list, original_window, wait_time = 300):\n",
" \n",
" # Create a failed_list for us to populate with all the urls which didn't work\n",
" failed_list = []\n",
" \n",
" for num in range(len(working_list)):\n",
" # Go through our list of urls\n",
"\n",
" try:\n",
"\n",
" # Making a note that we haven't successfully downloaded\n",
" # this file yet\n",
" download_success = False\n",
"\n",
" # First, take the url we're going to be using out of the list\n",
" proj_url = working_list.pop(0)\n",
" \n",
"# Make sure we're in the right window\n",
" driver.switch_to.window(original_window[0])\n",
"\n",
" # Go to the project url, leave it a few seconds to load\n",
" driver.get(proj_url)\n",
" time.sleep(5)\n",
"\n",
"\n",
" # Find the menu which allows us to download a zip file\n",
" # Wait a second for it to load\n",
" db_1 = driver.find_element_by_css_selector(\"\"\"a[class='more link']\"\"\")\n",
" make_button_safe(db_1)\n",
" time.sleep(1)\n",
"\n",
"\n",
" # Find the menu item which says we want to download\n",
" # Wait a second for it to load\n",
" db_2 = driver.find_element_by_css_selector(\"\"\"a[ng-click*='Download']\"\"\")\n",
" make_button_safe(db_2)\n",
" time.sleep(1)\n",
"\n",
"\n",
" # In the new window that comes up, confirm we want to download\n",
" # the full zip file\n",
" db_3 = driver.find_element_by_css_selector(\"\"\"button[class*='download']\"\"\")\n",
" make_button_safe(db_3)\n",
"\n",
"\n",
" # Generating the zip can take a little while so first we wait\n",
" time.sleep(10)\n",
"\n",
"\n",
" # Get a list of all the currently open windows\n",
" handles_list = driver.window_handles\n",
"\n",
" # Finding the handle for the most recently opened window\n",
" most_recent_window = len(handles_list)-1\n",
"\n",
" # Switching to that window\n",
" driver.switch_to.window(handles_list[most_recent_window])\n",
"\n",
" # Allowing for if the zip file hasn't loaded properly (sometimes they take a while)\n",
" total_sleep_time = 0\n",
" sleep_addition = 10\n",
"\n",
" while download_success == False:\n",
" # If we haven't downloaded the file yet\n",
"\n",
" try:\n",
" # Look for the download link and try to click it\n",
" db_4 = driver.find_element_by_css_selector(\"\"\"a[class='button export']\"\"\")\n",
" make_button_safe(db_4)\n",
"\n",
" # When complete - make a note that we've successfully downloaded\n",
" download_success = True\n",
"\n",
" if num == 0:\n",
" # If this is the first download, give us extra time to tell Firefox\n",
" # that it is OK to download\n",
" time.sleep(30)\n",
"\n",
" except Exception as e:\n",
" # If we fail, wait a little bit, record how long we've waited\n",
" time.sleep(sleep_addition)\n",
" total_sleep_time = total_sleep_time + sleep_addition\n",
"\n",
" if total_sleep_time > 30:\n",
"# If we've been waiting for a little bit, something might have broken\n",
"# Invision-side, this is a quick check to save us some time.\n",
" html = driver.page_source\n",
" if \"prototype has missing assets\" in html:\n",
" print (\"Missing assets: {}\".format(proj_url))\n",
" raise(e)\n",
" \n",
" if total_sleep_time > wait_time:\n",
" # If it's taken more than 5 minutes - raise an exception so we don't just get stuck\n",
" raise(e)\n",
"\n",
" if len(handles_list) > 1:\n",
" # Close the newly opened window and switch to the original\n",
" driver.close()\n",
" driver.switch_to.window(original_window[0])\n",
"\n",
" except:\n",
" # If for some reason we failed, record the failing url\n",
" failed_list = failed_list + [proj_url]\n",
" print (\"Failed: {}\".format(proj_url))\n",
"\n",
" # Get the total list of open windows\n",
" handles_list = driver.window_handles\n",
"\n",
" while len(handles_list) > 1:\n",
" # We know we have multiple windows open, which we don't want\n",
" # we close them all to avoid getting confused about which window\n",
" # to focus on\n",
"\n",
" # Switch to most recently opened window and close it\n",
" most_recent_window = len(handles_list)-1 \n",
" driver.close()\n",
"\n",
" # Get new list of handles\n",
" handles_list = driver.window_handles\n",
" \n",
"# Return our list of failed urls and our working list \n",
"# in case we have to stop part-way and restart\n",
" return failed_list, working_list\n",
" \n",
"failed_list_1, working_list = export_projects(working_list = working_list, \n",
" original_window = original_window)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# If you want - print out all the times this script failed"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"failed_list_1 = copy.deepcopy(failed_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print (\"Failed urls\")\n",
"\n",
"for url in failed_list_1:\n",
" print (url)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# If you want - rerun the process before on the failed urls only\n",
"We'll increase the wait time just to make sure something funny didn't happen"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"failed_list_1_backup = copy.deepcopy(failed_list_1)\n",
"\n",
"failed_list_2, working_list = export_projects(working_list = failed_list_1,\n",
" original_window = original_window,\n",
" wait_time = 700)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment