Skip to content

Instantly share code, notes, and snippets.

@mwender
Created May 21, 2024 17:26
Show Gist options
  • Save mwender/4000b7412e2eb25032db9d18293e87e7 to your computer and use it in GitHub Desktop.
Save mwender/4000b7412e2eb25032db9d18293e87e7 to your computer and use it in GitHub Desktop.
[SquareSpace Image Downloader] This Python script downloads all images found in a SquareSpace site XML export. #python
import logging
import os
import shutil
import xml.etree.ElementTree as ET
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
# Setup basic logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Path of the SquareSpace XML export to scan.
xml_file = 'example.xml'
# Namespace prefixes needed to look up <content:encoded> elements.
namespaces = {'content': 'http://purl.org/rss/1.0/modules/content/'}
# Directory the downloaded files are written to (created on demand).
OUTPUT_DIR = 'images'
# File extensions (lower-case) that mark a URL as a downloadable image.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif')
# Seconds to wait for a server before giving up on one image.
REQUEST_TIMEOUT = 30


def is_image_url(url):
    """Return True if the path component of *url* ends in an image extension.

    Only the path is inspected, so query strings such as ``?format=1000w``
    do not hide the extension, and the comparison is case-insensitive so
    ``.JPG`` is accepted as well.
    """
    return urlparse(url).path.lower().endswith(IMAGE_EXTENSIONS)


def image_filename(url):
    """Return the last path segment of *url*, without query or fragment."""
    return urlparse(url).path.rsplit('/', 1)[-1]


def unique_filename(filename, taken):
    """Return *filename*, or a numbered variant of it, that is not in *taken*.

    The chosen name is added to *taken* (a set of names already used in this
    run) so that two URLs sharing a basename do not overwrite each other.
    """
    candidate = filename
    stem, ext = os.path.splitext(filename)
    counter = 0
    while candidate in taken:
        counter += 1
        candidate = f'{stem}-{counter}{ext}'
    taken.add(candidate)
    return candidate


def collect_image_urls(root):
    """Return the set of image URLs referenced by <img> tags in the export.

    *root* is the parsed XML root element. Each ``<item>`` is expected to
    carry its HTML body in ``<content:encoded>``; items without one are
    logged and skipped.
    """
    urls = set()
    for item in root.findall('.//item'):
        content = item.find('content:encoded', namespaces)
        if content is None or content.text is None:
            logging.warning('No content found in an item or content is empty.')
            continue
        soup = BeautifulSoup(content.text, 'html.parser')
        for img_tag in soup.find_all('img'):
            img_url = img_tag.get('src')
            if img_url and is_image_url(img_url):
                urls.add(img_url)
    return urls


def download_image(url, dest_path):
    """Stream *url* to *dest_path*; return True on success, False otherwise.

    Failures are logged rather than raised so that one bad URL does not
    abort the remaining downloads.
    """
    try:
        logging.info(f'Downloading image from {url}')
        # The context manager closes the connection on every exit path.
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as resp:
            if resp.status_code != 200:
                logging.error(f'Failed to download {url}. Status code: {resp.status_code}')
                return False
            # Ask urllib3 to undo any Content-Encoding (gzip/deflate) so the
            # bytes written to disk are the actual image data.
            resp.raw.decode_content = True
            with open(dest_path, 'wb') as local_file:
                shutil.copyfileobj(resp.raw, local_file)
        logging.info(f'Successfully downloaded {url}')
        return True
    except Exception as e:
        # Deliberately broad: reading resp.raw can raise urllib3 errors that
        # are neither requests.RequestException nor OSError, and this is the
        # per-image boundary where every failure should be logged and skipped.
        logging.error(f'Error downloading {url}. Error: {e}')
        return False


def main():
    """Parse the XML export and download every image it references."""
    root = ET.parse(xml_file).getroot()
    images = collect_image_urls(root)
    if not images:
        logging.warning('No images found to download.')
        return
    logging.info(f'Found {len(images)} images to download.')

    # open() does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    used_names = set()
    # Sorted so that collision suffixes are assigned deterministically.
    for img in sorted(images):
        filename = unique_filename(image_filename(img), used_names)
        download_image(img, os.path.join(OUTPUT_DIR, filename))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment