Skip to content

Instantly share code, notes, and snippets.

@mayo
Last active May 29, 2021 19:02
Show Gist options
  • Star 7 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mayo/90a02222fe6a2826044572d7e8192551 to your computer and use it in GitHub Desktop.
Save mayo/90a02222fe6a2826044572d7e8192551 to your computer and use it in GitHub Desktop.
Import Instagram data into a static site (Jekyll, Hugo, Hana, Metalsmith, etc.)
import codecs
from datetime import datetime
import json
import os
import posixpath
import re
import shutil
import sys
# This script imports your Instagram photos into your static site. It should work for most sites, provided you customize POST_TEMPLATE to match what your site uses.
# Things to note:
# * multi-photo posts are imported as standalone photos with no caption (that's how Instagram exports them)
# * hashtags are removed from captions, unless you set REMOVE_HASH_TAGS to False.
# * multi-line Instagram captions are not handled gracefully if you're using the default POST_TEMPLATE (the image caption syntax doesn't work with newlines)
# Configuration parameters START

# All of these paths are strftime-formatted with the photo's capture time, so
# they may contain directives such as %Y or %m. The commented-out variants
# show a per-year/per-month layout.
#BLOG_PATH = "content/blog/%Y/%m"
BLOG_PATH = "_posts"
#MEDIA_PATH = "content/media/images/photos/%Y/%m"
MEDIA_PATH = "assets/img/photos"
#MEDIA_URL = "/media/images/photos/%Y/%m"
MEDIA_URL = "/assets/img/photos"

# BLOG_POST_FILE_TEMPLATE is formatted with parameters: capture_time, slug
BLOG_POST_FILE_TEMPLATE = "{capture_time:%Y}-{capture_time:%m}-{capture_time:%d}-{slug:s}.md"

# POST_TEMPLATE is formatted with post_time, caption, and photo_path
# parameters. photo_path is the photo's URL, built from MEDIA_URL and the
# photo's file name.
#POST_TEMPLATE = """---
#type: photo
#imported: "instagram"
#created: !!timestamp '{post_time:%Y-%m-%d %H:%M:%S}'
#tags:
# - microblog
#---
#![{caption:s}]({photo_path:s})
#
#"""
# The opening "---" must be the very first line of the generated file:
# front matter that is preceded by a blank line is not recognised as front
# matter, so the template must not start with a newline.
# NOTE: the caption is inserted verbatim between double quotes, so a caption
# that itself contains double quotes or newlines yields an invalid title.
POST_TEMPLATE = """---
layout: post
title: "{caption:s}"
date: {post_time:%Y-%m-%d %H:%M}
imported: instagram
---
![{caption:s}]({photo_path:s})
"""

# When True, "#hashtag" words are stripped from captions before use.
REMOVE_HASH_TAGS = True

# Configuration parameters END
# The code below runs unchanged on Python 2.7 and Python 3: text is kept as
# the interpreter's native ``str`` while formatting, and files are read and
# written as UTF-8 bytes so the result does not depend on the locale.


def to_native_str(text):
    """Return ``text`` as the interpreter's native ``str`` type.

    On Python 2 a ``unicode`` value is encoded to UTF-8 bytes so that it can
    be formatted into the byte-string templates; on Python 3 (and for values
    that already are ``str``) the value is returned unchanged.
    """
    if str is bytes and not isinstance(text, str):
        return text.encode('utf-8')
    return text


def clean_caption(raw_caption, remove_hash_tags):
    """Normalise a caption taken from the Instagram export.

    A missing caption (``None``) becomes an empty string, ``#hashtag`` words
    are removed only when ``remove_hash_tags`` is true, and surrounding
    whitespace is trimmed in both cases.
    """
    caption = raw_caption or ''
    if remove_hash_tags:
        caption = re.sub(r'#\S+', '', caption)
    return caption.strip()


def slugify(caption):
    """Build a file-name slug from the first three words of ``caption``.

    Everything except ASCII lowercase letters, digits and spaces is dropped,
    the articles "the", "a" and "an" are removed where they stand between two
    spaces, and the first three remaining words are joined with dashes.
    Returns an empty string when nothing usable is left, e.g. for an
    emoji-only caption.
    """
    slug = re.sub(r"[^a-z0-9 ]", '', caption.lower()).strip()
    slug = re.sub(r" (the|a|an) ", ' ', slug)
    # split() already ignores leading, trailing and repeated spaces.
    return "-".join(slug.split()[:3])


def choose_post_file(file_template, blog_path, capture_time, caption, photo_file):
    """Pick the file name (without directory) for a post.

    The caption-derived slug is preferred. The name falls back to
    ``instagram-<photo file name without extension>`` when the caption yields
    no slug or when a file with the caption-derived name already exists in
    ``blog_path``.
    """
    slug = slugify(caption)
    if slug:
        candidate = file_template.format(capture_time=capture_time, slug=slug)
        if not os.path.exists(os.path.join(blog_path, candidate)):
            return candidate
    fallback_slug = "instagram-{:s}".format(os.path.splitext(photo_file)[0])
    return file_template.format(capture_time=capture_time, slug=fallback_slug)


def ensure_dir(path):
    """Create ``path`` and any missing parents; do nothing if it already exists.

    Raises OSError when the directory cannot be created for any reason other
    than it already being there.
    """
    if not path:
        # An empty path means "current directory"; nothing to create.
        return
    try:
        os.makedirs(path)
    except OSError:
        # Only swallow the error when the directory really is in place.
        if not os.path.isdir(path):
            raise


def write_post(post_path, post):
    """Write the rendered ``post`` to ``post_path`` as UTF-8."""
    if not isinstance(post, bytes):
        post = post.encode('utf-8')
    with open(post_path, 'wb') as fd:
        fd.write(post)


def main(argv=None):
    """Import every photo listed in ``<data dir>/media.json`` as a blog post.

    ``argv`` defaults to ``sys.argv``; its second element is the directory
    holding the Instagram export. For each photo whose image file is present,
    a post rendered from POST_TEMPLATE is written below BLOG_PATH and the
    image is copied into MEDIA_PATH. Exits with status 1 and a message on
    stderr when the directory argument is missing or has no media.json.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('First parameter has to be path to directory with Instagram data')

    instagram_data = argv[1]
    media_file = os.path.join(instagram_data, 'media.json')
    if not os.path.exists(instagram_data) or not os.path.exists(media_file):
        sys.exit(
            """Directory "{:s}" doesn't exist or is missing media.json file""".format(instagram_data)
        )

    # Decode explicitly so the result does not depend on the platform locale.
    with open(media_file, 'rb') as fd:
        data = json.loads(fd.read().decode('utf-8'))

    for photo in data.get('photos', []):
        instagram_photo_path = os.path.join(instagram_data, photo['path'])
        # If the image file is not there, move on. If you don't want to
        # import certain images, just delete them first.
        if not os.path.exists(instagram_photo_path):
            continue

        caption = to_native_str(clean_caption(photo.get('caption'), REMOVE_HASH_TAGS))
        capture_time = datetime.strptime(photo['taken_at'], "%Y-%m-%dT%H:%M:%S")

        blog_path = capture_time.strftime(BLOG_PATH)
        media_path = capture_time.strftime(MEDIA_PATH)
        media_url = capture_time.strftime(MEDIA_URL)
        photo_file = to_native_str(os.path.basename(instagram_photo_path))

        post = POST_TEMPLATE.format(
            post_time=capture_time,
            caption=caption,
            photo_path=posixpath.join(media_url, photo_file)
        )
        post_file = choose_post_file(
            BLOG_POST_FILE_TEMPLATE, blog_path, capture_time, caption, photo_file
        )
        post_path = os.path.join(blog_path, post_file)

        # Create each directory on its own: an already existing media
        # directory must not prevent the post directory from being created.
        ensure_dir(media_path)
        ensure_dir(os.path.dirname(post_path))

        write_post(post_path, post)
        shutil.copy(instagram_photo_path, media_path)


if __name__ == "__main__":
    main()
@rdsaunders
Copy link

This has been really helpful. Not sure if you are still using this but wondered if you could extend it slightly.

It would be great if the location of the photo could be added to the post template as well as any of the #tags that you strip out as a tag array?

I’m completely new to python and did attempt to make some changes but couldn’t get it to work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment