mayo/instagram_import.py

## instagram_import.py
import codecs
from datetime import datetime
import json
import os
import posixpath
import re
import shutil
import sys

# This script will import your instagram photos to your static site. It should work for most, assuming you customize the POST_TEMPLATE to match what your site uses.
# Things to note:
#   * multi-photo posts will be imported as standalone photos with no caption (that's how Instagram exports them)
#   * hashtags get removed from posts, unless you set REMOVE_HASH_TAGS to False.
#   * multiline instagram posts are not handled gracefully if you're using the default POST_TEMPLATE (instagram image caption doesn't work with new lines)

# Configuration parameters START

# All of these paths will be strftime formatted with capture time
#BLOG_PATH = "content/blog/%Y/%m"
BLOG_PATH = "_posts"
#MEDIA_PATH = "content/media/images/photos/%Y/%m"
MEDIA_PATH = "assets/img/photos"
#MEDIA_URL = "/media/images/photos/%Y/%m"
MEDIA_URL = "/assets/img/photos"

# BLOG_POST_FILE_TEMPLATE will be formatted with parameters: capture_time, stub
BLOG_POST_FILE_TEMPLATE = "{capture_time:%Y}-{capture_time:%m}-{capture_time:%d}-{slug:s}.md"

# POST_TEMPLATE will be formatted with post_time, caption, and photo_file parameters. Photo file is full path derived from MEDIA_URL
#POST_TEMPLATE = """---
#type: photo
#imported: "instagram"
#created: !!timestamp '{post_time:%Y-%m-%d %H:%M:%S}'
#tags:
#    - microblog
#---
#![{caption:s}]({photo_path:s})
#
#"""

POST_TEMPLATE = """
---
layout: post
title: "{caption:s}"
date: {post_time:%Y-%m-%d %H:%M}
imported: instagram
---

![{caption:s}]({photo_path:s})

"""

REMOVE_HASH_TAGS = True

# Configuration parameters END

if len(sys.argv) < 2:
    print 'First parameter has to be path to directory with Instagram data'
    exit(1)

instagram_data = sys.argv[1]
media_file = os.path.join(instagram_data, 'media.json')
data = {}

if not os.path.exists(instagram_data) or not os.path.exists(media_file):
    print """Directory "{:s}" doesn't exist or is missing media.json file"""
    exit(1)

with open(media_file) as fd:
    data = json.load(fd)

photos = data.get('photos', [])

for photo in photos:
    if REMOVE_HASH_TAGS:
        caption = re.sub(r'#[^\n\s]+', '', photo['caption']).strip()

    # Encode everything as UTF-8
    caption = codecs.encode(caption, 'utf-8')
    capture_time = datetime.strptime(photo['taken_at'], "%Y-%m-%dT%H:%M:%S")
    instagram_photo_path = os.path.join(instagram_data, photo['path'])

    # If the image file is not there, move on. If you don't want to import certain images, just delete them first.
    if not os.path.exists(instagram_photo_path):
        continue

    blog_path = datetime.strftime(capture_time, BLOG_PATH)
    media_path = datetime.strftime(capture_time, MEDIA_PATH)
    media_url = datetime.strftime(capture_time, MEDIA_URL)

    photo_file = os.path.basename(instagram_photo_path)

    post = POST_TEMPLATE.format(
        post_time=capture_time,
        caption=caption,
        photo_path=posixpath.join(media_url, photo_file)
    )

    post_file = None

    if caption:
        slug = re.sub(r"[^a-z0-9 ]", '', caption.lower()).strip()
        slug = re.sub(r" (the|a|an) ", ' ', slug).strip()
        slug = re.sub(r"\s+", ' ', slug).strip()
        slug = "-".join(slug.split()[0:3])

        post_file = BLOG_POST_FILE_TEMPLATE.format(
            capture_time=capture_time,
            slug=slug
        )

        # If the filename derived from caption exists, don't use it
        if os.path.exists(os.path.join(blog_path, post_file)):
            post_file = None

    if not post_file:
        post_file = BLOG_POST_FILE_TEMPLATE.format(
            capture_time=capture_time,
            slug="instagram-{:s}".format(os.path.splitext(photo_file)[0])
        )

    post_path = os.path.join(blog_path, post_file)

    try:
        os.makedirs(media_path)
        os.makedirs(os.path.dirname(post_path))
    except OSError as err:
        # Ignore "directory exists" errors
        if err.errno != 17:
            raise

    with open(post_path, 'w') as fd:
        fd.write(post)
        shutil.copy(instagram_photo_path, media_path)
	import codecs
	from datetime import datetime
	import json
	import os
	import posixpath
	import re
	import shutil
	import sys

	# This script will import your instagram photos to your static site. It should work for most, assuming you customize the POST_TEMPLATE to match what your site uses.
	# Things to note:
	# * multi-photo posts will be imported as standalone photos with no caption (that's how Instagram exports them)
	# * hashtags get removed from posts, unless you set REMOVE_HASH_TAGS to False.
	# * multiline instagram posts are not handled gracefully if you're using the default POST_TEMPLATE (instagram image caption doesn't work with new lines)

	# Configuration parameters START

	# All of these paths will be strftime formatted with capture time
	#BLOG_PATH = "content/blog/%Y/%m"
	BLOG_PATH = "_posts"
	#MEDIA_PATH = "content/media/images/photos/%Y/%m"
	MEDIA_PATH = "assets/img/photos"
	#MEDIA_URL = "/media/images/photos/%Y/%m"
	MEDIA_URL = "/assets/img/photos"

	# BLOG_POST_FILE_TEMPLATE will be formatted with parameters: capture_time, stub
	BLOG_POST_FILE_TEMPLATE = "{capture_time:%Y}-{capture_time:%m}-{capture_time:%d}-{slug:s}.md"

	# POST_TEMPLATE will be formatted with post_time, caption, and photo_file parameters. Photo file is full path derived from MEDIA_URL
	#POST_TEMPLATE = """---
	#type: photo
	#imported: "instagram"
	#created: !!timestamp '{post_time:%Y-%m-%d %H:%M:%S}'
	#tags:
	# - microblog
	#---
	#![{caption:s}]({photo_path:s})
	#
	#"""

	POST_TEMPLATE = """
	---
	layout: post
	title: "{caption:s}"
	date: {post_time:%Y-%m-%d %H:%M}
	imported: instagram
	---

	![{caption:s}]({photo_path:s})

	"""

	REMOVE_HASH_TAGS = True

	# Configuration parameters END

	if len(sys.argv) < 2:
	print 'First parameter has to be path to directory with Instagram data'
	exit(1)

	instagram_data = sys.argv[1]
	media_file = os.path.join(instagram_data, 'media.json')
	data = {}

	if not os.path.exists(instagram_data) or not os.path.exists(media_file):
	print """Directory "{:s}" doesn't exist or is missing media.json file"""
	exit(1)

	with open(media_file) as fd:
	data = json.load(fd)

	photos = data.get('photos', [])

	for photo in photos:
	if REMOVE_HASH_TAGS:
	caption = re.sub(r'#[^\n\s]+', '', photo['caption']).strip()

	# Encode everything as UTF-8
	caption = codecs.encode(caption, 'utf-8')
	capture_time = datetime.strptime(photo['taken_at'], "%Y-%m-%dT%H:%M:%S")
	instagram_photo_path = os.path.join(instagram_data, photo['path'])

	# If the image file is not there, move on. If you don't want to import certain images, just delete them first.
	if not os.path.exists(instagram_photo_path):
	continue

	blog_path = datetime.strftime(capture_time, BLOG_PATH)
	media_path = datetime.strftime(capture_time, MEDIA_PATH)
	media_url = datetime.strftime(capture_time, MEDIA_URL)

	photo_file = os.path.basename(instagram_photo_path)

	post = POST_TEMPLATE.format(
	post_time=capture_time,
	caption=caption,
	photo_path=posixpath.join(media_url, photo_file)
	)

	post_file = None

	if caption:
	slug = re.sub(r"[^a-z0-9 ]", '', caption.lower()).strip()
	slug = re.sub(r" (the\|a\|an) ", ' ', slug).strip()
	slug = re.sub(r"\s+", ' ', slug).strip()
	slug = "-".join(slug.split()[0:3])

	post_file = BLOG_POST_FILE_TEMPLATE.format(
	capture_time=capture_time,
	slug=slug
	)

	# If the filename derived from caption exists, don't use it
	if os.path.exists(os.path.join(blog_path, post_file)):
	post_file = None

	if not post_file:
	post_file = BLOG_POST_FILE_TEMPLATE.format(
	capture_time=capture_time,
	slug="instagram-{:s}".format(os.path.splitext(photo_file)[0])
	)

	post_path = os.path.join(blog_path, post_file)

	try:
	os.makedirs(media_path)
	os.makedirs(os.path.dirname(post_path))
	except OSError as err:
	# Ignore "directory exists" errors
	if err.errno != 17:
	raise

	with open(post_path, 'w') as fd:
	fd.write(post)
	shutil.copy(instagram_photo_path, media_path)