Skip to content

Instantly share code, notes, and snippets.

@kickingvegas
Created February 22, 2013 20:30
Show Gist options
  • Save kickingvegas/5016334 to your computer and use it in GitHub Desktop.
Save kickingvegas/5016334 to your computer and use it in GitHub Desktop.
Python script that parses Posterous API JSON for a user's posts, generates a Markdown entry with Pelican metadata tags for each post, and (when run with the -f option) downloads the image assets associated with the post. It is functional, yet nowhere near production/release-level code.
#!/usr/bin/env python
# Copyright 2012 Yummy Melon Software LLC
# Author: Charles Y. Choi
#
import getopt
import io
import json
import os
import shutil
import subprocess
import sys
from datetime import datetime

try:
    import urlparse  # Python 2
except ImportError:
    import urllib.parse as urlparse  # Python 3
# One-line usage summary. It carries no trailing newline: callers either
# append one themselves or follow it with helpString, which starts with one.
usageString = '%s [-hvf] [-p prefix] posterous.json' % os.path.basename(sys.argv[0])

# Option reference shown by -h/--help. Every option accepted by the getopt
# specification in the __main__ block is listed here.
helpString = """
-h, --help help
-v, --version version
-f, --fetchimages download post images with curl
-p, --prefix PREFIX output file name prefix (default: nfdn)
"""
class Application:
    """Convert Posterous API post JSON into Pelican-flavoured Markdown files.

    For every post object in the input JSON array one ``<prefix>_<date>.md``
    file is written to the current directory.  The file starts with Pelican
    metadata lines (Title, Date, Slug, Author, optional Tags), followed by an
    ``<img>`` tag for each full-size image of the post and the post's HTML
    body.  When the ``fetchimages`` option is on, each image is also
    downloaded with ``curl`` into ``images/<dir>/<name>``.

    Text is kept as unicode internally and encoded as UTF-8 only when the
    Markdown file is written, so the class runs on Python 2 and Python 3.
    """

    def __init__(self):
        self.version = 1.0
        self.options = {}
        # Prefix of every generated Markdown file name.
        self.options['prefix'] = 'nfdn'
        # When True, image assets are downloaded with curl.
        self.options['fetchimages'] = False
        # Value written to the "Author:" metadata line.
        self.options['author'] = 'Charles Choi'

    def run(self, optlist, args):
        """Apply command-line options, then convert every post in args[0].

        optlist -- (option, value) pairs as returned by getopt.getopt().
        args    -- positional arguments; args[0] is the path of a JSON file
                   holding an array of Posterous post objects.

        Exits the process with status 1 for -h/--help or when no input file
        is given, and with status 0 for -v/--version.
        """
        for opt, value in optlist:
            if opt in ('-h', '--help'):
                sys.stderr.write(usageString)
                sys.stderr.write(helpString)
                sys.exit(1)
            elif opt in ('-v', '--version'):
                sys.stdout.write('%s\n' % str(self.version))
                sys.exit(0)
            elif opt in ('-f', '--fetchimages'):
                self.options['fetchimages'] = True
            elif opt in ('-p', '--prefix'):
                self.options['prefix'] = value

        if len(args) < 1:
            # Say why we are bailing out instead of exiting silently.
            sys.stderr.write('error: no Posterous JSON file given\n')
            sys.stderr.write(usageString + '\n')
            sys.exit(1)

        # Decode explicitly as UTF-8 so the result does not depend on the locale.
        with io.open(args[0], 'r', encoding='utf-8') as infile:
            posterousJson = json.load(infile)

        for jsonObj in posterousJson:
            self.processJsonObj(jsonObj)

    def pullMedia(self, jsonObj):
        """Return the URLs of the full-size images attached to a post.

        Posts without a 'media' or 'images' entry yield an empty list, and
        images that have no 'full' variant are skipped.
        """
        media = jsonObj.get('media') or {}
        images = media.get('images') or []
        return [image['full']['url'] for image in images if 'full' in image]

    @staticmethod
    def _toNative(text):
        """Return text as the native str type (UTF-8 bytes on Python 2)."""
        if isinstance(text, str):
            return text
        return text.encode('utf-8')

    @staticmethod
    def _parseDisplayDate(displayDate):
        """Parse 'YYYY/MM/DD HH:MM:SS [offset]', ignoring any trailing offset."""
        fields = displayDate.split()
        # Only the date and time tokens are used, so a value without a
        # trailing timezone token parses as well.
        return datetime.strptime(' '.join(fields[:2]), '%Y/%m/%d %H:%M:%S')

    def _fetchImage(self, url, imagePath, imageName):
        """Download url into imagePath/imageName with curl, warning on failure."""
        if not os.path.exists(imagePath):
            os.makedirs(imagePath)
        target = os.path.join(imagePath, imageName)
        cmdList = ['curl', '-o', self._toNative(target),
                   '-G', '-L', self._toNative(url)]
        status = subprocess.call(cmdList)
        if status != 0:
            # Keep going with the remaining images, but do not hide the failure.
            sys.stderr.write('warning: curl exited with status {0} for {1}\n'.format(
                status, self._toNative(url)))

    def processJsonObj(self, jsonObj):
        """Write the Markdown entry for one post and optionally fetch its images.

        jsonObj must provide 'display_date', 'title', 'slug' and 'body_html';
        'tags' and 'media' are optional.  The output file name is written to
        stdout, followed by one trace line per image.
        """
        d = self._parseDisplayDate(jsonObj['display_date'])
        fileName = '{0}_{1}.md'.format(self.options['prefix'],
                                       d.strftime('%Y_%m_%d_%H%M%S'))
        sys.stdout.write(fileName)
        sys.stdout.write('\n')

        bufList = [
            u'Title: {0}'.format(jsonObj['title']),
            u'Date: {0}'.format(d.strftime('%Y-%m-%d %H:%M')),
            u'Slug: {0}'.format(jsonObj['slug']),
            u'Author: {0}'.format(self.options.get('author', 'Charles Choi')),
        ]

        tagList = [tagObj['name'] for tagObj in jsonObj.get('tags') or []]
        if tagList:
            bufList.append(u'Tags: {0}'.format(u', '.join(tagList)))

        # Separates the metadata header from the entry body.
        bufList.append(u'\n')

        for url in self.pullMedia(jsonObj):
            urlPath = urlparse.urlparse(url).path
            pathComponents = urlPath.split('/')
            if len(pathComponents) < 2:
                sys.stderr.write('warning: skipping image URL without a path: {0}\n'.format(
                    self._toNative(url)))
                continue

            # The last two path components become images/<dir>/<name>.
            imageDir = pathComponents[-2].strip()
            imageName = pathComponents[-1].strip()
            sys.stdout.write('{0} {1} {2}\n'.format(self._toNative(urlPath),
                                                    self._toNative(imageDir),
                                                    self._toNative(imageName)))

            if self.options['fetchimages']:
                self._fetchImage(url, os.path.join('images', imageDir), imageName)

            bufList.append(u'<p><img src="static/images/{0}/{1}"/></p>'.format(
                imageDir, imageName))

        bufList.append(u'{0}'.format(jsonObj['body_html']))

        # Encode once, at the file boundary.
        with io.open(fileName, 'w', encoding='utf-8') as outfile:
            outfile.write(u'\n'.join(bufList))
            outfile.write(u'\n')
# Script entry point: parse the command line and hand over to Application.
if __name__ == '__main__':
    try:
        # Short options h, v, f are flags; p takes a value (the file prefix).
        optlist, args = getopt.getopt(sys.argv[1:], 'hvp:f',
                                      ('help',
                                       'version',
                                       'prefix=',
                                       'fetchimages'))
    except getopt.GetoptError as err:
        # "except X as e" is valid on Python 2.6+ and Python 3; str(err)
        # gives the getopt message without relying on exception indexing.
        sys.stderr.write(str(err) + '\n')
        sys.stderr.write(usageString + '\n')
        sys.exit(1)

    app = Application()
    app.run(optlist, args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment