Skip to content

Instantly share code, notes, and snippets.

@ydnar
Created July 20, 2009 04:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ydnar/150161 to your computer and use it in GitHub Desktop.
Save ydnar/150161 to your computer and use it in GitHub Desktop.
Export Vox blogs to Movable Type Import Format (MTIF)
/*
vox2mtif
Copyright 2009 Randy Reddig - http://ydnar.com
CSS suitable for basic styling of Vox enclosures in a TypePad or Movable Type blog.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* enclosures */
/* Base box for every Vox enclosure (photo, embed, link, ...). */
.enclosure {
    margin: 0 0 18px 0;
    text-align: center;
    font-size: 12px;
}

/* set enclosure-inner defaults */
.enclosure-inner {
    text-align: center;
    overflow: hidden;
}

.embed-enclosure .enclosure-inner {
    border: 0;
}

/* enclosure formats: fixed widths per size class */
.enclosure-extra-large .enclosure-inner { width: 500px; }
.enclosure-large .enclosure-inner { width: 320px; }
.enclosure-medium .enclosure-inner { width: 200px; }
.enclosure-small .enclosure-inner { width: 120px; }
.enclosure-strip-vertical .enclosure-inner { width: 130px; }

.enclosure-strip .enclosure-inner,
.enclosure-strip .enclosure-inner a img {
    margin: 0 5px 18px 5px;
}

/* enclosure alignment */
.enclosure-left { float: left; margin-right: 20px; }
.enclosure-right { float: right; margin-left: 20px; }
.enclosure-center { clear: both; }

.enclosure-center .enclosure-inner {
    margin-left: auto;
    margin-right: auto;
}

/* enclosure list */
.enclosure-list .enclosure-item {
    margin-bottom: 18px;
}

.enclosure-list .last { margin-bottom: 0; }

.enclosure-asset-name {
    margin-bottom: 0;
    padding-bottom: 0;
    overflow: hidden;
    font-size: 12px;
}

.enclosure-comments a {
    /* Legacy Gecko fallback; browsers that understand inline-block use the
       later declaration instead. */
    display: -moz-inline-box;
    display: inline-block; /* win-ie - displays correctly */
    padding-left: 10px;
}

.enclosure-embed-source {
    overflow: hidden;
    padding: 10px;
    background: #eee;
    /* The stack must end in the generic family "monospace"; "fixed-width" is
       not a CSS generic keyword, so it was treated as the name of a font
       that is never installed and the text could fall back to a
       proportional face. */
    font: normal 10px "Courier New", Courier, Monaco, monospace;
    text-align: left;
}

.enclosure-embed {
    overflow: hidden;
}

/* link assets */
.link-enclosure .enclosure-inner {
    text-align: left;
}

.link-enclosure .enclosure-image {
    float: left;
    margin: 0 5px 0 0;
}

/* override custom bullets in enclosures */
.asset-body .enclosure ul li { list-style-image: none; }
#!/usr/bin/python
# vox2mtif
# Copyright 2009 Randy Reddig - http://ydnar.com
# Converts a Vox blog's public posts into a format suitable for importing into TypePad or Movable Type.
# http://www.movabletype.org/documentation/appendices/import-export-format.html
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# 'Software'), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import re
import sys
import time
import codecs
import locale
import getopt
# http://code.google.com/p/httplib2
import httplib2
# http://www.feedparser.org/
import feedparser
# http://www.crummy.com/software/BeautifulSoup/
from BeautifulSoup import BeautifulSoup
class VoxPop(object):
    """Export the public posts of a Vox blog as Movable Type Import Format.

    Typical use: call parse_feeds() to collect the Atom entries, then for
    each entry call map_entry_categories() and fetch_entry_content(), and
    finally serialize_entry() (or serialize_entries() for all of them).
    """

    # Categories assigned to every exported entry.
    DEFAULT_CATEGORIES = (
        'Archive',
        'Vox',
        'Imported',
    )

    # Vox tag term -> MT category name. Several tags may share one category.
    TAGS_TO_CATEGORIES = {
        'apple': 'Apple',
        'art': 'Art',
        'awesome': 'Awesome',
        'bicycle': 'Cycling',
        'california': 'California',
        'climbing': 'Climbing',
        'code': 'Code',
        'cycling': 'Cycling',
        'dance': 'Music',
        'design': 'Design',
        'electro': 'Music',
        'film': 'Film',
        'friends': 'Friends',
        'helvetica': 'Typography',
        'hawaii': 'Hawaii',
        'javascript': 'JavaScript',
        'new york': 'New York',
        'politics': 'Politics',
        'python': 'Python',
        'ruby': 'Ruby',
        'seattle': 'Seattle',
        'six apart': 'Six Apart',
        'san francisco': 'San Francisco',
        'travel': 'Travel',
        'typography': 'Typography',
        'vacation': 'Travel',
        'vox': 'Six Apart',
        'ydnar': 'ydnar',
    }

    def __init__(self, username):
        """Remember the Vox username and create the HTTP client.

        username -- the <username> part of <username>.vox.com.
        """
        self.username = username
        self.entries = []
        # '.cache' is handed to httplib2 as its cache argument.
        self.h = httplib2.Http(".cache")

    @property
    def url(self):
        """URL of the first page of the blog's Atom feed."""
        return 'http://%s.vox.com/library/posts/atom.xml' % self.username

    def fetch(self, url):
        """Request `url` and return the response body.

        The response headers/status returned by httplib2 are ignored.
        """
        _response, content = self.h.request(url)
        return content

    # Fetch all entries from a Vox blog.
    def parse_feeds(self):
        """Reset self.entries and fill it by following the feed's 'next' links."""
        self.entries = []
        visited = set()
        next_url = self.url
        # Stop when there is no next page, or when a page points back at one
        # that was already parsed (would otherwise loop forever).
        while next_url and next_url not in visited:
            visited.add(next_url)
            next_url = self.parse_feed(next_url)

    # Parse a single Atom feed, add its entries, and return the next URL, if any.
    def parse_feed(self, url):
        """Append the entries of the feed at `url` to self.entries.

        Returns the href of the feed's rel="next" link, or None when the feed
        has no such link (including when it has no links at all).
        """
        print(u'Parsing feed: %s' % url)
        data = feedparser.parse(self.fetch(url))
        self.entries.extend(data.entries)
        # .get() rather than attribute access: a feed without <link> elements
        # must end pagination, not raise AttributeError.
        for link in data.feed.get('links', []):
            if link.get('rel') == 'next':
                return link.get('href')
        return None

    # Generate MT categories for each Vox tag. YMMV.
    def map_entry_categories(self, entry):
        """Store the entry's MT categories in entry['mapped_categories'].

        The list starts with DEFAULT_CATEGORIES, followed by the category of
        every tag found in TAGS_TO_CATEGORIES, each category at most once.
        """
        print(u'Mapping categories for: %s' % entry.title)
        categories = list(self.DEFAULT_CATEGORIES)
        for tag in entry.get('tags') or ():
            category = self.TAGS_TO_CATEGORIES.get(tag.get('term'))
            # Deduplicate on the mapped category, not on the raw tag: several
            # tags (e.g. 'bicycle' and 'cycling') map to the same category.
            if category is not None and category not in categories:
                categories.append(category)
        entry['mapped_categories'] = categories

    # Vox feed content is a subset of the actual full post content, so fetch the original.
    def fetch_entry_content(self, entry):
        """Download the post page and store its body HTML in entry['full_content'].

        The body is the contents of the first div whose class attribute is
        'asset-body' or, failing that, 'asset-body preview-links'. When
        neither is present the feed's summary (or an empty string) is used
        instead, so one odd page does not abort the whole export.
        """
        print(u'Fetching content for: %s' % entry.title)
        soup = BeautifulSoup(self.fetch(entry.link))
        div = soup.find('div', {'class': 'asset-body'})
        if div is None:
            div = soup.find('div', {'class': 'asset-body preview-links'})
        if div is None:
            print(u'  No asset-body found in %s; using feed summary.' % entry.link)
            entry['full_content'] = entry.get('summary', u'')
            return
        entry['full_content'] = div.decodeContents()

    def serialize_entries(self):
        """Return all entries in self.entries serialized as one MTIF string."""
        return u''.join(self.serialize_entry(entry) for entry in self.entries)

    def serialize_entry(self, entry):
        """Return one entry as an MTIF record (metadata, BODY, terminator).

        Uses entry['full_content'] when fetch_entry_content() has set it and
        falls back to the feed summary otherwise; a missing author falls back
        to the blog's username.
        """
        # Drop the feed tracking parameter to link to the canonical post URL.
        original_url = entry.link.replace('?_c=feed-atom', '')
        author = entry.get('author') or self.username
        body = entry.get('full_content')
        if body is None:
            body = entry.get('summary', u'')

        mtif = [
            u'TITLE: %s' % entry.title,
            u'AUTHOR: %s' % author,
            u'DATE: %s' % time.strftime('%m/%d/%Y %H:%M:%S', entry.published_parsed),
            u'CONVERT BREAKS: 1',  # Vox content is always HTML. TypePad can mangle with its RTE unless this is set.
            u'STATUS: publish',  # Hard-coded to publish.
        ]
        mtif.extend(u'CATEGORY: %s' % category
                    for category in entry.get('mapped_categories') or ())
        mtif.append(u'-----')
        mtif.append(u'BODY:')
        mtif.append(u'<p class="import">Originally posted to <a href="%s" rel="alternate nofollow">%s.vox.com</a> on %s.</p>' % (
            original_url, self.username, time.strftime('%B %d, %Y', entry.published_parsed)))
        mtif.append(body)
        mtif.append(u'-----')
        mtif.append(u'--------\n')
        return u'\n'.join(mtif)
class Usage(Exception):
    """Raised for command-line usage errors.

    msg -- human-readable explanation; also available through str(err).
    """

    def __init__(self, msg):
        # Initialise the base class so str(err) and tracebacks show the
        # message instead of an empty string.
        Exception.__init__(self, msg)
        self.msg = msg
def main(argv=None):
    """Command-line entry point: export <username>.vox.com to <username>.mtif.

    argv -- argument list including the program name; defaults to sys.argv.

    Returns a process exit status: 0 on success or after printing help,
    2 on a usage error.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, args = getopt.getopt(argv[1:], 'h', ['help'])
        except getopt.error as msg:
            raise Usage(str(msg))
        # -h/--help is accepted by getopt above, so it has to be honoured
        # here rather than falling through to the "missing argument" error.
        for opt, _value in opts:
            if opt in ('-h', '--help'):
                print('Usage: %s <username>' % argv[0])
                print('Exports the public posts of <username>.vox.com to <username>.mtif')
                return 0
        if not args:
            raise Usage('No username specified.')
    except Usage as err:
        sys.stderr.write('Usage: %s <username>\n' % argv[0])
        sys.stderr.write('%s\n' % err.msg)
        sys.stderr.write('for help use --help\n')
        return 2

    # Encode everything printed to stdout from here on as UTF-8.
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

    vp = VoxPop(args[0])

    print(u'\nFetching Atom feeds for %s...' % vp.username)
    vp.parse_feeds()
    print(u'Found %d posts.' % len(vp.entries))

    print(u'\nMapping categories...')
    for entry in vp.entries:
        vp.map_entry_categories(entry)

    print(u'\nFetching post content (this could take a while)...')
    # Open the output only once the feed has been read, so a failed feed
    # fetch does not truncate an existing export.
    f = codecs.getwriter('utf-8')(open('%s.mtif' % vp.username, 'wb'))
    try:
        for entry in vp.entries:
            vp.fetch_entry_content(entry)
            f.write(vp.serialize_entry(entry))
            # Flush per entry so finished posts are on disk if a later
            # fetch fails.
            f.flush()
    finally:
        f.close()
    return 0
# Run the exporter only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment