Skip to content

Instantly share code, notes, and snippets.

@jdriscoll
Created December 6, 2009 13:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdriscoll/250223 to your computer and use it in GitHub Desktop.
Save jdriscoll/250223 to your computer and use it in GitHub Desktop.
import sys
import time
import urllib
import urllib2
from django.core.management import setup_environ
# Make sure we're actually importing and activating the correct settings file here
import settings
setup_environ(settings)
from meowr.models import Article
from tagging.models import Tag
# Tumblr API endpoint that main() POSTs every article to.
TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'
# Set this to your blog's subdomain if importing into something other than
# your default blog; when it is not None, main() sends it as the 'group' field:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None
# Your current account email and password (sent with every request)
USER_EMAIL = ''
USER_PASSWORD = ''
# Prefix that main() substitutes for the old image URL prefixes in article bodies.
STATIC_URL = 'http://static.latherrinserepeat.org/images/'
# Seconds main() sleeps after each successful post.
DELAY = 10
def main():
    """Post every live ``Article`` to Tumblr, ordered by ``pub_date``.

    For each article the tag names (or ``'cinema'`` when it has no tags) plus
    its rating descriptions are sent as the Tumblr tag list, the old image URL
    prefixes in the body are rewritten to ``STATIC_URL``, and the article is
    submitted as a Markdown ``regular`` post.

    A failed request is retried without limit, doubling the wait each time;
    after a successful post the wait is reset and the script pauses for
    ``DELAY`` seconds before moving on.
    """
    count = 0
    timeout_seconds = 1
    for a in Article.live.order_by('pub_date'):
        tag_set = Tag.objects.get_for_object(a)
        if tag_set.count() == 0:
            tags = 'cinema'
        else:
            tags = ','.join([t.name for t in tag_set])
        for r in a.rating.all():
            tags += ',%s' % r.description

        body = a.body.encode('utf-8')
        body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
        body = body.replace('/static/images/', STATIC_URL)

        data = {}
        data['email'] = USER_EMAIL
        data['password'] = USER_PASSWORD
        data['type'] = 'regular'
        data['date'] = a.pub_date.strftime('%Y-%m-%d %H:%M:%S')
        data['tags'] = tags.encode('utf-8')
        data['format'] = 'markdown'
        data['title'] = a.title.encode('utf-8')
        data['body'] = body
        if BLOG_URL is not None:
            data['group'] = BLOG_URL

        encoded_data = urllib.urlencode(data)
        req = urllib2.Request(TUMBLR_POST_URL, encoded_data)
        while 1:
            try:
                res = urllib2.urlopen(req)
            except urllib2.URLError as e:
                # HTTPError (a URLError subclass) carries .code and a readable
                # body; a plain URLError (DNS failure, refused connection)
                # only has .reason, so never touch .code/.read() blindly.
                code = getattr(e, 'code', None)
                # A 201 surfacing as an exception is treated as success --
                # presumably because some urllib2 versions raise for it.
                if code != 201:
                    detail = e.read() if code is not None else e.reason
                    print('There was an error (code %s): %s' % (code, detail))
                    timeout_seconds = timeout_seconds*2
                    print('Retrying in %s seconds' % timeout_seconds)
                    time.sleep(timeout_seconds)
                    continue
            else:
                # The response body is not needed; release the connection.
                res.close()
            timeout_seconds = 1
            count += 1
            print('Successfully migrated article "%s". %s completed...' % (a.title, count))
            time.sleep(DELAY)
            break
    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
import pickle
import sys
import time
import urllib
import urllib2
from os import path
from django.core.management import setup_environ
# Make sure we're actually importing and activating the correct settings file here
import settings
setup_environ(settings)
from meowr.models import Article
from tagging.models import Tag
# Pickle file in which main() keeps the list of primary keys of the articles
# that have already been migrated, so a rerun can skip them.
FILENAME = 'progress.pickle'
# Tumblr API endpoint that main() POSTs every article to.
TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'
# Set this to your blog's subdomain if importing into something other than
# your default blog; when it is not None, main() sends it as the 'group' field:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None
# Your current account email and password (sent with every request)
USER_EMAIL = ''
USER_PASSWORD = ''
# Prefix that main() substitutes for the old image URL prefixes in article bodies.
STATIC_URL = 'http://static.latherrinserepeat.org/images/'
# Seconds main() sleeps after each successful post.
DELAY = 10
# Sent unchanged as the 'private' field of every post.
IMPORT_AS_PRIVATE = 1 # 0 for false
def main():
    """Interactively post live ``Article`` objects to Tumblr, resumably.

    The primary keys of articles that were posted successfully are kept in
    the pickle file ``FILENAME``; articles whose key is already recorded are
    skipped.  Before each remaining article the user is asked to confirm:
    ``yes`` migrates that article, ``all`` migrates it and every later one
    without asking again, and anything else exits the script.

    A failed request is retried without limit, doubling the wait each time.
    The progress list is written back to ``FILENAME`` however the loop ends
    (normal completion, ``sys.exit`` or an exception).
    """
    count = 0
    timeout_seconds = 1
    migrate_all = False

    # Look for the history file.  NOTE: pickle must only be used on trusted
    # data; this file is expected to be one this script wrote itself.
    # The text modes of the original are kept so existing files still load.
    if path.exists(FILENAME):
        with open(FILENAME, 'r') as progress_file:
            progress = pickle.load(progress_file)
    else:
        progress = []
    print("Current progress:\n%s" % '\n'.join([str(n) for n in progress]))

    try:
        for a in Article.live.order_by('pub_date'):
            # Check to see if we've already migrated this post
            pk = a._get_pk_val()
            if pk in progress:
                print("Skipping article: %s" % a.title)
                continue
            print("DEBUG (PK): %s" % pk)

            if not migrate_all:
                print('Preparing to migrating the article: "%s".' % a.title)
                resp = raw_input("Continue? (Yes, No, All): ")
                if resp.lower() == 'all':
                    migrate_all = True
                elif resp.lower() != 'yes':
                    sys.exit('Exiting migration script...')

            tag_set = Tag.objects.get_for_object(a)
            if tag_set.count() == 0:
                tags = 'cinema'
            else:
                tags = ','.join([t.name for t in tag_set])
            for r in a.rating.all():
                tags += ',%s' % r.description

            body = a.body.encode('utf-8')
            body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
            body = body.replace('/static/images/', STATIC_URL)

            data = {}
            data['email'] = USER_EMAIL
            data['password'] = USER_PASSWORD
            data['type'] = 'regular'
            data['date'] = a.pub_date.strftime('%Y-%m-%d %H:%M:%S')
            data['tags'] = tags.encode('utf-8')
            data['format'] = 'markdown'
            data['title'] = a.title.encode('utf-8')
            data['body'] = body
            data['private'] = IMPORT_AS_PRIVATE
            if BLOG_URL is not None:
                data['group'] = BLOG_URL

            encoded_data = urllib.urlencode(data)
            req = urllib2.Request(TUMBLR_POST_URL, encoded_data)
            while 1:
                try:
                    res = urllib2.urlopen(req)
                except urllib2.URLError as e:
                    # HTTPError (a URLError subclass) carries .code and a
                    # readable body; a plain URLError (DNS failure, refused
                    # connection) only has .reason.
                    code = getattr(e, 'code', None)
                    # A 201 surfacing as an exception is treated as success --
                    # presumably because some urllib2 versions raise for it.
                    if code != 201:
                        detail = e.read() if code is not None else e.reason
                        print('There was an error (code %s): %s' % (code, detail))
                        timeout_seconds = timeout_seconds*2
                        print('Retrying in %s seconds' % timeout_seconds)
                        time.sleep(timeout_seconds)
                        continue
                else:
                    # The response body is not needed; release the connection.
                    res.close()
                timeout_seconds = 1
                count += 1
                # Add article primary key to progress list
                progress.append(pk)
                print('Successfully migrated article "%s". %s completed...' % (a.title, count))
                time.sleep(DELAY)
                break
    finally:
        print("Saving progress file...")
        # Close the file explicitly so the data is flushed to disk even when
        # we are on the way out because of an exception or sys.exit().
        with open(FILENAME, 'w') as progress_file:
            pickle.dump(progress, progress_file)
    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
import sys
import time
import urllib
import urllib2
from xml.etree import ElementTree as ET
# Feed that main() downloads and parses; every channel/item becomes one post.
RSS_URL = 'http://myfeed.com/rss'
# Tumblr API endpoint that main() POSTs every item to.
TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'
# Set this to your blog's subdomain if importing into something other than
# your default blog; when it is not None, main() sends it as the 'group' field:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None
# Your current account email and password (sent with every request)
USER_EMAIL = 'email@example.com'
USER_PASSWORD = 'secret'
# Only referenced by the commented-out body.replace() calls in main().
STATIC_URL = 'http://static.myserver.com/images/' # replace this
# Seconds main() sleeps after each successful post.
DELAY = 10
def main():
    """Download the RSS feed at ``RSS_URL`` and post each item to Tumblr.

    Every ``channel/item`` is submitted as an HTML ``regular`` post using its
    ``title``, ``content:encoded`` body, ``pubDate`` and ``category`` values
    (sent as the tag list).  Missing or empty elements are tolerated: title
    and body fall back to an empty string, empty categories are dropped and
    the date is only sent when the item has one.

    A failed request is retried without limit, doubling the wait each time;
    after a successful post the script pauses for ``DELAY`` seconds.
    """
    count = 0
    timeout_seconds = 1

    feed = urllib2.urlopen(urllib2.Request(RSS_URL))
    try:
        xml = ET.parse(feed)
    finally:
        feed.close()

    for item in xml.findall('channel/item'):
        # Skip categories without text so join() never sees None.
        tags = [c.text for c in item.findall('category') if c.text]
        title = item.findtext('title', '')
        body = item.findtext('{http://purl.org/rss/1.0/modules/content/}encoded', '')
        #body = body.replace('http://oldserver.com/static/images/', STATIC_URL)
        #body = body.replace('/static/images/', STATIC_URL)
        pub_date = item.findtext('pubDate')

        data = {}
        data['email'] = USER_EMAIL
        data['password'] = USER_PASSWORD
        data['type'] = 'regular'
        if pub_date:
            data['date'] = pub_date
        data['format'] = 'html'
        data['title'] = title.encode('utf-8')
        data['body'] = body.encode('utf-8')
        # Encode like title/body: urlencode() calls str() on each value and
        # would raise UnicodeEncodeError on a non-ASCII unicode tag list.
        data['tags'] = ','.join(tags).encode('utf-8')
        if BLOG_URL is not None:
            data['group'] = BLOG_URL

        encoded_data = urllib.urlencode(data)
        req = urllib2.Request(TUMBLR_POST_URL, encoded_data)
        while 1:
            try:
                res = urllib2.urlopen(req)
            except urllib2.URLError as e:
                # HTTPError (a URLError subclass) carries .code and a readable
                # body; a plain URLError (DNS failure, refused connection)
                # only has .reason.
                code = getattr(e, 'code', None)
                # A 201 surfacing as an exception is treated as success --
                # presumably because some urllib2 versions raise for it.
                if code != 201:
                    detail = e.read() if code is not None else e.reason
                    print('There was an error (code %s): %s' % (code, detail))
                    timeout_seconds = timeout_seconds*2
                    print('Retrying in %s seconds' % timeout_seconds)
                    time.sleep(timeout_seconds)
                    continue
            else:
                # The response body is not needed; release the connection.
                res.close()
            timeout_seconds = 1
            count += 1
            print('Successfully migrated article "%s". %s completed...' % (title, count))
            time.sleep(DELAY)
            break
    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
import getpass
import os
import pickle
import re
import sys
import time
import urllib
import urllib2
from xml.etree import ElementTree as ET
# Pickle file in which main() keeps the list of post ids already uploaded.
PROGRESS_FILENAME = 'progress.pickle'
# Tumblr API endpoint that main() POSTs every post to.
TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'
# Seconds main() sleeps after each successful post; also the starting value
# of the retry wait, which is doubled on every failed attempt.
DELAY = 10
# Captures the XML embedded in an HTML file between the
# "<!-- BEGIN TUMBLR XML" and "END TUMBLR XML -->" markers.
RE_TUMBLR_XML = re.compile(r'<!-- BEGIN TUMBLR XML\s+(.*)\s+END TUMBLR XML -->', re.MULTILINE|re.DOTALL)
# Per post type: request parameter name -> name of the child element of the
# embedded XML whose text main() sends as that parameter.  An empty dict
# ('audio') means no type-specific parameters are sent.
TYPE_MAP = {
'regular': {'title': 'regular-title', 'body': 'regular-body'},
'photo': {'source': 'photo-url', 'caption': 'photo-caption', 'click-through-url': 'photo-link-url'},
'quote': {'quote': 'quote-text', 'source': 'quote-source'},
'link': {'name': 'link-text', 'url': 'link-url', 'description': 'link-description'},
'conversation': {'title': 'conversation-title', 'conversation': 'conversation-text'},
'video': {'caption': 'video-caption', 'embed': 'video-player'},
'audio': {},
}
def main():
    """Re-upload posts stored as HTML files next to this script to Tumblr.

    Prompts for the account email, password and an optional target blog,
    then asks for confirmation.  Every ``*.html`` file in the script's own
    directory is searched for the XML embedded between the
    ``BEGIN TUMBLR XML`` / ``END TUMBLR XML`` comment markers; the fields
    listed in ``TYPE_MAP`` for the post's type are sent to Tumblr.

    Ids of uploaded posts are kept in the pickle file ``PROGRESS_FILENAME``
    (looked up relative to the current working directory) so a rerun skips
    them; the list is saved however the loop ends.  Files without embedded
    XML and posts of a type missing from ``TYPE_MAP`` are reported and
    skipped.  A failed request is retried without limit, doubling the wait
    each time.
    """
    user_email = raw_input('Enter the email address associated with your Tumblr account: ')
    user_password = getpass.getpass()
    # An empty answer means "use the default blog".
    blog_url = raw_input('Enter a Tumblr subdomain (Ex. mysite.tumblr.com) or hit return to skip: ') or None
    if blog_url is None:
        msg = 'Uploading data to your default blog. Continue? '
    else:
        msg = 'Uploading data to %s. Continue? ' % blog_url
    if raw_input(msg).lower() not in ['y', 'yes']:
        sys.exit()

    count = 0
    timeout_seconds = DELAY
    dir_path = os.path.dirname(os.path.abspath(__file__))

    # Look for the history file.  NOTE: pickle must only be used on trusted
    # data; this file is expected to be one this script wrote itself.
    # The text modes of the original are kept so existing files still load.
    if os.path.exists(PROGRESS_FILENAME):
        with open(PROGRESS_FILENAME, 'r') as progress_file:
            progress = pickle.load(progress_file)
        count = len(progress)
    else:
        progress = []

    try:
        for filename in os.listdir(dir_path):
            # Process only html files
            if not filename.endswith('.html'):
                continue
            pth = os.path.join(dir_path, filename)
            with open(pth) as html_file:
                match = RE_TUMBLR_XML.search(html_file.read())
            if match is None:
                print('Skipping file %s: no embedded Tumblr XML found' % filename)
                continue
            xml = ET.fromstring(match.group(1))
            post_id = xml.get('id')

            # Check to see if we've already migrated this post
            if post_id in progress:
                print("Skipping post: %s" % post_id)
                continue

            post_type = xml.get('type')
            # Compare against None: the mapping for 'audio' is an empty dict.
            params = TYPE_MAP.get(post_type)
            if params is None:
                print('Skipping post %s: unsupported post type "%s"' % (post_id, post_type))
                continue
            print("Uploading post: %s" % post_id)

            data = {}
            data['email'] = user_email
            data['password'] = user_password
            data['type'] = post_type
            data['date'] = xml.get('date-gmt')
            data['format'] = xml.get('format')
            for key, val in params.items():
                if val == 'photo-url':
                    # Several photo-url elements may exist; use the first one
                    # that points at media.tumblr.com.
                    for photo_url in xml.findall(val):
                        url = photo_url.text
                        if url and 'media.tumblr.com' in url:
                            data[key] = url.encode('utf-8')
                            break
                else:
                    el = xml.find(val)
                    # An empty element has text None; send nothing for it.
                    if el is not None and el.text is not None:
                        data[key] = el.text.encode('utf-8')
            if blog_url is not None:
                data['group'] = blog_url

            encoded_data = urllib.urlencode(data)
            req = urllib2.Request(TUMBLR_POST_URL, encoded_data)
            while 1:
                try:
                    res = urllib2.urlopen(req)
                except urllib2.URLError as e:
                    # HTTPError (a URLError subclass) carries .code and a
                    # readable body; a plain URLError (DNS failure, refused
                    # connection) only has .reason.
                    code = getattr(e, 'code', None)
                    # A 201 surfacing as an exception is treated as success --
                    # presumably because some urllib2 versions raise for it.
                    if code != 201:
                        detail = e.read() if code is not None else e.reason
                        print('There was an error (code %s): %s' % (code, detail))
                        timeout_seconds = timeout_seconds*2
                        print('Retrying in %s seconds' % timeout_seconds)
                        time.sleep(timeout_seconds)
                        continue
                else:
                    # The response body is not needed; release the connection.
                    res.close()
                timeout_seconds = DELAY
                count += 1
                # Add the post id to the progress list
                progress.append(post_id)
                print('Successfully migrated post "%s". %s completed, pausing for %s seconds...' % (post_id, count, DELAY))
                time.sleep(DELAY)
                break
    finally:
        print("Saving progress file...")
        # Close the file explicitly so the data is flushed to disk even when
        # we are on the way out because of an exception or sys.exit().
        with open(PROGRESS_FILENAME, 'w') as progress_file:
            pickle.dump(progress, progress_file)
    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment