jdriscoll/migrate.py

## migrate.py
import sys
import time
import urllib
import urllib2

from django.core.management import setup_environ

# Make sure we're actually importing and activating the correct settings file here
import settings
setup_environ(settings)

from meowr.models import Article
from tagging.models import Tag


# Tumblr account credentials sent with every post (fill in before running).
USER_EMAIL = ''
USER_PASSWORD = ''

# Target blog.  Leave as None to post to the account's default blog, or set
# it to the blog's subdomain, for example:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None

# Endpoint that receives one form-encoded POST per article.
TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

# Prefix substituted for the old image locations found in article bodies.
STATIC_URL = 'http://static.latherrinserepeat.org/images/'

# Seconds to sleep after each successful post.
DELAY = 10

def main():
    """Post every live Article to Tumblr, oldest first.

    Each article becomes one ``regular`` post in Markdown format carrying the
    article's title, publication date, tags (plus rating descriptions) and
    body.  A request that fails is retried without limit, doubling the pause
    before each new attempt; after a successful post the script sleeps DELAY
    seconds before moving on.
    """
    count = 0
    timeout_seconds = 1

    for a in Article.live.order_by('pub_date'):

        # Articles without tags get the generic 'cinema' tag instead.
        labels = [t.name for t in Tag.objects.get_for_object(a)] or ['cinema']
        # Every rating description is sent as an additional tag.
        labels.extend(['%s' % r.description for r in a.rating.all()])

        # Rewrite image links: absolute links first, then site-relative ones.
        body = a.body.encode('utf-8')
        body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
        body = body.replace('/static/images/', STATIC_URL)

        data = {
            'email': USER_EMAIL,
            'password': USER_PASSWORD,
            'type': 'regular',
            'date': a.pub_date.strftime('%Y-%m-%d %H:%M:%S'),
            'tags': ','.join(labels).encode('utf-8'),
            'format': 'markdown',
            'title': a.title.encode('utf-8'),
            'body': body,
        }
        if BLOG_URL is not None:
            data['group'] = BLOG_URL

        req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))

        while 1:
            failure = None
            try:
                urllib2.urlopen(req).close()
            except urllib2.HTTPError:
                # Fetched via sys.exc_info() so the handler's syntax does not
                # depend on the interpreter version.
                err = sys.exc_info()[1]
                # A 201 status can arrive as an exception; it is treated as a
                # successful post rather than a failure.
                if err.code != 201:
                    failure = 'There was an error (code %s): %s' % (err.code, err.read())
            except urllib2.URLError:
                # No HTTP response was received, so there is no status code
                # or body to report -- only a reason.
                failure = 'There was an error: %s' % sys.exc_info()[1].reason

            if failure is not None:
                print(failure)
                timeout_seconds = timeout_seconds * 2
                print('Retrying in %s seconds' % timeout_seconds)
                time.sleep(timeout_seconds)
                continue

            # Success: reset the back-off and pause before the next article.
            timeout_seconds = 1
            count += 1
            print('Successfully migrated article "%s". %s completed...' % (a.title, count))
            time.sleep(DELAY)
            break

    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()

## migrate2.py
import pickle
import sys
import time
import urllib
import urllib2

from os import path
from django.core.management import setup_environ

# Make sure we're actually importing and activating the correct settings file here
import settings
setup_environ(settings)

from meowr.models import Article
from tagging.models import Tag

# Tumblr account credentials sent with every post (fill in before running).
USER_EMAIL = ''
USER_PASSWORD = ''

# Target blog.  Leave as None to post to the account's default blog, or set
# it to the blog's subdomain, for example:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None

# Value sent as the 'private' field of every post (0 for false).
IMPORT_AS_PRIVATE = 1

# Endpoint that receives one form-encoded POST per article.
TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

# Prefix substituted for the old image locations found in article bodies.
STATIC_URL = 'http://static.latherrinserepeat.org/images/'

# Pickle file holding the primary keys of articles that were already posted.
FILENAME = 'progress.pickle'

# Seconds to sleep after each successful post.
DELAY = 10

def main():
    """Migrate live Articles to Tumblr interactively, resuming earlier runs.

    Primary keys of already-posted articles are read from FILENAME (a pickle
    file) and those articles are skipped.  Until the user answers "All", each
    remaining article must be confirmed with "Yes"; any other answer exits
    the script.  A failed request is retried without limit with a doubling
    pause, and a successful one is followed by a DELAY-second sleep.  The
    progress list is written back to FILENAME from a ``finally`` block, so it
    is also saved when the run is aborted by an error or by the user.
    """
    count = 0
    timeout_seconds = 1
    migrate_all = False

    # Look for the history file.  NOTE: pickle executes what it loads, so
    # FILENAME must only ever be a file written by this script.
    if path.exists(FILENAME):
        history = open(FILENAME, 'r')
        try:
            progress = pickle.load(history)
        finally:
            history.close()
    else:
        progress = []

    print("Current progress:\n%s" % '\n'.join([str(n) for n in progress]))

    try:

        for a in Article.live.order_by('pub_date'):

            # Check to see if we've already migrated this post
            pk = a._get_pk_val()
            if pk in progress:
                print("Skipping article: %s" % a.title)
                continue

            print("DEBUG (PK): %s" % pk)

            if not migrate_all:
                print('Preparing to migrating the article: "%s".' % a.title)
                resp = raw_input("Continue? (Yes, No, All): ")
                if resp.lower() == 'all':
                    migrate_all = True
                elif resp.lower() != 'yes':
                    sys.exit('Exiting migration script...')

            # Articles without tags get the generic 'cinema' tag instead.
            labels = [t.name for t in Tag.objects.get_for_object(a)] or ['cinema']
            # Every rating description is sent as an additional tag.
            labels.extend(['%s' % r.description for r in a.rating.all()])

            # Rewrite image links: absolute links first, then relative ones.
            body = a.body.encode('utf-8')
            body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
            body = body.replace('/static/images/', STATIC_URL)

            data = {
                'email': USER_EMAIL,
                'password': USER_PASSWORD,
                'type': 'regular',
                'date': a.pub_date.strftime('%Y-%m-%d %H:%M:%S'),
                'tags': ','.join(labels).encode('utf-8'),
                'format': 'markdown',
                'title': a.title.encode('utf-8'),
                'body': body,
                'private': IMPORT_AS_PRIVATE,
            }
            if BLOG_URL is not None:
                data['group'] = BLOG_URL

            req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))

            while 1:
                failure = None
                try:
                    urllib2.urlopen(req).close()
                except urllib2.HTTPError:
                    # Fetched via sys.exc_info() so the handler's syntax does
                    # not depend on the interpreter version.
                    err = sys.exc_info()[1]
                    # A 201 status can arrive as an exception; it is treated
                    # as a successful post rather than a failure.
                    if err.code != 201:
                        failure = 'There was an error (code %s): %s' % (err.code, err.read())
                except urllib2.URLError:
                    # No HTTP response was received, so there is no status
                    # code or body to report -- only a reason.
                    failure = 'There was an error: %s' % sys.exc_info()[1].reason

                if failure is not None:
                    print(failure)
                    timeout_seconds = timeout_seconds * 2
                    print('Retrying in %s seconds' % timeout_seconds)
                    time.sleep(timeout_seconds)
                    continue

                timeout_seconds = 1
                count += 1

                # Add article primary key to progress list
                progress.append(pk)

                print('Successfully migrated article "%s". %s completed...' % (a.title, count))
                time.sleep(DELAY)
                break

    finally:
        print("Saving progress file...")
        history = open(FILENAME, 'w')
        try:
            pickle.dump(progress, history)
        finally:
            history.close()

    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()

## migrate_rss.py
import sys
import time
import urllib
import urllib2

from xml.etree import ElementTree as ET

# Feed whose items are re-posted to Tumblr.
RSS_URL = 'http://myfeed.com/rss'

# Tumblr account credentials sent with every post (placeholders).
USER_EMAIL = 'email@example.com'
USER_PASSWORD = 'secret'

# Target blog.  Leave as None to post to the account's default blog, or set
# it to the blog's subdomain, for example:
# BLOG_URL = 'myblog.tumblr.com'
BLOG_URL = None

# Endpoint that receives one form-encoded POST per feed item.
TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

# Static image host -- replace this with your own.
STATIC_URL = 'http://static.myserver.com/images/'

# Seconds to sleep after each successful post.
DELAY = 10


def main():
    """Re-post every item of the RSS feed at RSS_URL to Tumblr.

    Each ``channel/item`` becomes one ``regular`` HTML post built from the
    item's title, ``content:encoded`` body (falling back to ``description``
    when that element is absent), ``pubDate`` and ``category`` elements.
    Missing optional elements are tolerated instead of aborting the run.  A
    failed request is retried without limit with a doubling pause; a
    successful one is followed by a DELAY-second sleep.
    """
    count = 0
    timeout_seconds = 1

    feed = urllib2.urlopen(urllib2.Request(RSS_URL))
    try:
        feed_tree = ET.parse(feed)
    finally:
        feed.close()

    for item in feed_tree.findall('channel/item'):

        # Encode tags like the title and body so urlencode accepts non-ASCII
        # category names; categories without text are dropped.
        tags = [c.text.encode('utf-8') for c in item.findall('category') if c.text]

        title = item.findtext('title', '')

        body = item.findtext('{http://purl.org/rss/1.0/modules/content/}encoded')
        if body is None:
            body = item.findtext('description', '')
        # To rewrite image links, adapt and enable lines such as:
        #   body = body.replace('http://oldserver.com/static/images/', STATIC_URL)
        #   body = body.replace('/static/images/', STATIC_URL)

        data = {
            'email': USER_EMAIL,
            'password': USER_PASSWORD,
            'type': 'regular',
            'format': 'html',
            'title': title.encode('utf-8'),
            'body': body.encode('utf-8'),
            'tags': ','.join(tags),
        }
        pub_date = item.findtext('pubDate')
        if pub_date:
            data['date'] = pub_date.encode('utf-8')
        if BLOG_URL is not None:
            data['group'] = BLOG_URL

        req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))

        while 1:
            failure = None
            try:
                urllib2.urlopen(req).close()
            except urllib2.HTTPError:
                # Fetched via sys.exc_info() so the handler's syntax does not
                # depend on the interpreter version.
                err = sys.exc_info()[1]
                # A 201 status can arrive as an exception; it is treated as a
                # successful post rather than a failure.
                if err.code != 201:
                    failure = 'There was an error (code %s): %s' % (err.code, err.read())
            except urllib2.URLError:
                # No HTTP response was received, so there is no status code
                # or body to report -- only a reason.
                failure = 'There was an error: %s' % sys.exc_info()[1].reason

            if failure is not None:
                print(failure)
                timeout_seconds = timeout_seconds * 2
                print('Retrying in %s seconds' % timeout_seconds)
                time.sleep(timeout_seconds)
                continue

            # Success: reset the back-off and pause before the next item.
            timeout_seconds = 1
            count += 1
            print('Successfully migrated article "%s". %s completed...' % (title, count))
            time.sleep(DELAY)
            break

    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()

## migrate_tumblr.py
import getpass
import os
import pickle
import re
import sys
import time
import urllib
import urllib2


from xml.etree import ElementTree as ET

# Pickle file holding the ids of posts that were already uploaded.
PROGRESS_FILENAME = 'progress.pickle'

# Endpoint that receives one form-encoded POST per post.
TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

# Seconds to sleep after each successful upload; also the starting back-off.
DELAY = 10

# Captures the XML document embedded in an HTML comment of a backup file.
RE_TUMBLR_XML = re.compile(
    r'<!-- BEGIN TUMBLR XML\s+(.*)\s+END TUMBLR XML -->',
    re.MULTILINE | re.DOTALL
)

# For each post type: request field name -> name of the XML child element
# whose text supplies that field.  'audio' posts copy no child elements.
TYPE_MAP = {
    'regular': {
        'title': 'regular-title',
        'body': 'regular-body',
    },
    'photo': {
        'source': 'photo-url',
        'caption': 'photo-caption',
        'click-through-url': 'photo-link-url',
    },
    'quote': {
        'quote': 'quote-text',
        'source': 'quote-source',
    },
    'link': {
        'name': 'link-text',
        'url': 'link-url',
        'description': 'link-description',
    },
    'conversation': {
        'title': 'conversation-title',
        'conversation': 'conversation-text',
    },
    'video': {
        'caption': 'video-caption',
        'embed': 'video-player',
    },
    'audio': {},
}

def main():
    """Upload the Tumblr posts embedded in local backup files to a blog.

    Prompts for the account email, password and an optional target blog,
    then scans the directory containing this script for files ending in
    ``html``.  The XML embedded in each file (see RE_TUMBLR_XML) is mapped to
    request fields through TYPE_MAP and posted to TUMBLR_POST_URL.  Files
    without embedded XML and posts of a type missing from TYPE_MAP are
    skipped with a message.  Ids of uploaded posts are kept in
    PROGRESS_FILENAME so a later run skips them; the file is rewritten from a
    ``finally`` block, so it is also saved when the run is aborted.  A failed
    request is retried without limit with a doubling pause.
    """
    user_email = raw_input('Enter the email address associated with your Tumblr account: ')
    user_password = getpass.getpass()
    blog_url = raw_input('Enter a Tumblr subdomain (Ex. mysite.tumblr.com) or hit return to skip: ')

    if blog_url == '':
        blog_url = None

    if blog_url is None:
        msg = 'Uploading data to your default blog. Continue? '
    else:
        msg = 'Uploading data to %s. Continue? ' % blog_url
    if raw_input(msg).lower() not in ['y', 'yes']:
        sys.exit()

    count = 0
    timeout_seconds = DELAY

    dir_path = os.path.dirname(os.path.abspath(__file__))

    # Look for the history file.  NOTE: pickle executes what it loads, so
    # PROGRESS_FILENAME must only ever be a file written by this script.
    if os.path.exists(PROGRESS_FILENAME):
        history = open(PROGRESS_FILENAME, 'r')
        try:
            progress = pickle.load(history)
        finally:
            history.close()
        count = len(progress)
    else:
        progress = []

    try:
        for filename in os.listdir(dir_path):

            # Process only html files
            if not filename.endswith('html'):
                continue

            backup = open(os.path.join(dir_path, filename))
            try:
                match = RE_TUMBLR_XML.search(backup.read())
            finally:
                backup.close()
            if match is None:
                print('Skipping file without embedded Tumblr XML: %s' % filename)
                continue

            post = ET.fromstring(match.group(1))
            post_id = post.get('id')

            # Check to see if we've already migrated this post
            if post_id in progress:
                print("Skipping post: %s" % post_id)
                continue

            post_type = post.get('type')
            params = TYPE_MAP.get(post_type)
            if params is None:
                print('Skipping post %s: unsupported type "%s"' % (post_id, post_type))
                continue

            print("Uploading post: %s" % post_id)

            data = {
                'email': user_email,
                'password': user_password,
                'type': post_type,
            }
            # Only forward attributes that are present; a missing one would
            # otherwise be url-encoded as the literal string "None".
            for field, attr in (('date', 'date-gmt'), ('format', 'format')):
                value = post.get(attr)
                if value is not None:
                    data[field] = value

            for key, val in params.items():
                if val == 'photo-url':
                    # There may be several photo-url elements; use the first
                    # one whose URL mentions media.tumblr.com.
                    for photo_url in post.findall(val):
                        if photo_url.text and 'media.tumblr.com' in photo_url.text:
                            data[key] = photo_url.text.encode('utf-8')
                            break
                else:
                    el = post.find(val)
                    # Empty elements have text None and are left out.
                    if el is not None and el.text is not None:
                        data[key] = el.text.encode('utf-8')

            if blog_url is not None:
                data['group'] = blog_url

            req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))

            while 1:
                failure = None
                try:
                    urllib2.urlopen(req).close()
                except urllib2.HTTPError:
                    # Fetched via sys.exc_info() so the handler's syntax does
                    # not depend on the interpreter version.
                    err = sys.exc_info()[1]
                    # A 201 status can arrive as an exception; it is treated
                    # as a successful post rather than a failure.
                    if err.code != 201:
                        failure = 'There was an error (code %s): %s' % (err.code, err.read())
                except urllib2.URLError:
                    # No HTTP response was received, so there is no status
                    # code or body to report -- only a reason.
                    failure = 'There was an error: %s' % sys.exc_info()[1].reason

                if failure is not None:
                    print(failure)
                    timeout_seconds = timeout_seconds * 2
                    print('Retrying in %s seconds' % timeout_seconds)
                    time.sleep(timeout_seconds)
                    continue

                timeout_seconds = DELAY
                count += 1

                # Add the post id to the progress list
                progress.append(post_id)

                print('Successfully migrated post "%s". %s completed, pausing for %s seconds...' % (post_id, count, DELAY))
                time.sleep(DELAY)
                break

    finally:
        print("Saving progress file...")
        history = open(PROGRESS_FILENAME, 'w')
        try:
            pickle.dump(progress, history)
        finally:
            history.close()

    print("Successfully migrated %s articles." % count)


if __name__ == '__main__':
    main()
	import sys
	import time
	import urllib
	import urllib2

	from django.core.management import setup_environ

	# Make sure we're actually importing and activating the correct settings file here
	import settings
	setup_environ(settings)

	from meowr.models import Article
	from tagging.models import Tag


	# Tumblr account credentials sent with every post (fill in before running).
	USER_EMAIL = ''
	USER_PASSWORD = ''

	# Target blog.  Leave as None to post to the account's default blog, or set
	# it to the blog's subdomain, for example:
	# BLOG_URL = 'myblog.tumblr.com'
	BLOG_URL = None

	# Endpoint that receives one form-encoded POST per article.
	TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

	# Prefix substituted for the old image locations found in article bodies.
	STATIC_URL = 'http://static.latherrinserepeat.org/images/'

	# Seconds to sleep after each successful post.
	DELAY = 10

	def main():
	    """Post every live Article to Tumblr, oldest first.

	    Each article becomes one ``regular`` post in Markdown format carrying
	    the article's title, publication date, tags (plus rating descriptions)
	    and body.  A request that fails is retried without limit, doubling the
	    pause before each new attempt; after a successful post the script
	    sleeps DELAY seconds before moving on.
	    """
	    count = 0
	    timeout_seconds = 1

	    for a in Article.live.order_by('pub_date'):

	        # Articles without tags get the generic 'cinema' tag instead.
	        labels = [t.name for t in Tag.objects.get_for_object(a)] or ['cinema']
	        # Every rating description is sent as an additional tag.
	        labels.extend(['%s' % r.description for r in a.rating.all()])

	        # Rewrite image links: absolute links first, then relative ones.
	        body = a.body.encode('utf-8')
	        body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
	        body = body.replace('/static/images/', STATIC_URL)

	        data = {
	            'email': USER_EMAIL,
	            'password': USER_PASSWORD,
	            'type': 'regular',
	            'date': a.pub_date.strftime('%Y-%m-%d %H:%M:%S'),
	            'tags': ','.join(labels).encode('utf-8'),
	            'format': 'markdown',
	            'title': a.title.encode('utf-8'),
	            'body': body,
	        }
	        if BLOG_URL is not None:
	            data['group'] = BLOG_URL

	        req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))

	        while 1:
	            failure = None
	            try:
	                urllib2.urlopen(req).close()
	            except urllib2.HTTPError:
	                # Fetched via sys.exc_info() so the handler's syntax does
	                # not depend on the interpreter version.
	                err = sys.exc_info()[1]
	                # A 201 status can arrive as an exception; it is treated
	                # as a successful post rather than a failure.
	                if err.code != 201:
	                    failure = 'There was an error (code %s): %s' % (err.code, err.read())
	            except urllib2.URLError:
	                # No HTTP response was received, so there is no status
	                # code or body to report -- only a reason.
	                failure = 'There was an error: %s' % sys.exc_info()[1].reason

	            if failure is not None:
	                print(failure)
	                timeout_seconds = timeout_seconds * 2
	                print('Retrying in %s seconds' % timeout_seconds)
	                time.sleep(timeout_seconds)
	                continue

	            # Success: reset the back-off and pause before the next one.
	            timeout_seconds = 1
	            count += 1
	            print('Successfully migrated article "%s". %s completed...' % (a.title, count))
	            time.sleep(DELAY)
	            break

	    print("Successfully migrated %s articles." % count)


	if __name__ == '__main__':
	    main()
	import pickle
	import sys
	import time
	import urllib
	import urllib2

	from os import path
	from django.core.management import setup_environ

	# Make sure we're actually importing and activating the correct settings file here
	import settings
	setup_environ(settings)

	from meowr.models import Article
	from tagging.models import Tag

	# Tumblr account credentials sent with every post (fill in before running).
	USER_EMAIL = ''
	USER_PASSWORD = ''

	# Target blog.  Leave as None to post to the account's default blog, or set
	# it to the blog's subdomain, for example:
	# BLOG_URL = 'myblog.tumblr.com'
	BLOG_URL = None

	# Value sent as the 'private' field of every post (0 for false).
	IMPORT_AS_PRIVATE = 1

	# Endpoint that receives one form-encoded POST per article.
	TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

	# Prefix substituted for the old image locations found in article bodies.
	STATIC_URL = 'http://static.latherrinserepeat.org/images/'

	# Pickle file holding the primary keys of articles that were already posted.
	FILENAME = 'progress.pickle'

	# Seconds to sleep after each successful post.
	DELAY = 10

	def main():
	    """Migrate live Articles to Tumblr interactively, resuming earlier runs.

	    Primary keys of already-posted articles are read from FILENAME (a
	    pickle file) and those articles are skipped.  Until the user answers
	    "All", each remaining article must be confirmed with "Yes"; any other
	    answer exits the script.  A failed request is retried without limit
	    with a doubling pause, and a successful one is followed by a
	    DELAY-second sleep.  The progress list is written back to FILENAME
	    from a ``finally`` block, so it is also saved when the run is aborted
	    by an error or by the user.
	    """
	    count = 0
	    timeout_seconds = 1
	    migrate_all = False

	    # Look for the history file.  NOTE: pickle executes what it loads, so
	    # FILENAME must only ever be a file written by this script.
	    if path.exists(FILENAME):
	        history = open(FILENAME, 'r')
	        try:
	            progress = pickle.load(history)
	        finally:
	            history.close()
	    else:
	        progress = []

	    print("Current progress:\n%s" % '\n'.join([str(n) for n in progress]))

	    try:

	        for a in Article.live.order_by('pub_date'):

	            # Check to see if we've already migrated this post
	            pk = a._get_pk_val()
	            if pk in progress:
	                print("Skipping article: %s" % a.title)
	                continue

	            print("DEBUG (PK): %s" % pk)

	            if not migrate_all:
	                print('Preparing to migrating the article: "%s".' % a.title)
	                resp = raw_input("Continue? (Yes, No, All): ")
	                if resp.lower() == 'all':
	                    migrate_all = True
	                elif resp.lower() != 'yes':
	                    sys.exit('Exiting migration script...')

	            # Articles without tags get the generic 'cinema' tag instead.
	            labels = [t.name for t in Tag.objects.get_for_object(a)] or ['cinema']
	            # Every rating description is sent as an additional tag.
	            labels.extend(['%s' % r.description for r in a.rating.all()])

	            # Rewrite image links: absolute links first, then relative.
	            body = a.body.encode('utf-8')
	            body = body.replace('http://latherrinserepeat.org/static/images/', STATIC_URL)
	            body = body.replace('/static/images/', STATIC_URL)

	            data = {
	                'email': USER_EMAIL,
	                'password': USER_PASSWORD,
	                'type': 'regular',
	                'date': a.pub_date.strftime('%Y-%m-%d %H:%M:%S'),
	                'tags': ','.join(labels).encode('utf-8'),
	                'format': 'markdown',
	                'title': a.title.encode('utf-8'),
	                'body': body,
	                'private': IMPORT_AS_PRIVATE,
	            }
	            if BLOG_URL is not None:
	                data['group'] = BLOG_URL

	            req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))

	            while 1:
	                failure = None
	                try:
	                    urllib2.urlopen(req).close()
	                except urllib2.HTTPError:
	                    # Fetched via sys.exc_info() so the handler's syntax
	                    # does not depend on the interpreter version.
	                    err = sys.exc_info()[1]
	                    # A 201 status can arrive as an exception; it is
	                    # treated as a successful post, not a failure.
	                    if err.code != 201:
	                        failure = 'There was an error (code %s): %s' % (err.code, err.read())
	                except urllib2.URLError:
	                    # No HTTP response was received, so there is no status
	                    # code or body to report -- only a reason.
	                    failure = 'There was an error: %s' % sys.exc_info()[1].reason

	                if failure is not None:
	                    print(failure)
	                    timeout_seconds = timeout_seconds * 2
	                    print('Retrying in %s seconds' % timeout_seconds)
	                    time.sleep(timeout_seconds)
	                    continue

	                timeout_seconds = 1
	                count += 1

	                # Add article primary key to progress list
	                progress.append(pk)

	                print('Successfully migrated article "%s". %s completed...' % (a.title, count))
	                time.sleep(DELAY)
	                break

	    finally:
	        print("Saving progress file...")
	        history = open(FILENAME, 'w')
	        try:
	            pickle.dump(progress, history)
	        finally:
	            history.close()

	    print("Successfully migrated %s articles." % count)


	if __name__ == '__main__':
	    main()
	import getpass
	import os
	import pickle
	import re
	import sys
	import time
	import urllib
	import urllib2


	from xml.etree import ElementTree as ET

	# Pickle file holding the ids of posts that were already uploaded.
	PROGRESS_FILENAME = 'progress.pickle'

	# Endpoint that receives one form-encoded POST per post.
	TUMBLR_POST_URL = 'http://www.tumblr.com/api/write'

	# Seconds to sleep after each successful upload; also the starting back-off.
	DELAY = 10

	# Captures the XML document embedded in an HTML comment of a backup file.
	# The two flags are combined with a plain "|".
	RE_TUMBLR_XML = re.compile(
	    r'<!-- BEGIN TUMBLR XML\s+(.*)\s+END TUMBLR XML -->',
	    re.MULTILINE | re.DOTALL
	)

	# For each post type: request field name -> name of the XML child element
	# whose text supplies that field.  'audio' posts copy no child elements.
	TYPE_MAP = {
	    'regular': {
	        'title': 'regular-title',
	        'body': 'regular-body',
	    },
	    'photo': {
	        'source': 'photo-url',
	        'caption': 'photo-caption',
	        'click-through-url': 'photo-link-url',
	    },
	    'quote': {
	        'quote': 'quote-text',
	        'source': 'quote-source',
	    },
	    'link': {
	        'name': 'link-text',
	        'url': 'link-url',
	        'description': 'link-description',
	    },
	    'conversation': {
	        'title': 'conversation-title',
	        'conversation': 'conversation-text',
	    },
	    'video': {
	        'caption': 'video-caption',
	        'embed': 'video-player',
	    },
	    'audio': {},
	}

	def main():
	    """Upload the Tumblr posts embedded in local backup files to a blog.

	    Prompts for the account email, password and an optional target blog,
	    then scans the directory containing this script for files ending in
	    ``html``.  The XML embedded in each file (see RE_TUMBLR_XML) is mapped
	    to request fields through TYPE_MAP and posted to TUMBLR_POST_URL.
	    Files without embedded XML and posts of a type missing from TYPE_MAP
	    are skipped with a message.  Ids of uploaded posts are kept in
	    PROGRESS_FILENAME so a later run skips them; the file is rewritten
	    from a ``finally`` block, so it is also saved when the run is aborted.
	    A failed request is retried without limit with a doubling pause.
	    """
	    user_email = raw_input('Enter the email address associated with your Tumblr account: ')
	    user_password = getpass.getpass()
	    blog_url = raw_input('Enter a Tumblr subdomain (Ex. mysite.tumblr.com) or hit return to skip: ')

	    if blog_url == '':
	        blog_url = None

	    if blog_url is None:
	        msg = 'Uploading data to your default blog. Continue? '
	    else:
	        msg = 'Uploading data to %s. Continue? ' % blog_url
	    if raw_input(msg).lower() not in ['y', 'yes']:
	        sys.exit()

	    count = 0
	    timeout_seconds = DELAY

	    dir_path = os.path.dirname(os.path.abspath(__file__))

	    # Look for the history file.  NOTE: pickle executes what it loads, so
	    # PROGRESS_FILENAME must only ever be a file written by this script.
	    if os.path.exists(PROGRESS_FILENAME):
	        history = open(PROGRESS_FILENAME, 'r')
	        try:
	            progress = pickle.load(history)
	        finally:
	            history.close()
	        count = len(progress)
	    else:
	        progress = []

	    try:
	        for filename in os.listdir(dir_path):

	            # Process only html files
	            if not filename.endswith('html'):
	                continue

	            backup = open(os.path.join(dir_path, filename))
	            try:
	                match = RE_TUMBLR_XML.search(backup.read())
	            finally:
	                backup.close()
	            if match is None:
	                print('Skipping file without embedded Tumblr XML: %s' % filename)
	                continue

	            post = ET.fromstring(match.group(1))
	            post_id = post.get('id')

	            # Check to see if we've already migrated this post
	            if post_id in progress:
	                print("Skipping post: %s" % post_id)
	                continue

	            post_type = post.get('type')
	            params = TYPE_MAP.get(post_type)
	            if params is None:
	                print('Skipping post %s: unsupported type "%s"' % (post_id, post_type))
	                continue

	            print("Uploading post: %s" % post_id)

	            data = {
	                'email': user_email,
	                'password': user_password,
	                'type': post_type,
	            }
	            # Only forward attributes that are present; a missing one
	            # would otherwise be url-encoded as the literal string "None".
	            for field, attr in (('date', 'date-gmt'), ('format', 'format')):
	                value = post.get(attr)
	                if value is not None:
	                    data[field] = value

	            for key, val in params.items():
	                if val == 'photo-url':
	                    # There may be several photo-url elements; use the
	                    # first one whose URL mentions media.tumblr.com.
	                    for photo_url in post.findall(val):
	                        if photo_url.text and 'media.tumblr.com' in photo_url.text:
	                            data[key] = photo_url.text.encode('utf-8')
	                            break
	                else:
	                    el = post.find(val)
	                    # Empty elements have text None and are left out.
	                    if el is not None and el.text is not None:
	                        data[key] = el.text.encode('utf-8')

	            if blog_url is not None:
	                data['group'] = blog_url

	            req = urllib2.Request(TUMBLR_POST_URL, urllib.urlencode(data))

	            while 1:
	                failure = None
	                try:
	                    urllib2.urlopen(req).close()
	                except urllib2.HTTPError:
	                    # Fetched via sys.exc_info() so the handler's syntax
	                    # does not depend on the interpreter version.
	                    err = sys.exc_info()[1]
	                    # A 201 status can arrive as an exception; it is
	                    # treated as a successful post, not a failure.
	                    if err.code != 201:
	                        failure = 'There was an error (code %s): %s' % (err.code, err.read())
	                except urllib2.URLError:
	                    # No HTTP response was received, so there is no status
	                    # code or body to report -- only a reason.
	                    failure = 'There was an error: %s' % sys.exc_info()[1].reason

	                if failure is not None:
	                    print(failure)
	                    timeout_seconds = timeout_seconds * 2
	                    print('Retrying in %s seconds' % timeout_seconds)
	                    time.sleep(timeout_seconds)
	                    continue

	                timeout_seconds = DELAY
	                count += 1

	                # Add the post id to the progress list
	                progress.append(post_id)

	                print('Successfully migrated post "%s". %s completed, pausing for %s seconds...' % (post_id, count, DELAY))
	                time.sleep(DELAY)
	                break

	    finally:
	        print("Saving progress file...")
	        history = open(PROGRESS_FILENAME, 'w')
	        try:
	            pickle.dump(progress, history)
	        finally:
	            history.close()

	    print("Successfully migrated %s articles." % count)


	if __name__ == '__main__':
	    main()