Skip to content

Instantly share code, notes, and snippets.

@vpetersson
Created July 24, 2014 20:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vpetersson/83da37e80702078e7775 to your computer and use it in GitHub Desktop.
Save vpetersson/83da37e80702078e7775 to your computer and use it in GitHub Desktop.
import pytumblr
import requests
from pprint import pprint
from re import findall
from urlparse import urlparse
# Tumblr blog whose posts are listed; passed to client.posts() by
# get_tumblr_posts().
tumblr_blog = 'blog.viktorpetersson.com'
# NOTE(review): not referenced by any other code visible in this file.
wordpress_site = 'http://viktorpetersson.com/'
# Text file opened by parse_pages(); the first whitespace-separated token of
# each line is taken as a page URL.
source_file = '/Users/mvip/Desktop/pages.txt'
# Tumblr REST client built from four credential strings. The values below are
# placeholders -- presumably they must be replaced with real API credentials
# before the script is run (confirm against the pytumblr documentation).
client = pytumblr.TumblrRestClient(
'<consumer_key>',
'<consumer_secret>',
'<oauth_token>',
'<oauth_secret>',
)
def get_tumblr_posts():
    """Return the URL of every post on the configured Tumblr blog.

    Uses the module-level ``client`` and ``tumblr_blog``. One request reads
    ``total_posts``; the posts are then fetched page by page and the
    ``post_url`` of each one is collected.

    Returns:
        list: post URLs, in the order the API returned them.
    """
    blog_posts = []
    # Page size requested per call (presumably the API's per-request
    # maximum -- confirm against the Tumblr API documentation).
    chunk_size = 20
    current_fetch = 0
    post_count = client.posts(tumblr_blog)['total_posts']
    while current_fetch < post_count:
        posts = client.posts(tumblr_blog, offset=current_fetch, limit=chunk_size)
        page = posts['posts']
        if not page:
            # The API handed back fewer posts than total_posts announced;
            # asking for later offsets would only waste requests.
            break
        blog_posts.extend(p['post_url'] for p in page)
        current_fetch += chunk_size
    return blog_posts
def parse_pages(path=None):
    """Read page URLs from a text file, one entry per line.

    Args:
        path: File to read. When omitted, the module-level ``source_file``
            is used, which is what existing callers rely on.

    Returns:
        list: the first whitespace-separated token of every non-blank line,
        in file order.
    """
    if path is None:
        path = source_file
    pages = []
    # The context manager closes the file even if reading fails part-way.
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank or whitespace-only lines carry no URL; indexing [0] on
            # them would raise IndexError.
            if fields:
                pages.append(fields[0])
    return pages
def match_src_and_dst(src, dst):
    """Pair each source URL with the first destination whose slug it contains.

    The slug of a destination is the last non-empty ``/``-separated segment
    of its URL. A source matches a destination when that slug occurs
    anywhere in the source string.

    Args:
        src: iterable of source URL strings.
        dst: iterable of destination URL strings.

    Returns:
        list: ``{'src': ..., 'dst': ...}`` dicts, one per matched source, in
        source order. Sources without a match are reported on stdout and
        left out of the result.
    """
    # Work out each destination's slug once instead of once per source.
    candidates = []
    for d in dst:
        # Drop trailing slashes first: otherwise the last segment is '' and
        # "'' in s" is true for every source, so everything would "match".
        slug = d.rstrip('/').split('/')[-1]
        if slug:
            candidates.append((slug, d))

    pairs = []
    for s in src:
        for slug, d in candidates:
            if slug in s:
                pairs.append({'src': s, 'dst': d})
                break
        else:
            # Inner loop finished without a break: nothing matched.
            # Parenthesised single argument prints the same text on
            # Python 2 and Python 3.
            print("Failed to find match for %s" % s)
    return pairs
def generate_nginx_redirects(matched):
    """Print one nginx ``rewrite`` line per matched pair.

    Args:
        matched: iterable of dicts with ``'src'`` and ``'dst'`` URL strings,
            as produced by ``match_src_and_dst``.

    Returns:
        None. The directives are written to stdout, one per line.
    """
    for p in matched:
        src_path = urlparse(p['src'])
        # Only the path component of the source is used as the pattern.
        # NOTE(review): the path is inserted into the regex unescaped, so
        # characters such as '.' or '+' keep their regex meaning -- confirm
        # this is acceptable for the URLs being migrated.
        # Parenthesised single argument prints the same text on Python 2
        # and Python 3.
        print("rewrite ^%s$ %s permanent;" % (src_path.path, p['dst']))
# Driver: read the source page list, fetch the Tumblr post URLs, pair them
# up, then report the counts followed by the nginx rewrite rules.
source_pages = parse_pages()
destination_pages = get_tumblr_posts()
matched = match_src_and_dst(source_pages, destination_pages)
# Each print takes a single parenthesised argument, so the output is the
# same on Python 2 and Python 3.
print("Src: " + str(len(source_pages)))
print("Dst: " + str(len(destination_pages)))
print("Matched: " + str(len(matched)))
# generate_nginx_redirects() prints the rules itself and returns None, so
# printing its return value would only append a stray "None" line.
generate_nginx_redirects(matched)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment