Created
April 5, 2013 21:47
-
-
Save brettkelly/5322909 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Copyright 2013 Evernote Corporation
# Move gists to evernotegists
import base64 | |
import json | |
import os.path | |
import re | |
import sys | |
import urllib | |
import urllib2 | |
## Normally we'd prompt the user for this info
# NOTE(review): credentials are hard-coded here; keep real secrets out of
# version control.
github_user = 'evernotegists'
github_pass = '[REDACTED]'

# Root of the GitHub REST API; the endpoint URLs below are built from it.
GHBASE = 'https://api.github.com/'
AUTH_URL = GHBASE + 'authorizations'
GISTS_URL = GHBASE + 'gists'
class GistInfo(object):
    """
    Data retrieved from gist embed url

    This will include an id and optionally a username and filename.
    Every keyword argument given to the constructor is stored as an
    attribute of the same name.
    """

    def __init__(self, **kwargs):
        # .items() behaves the same as .iteritems() here and is not tied to
        # Python 2.
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        # for debugging: list only the stored fields together with their
        # values, sorted by name so the output is stable.
        fields = ", ".join(
            "%s=%r" % (k, v) for k, v in sorted(vars(self).items())
        )
        return "%s(%s)" % (type(self).__name__, fields)
class Gist(object):
    """
    Represents a single gist

    description -- text stored under the 'description' key of the payload
    files       -- one file object, or a list/tuple of them; each must expose
                   .name and .content
    public      -- value stored under the 'public' key of the payload
    """

    def __init__(self, description, files, public):
        self.description = description
        if isinstance(files, tuple):
            # A tuple is a collection of files, not a single file.
            files = list(files)
        elif not isinstance(files, list):
            # Anything else is treated as one lone file object.
            files = [files]
        self.files = files
        self.public = public

    @property
    def asJSON(self):
        "Return Gist as a JSON block"
        data = {
            'description': self.description,
            'public': self.public,
            # file name -> {'content': <file content>}
            'files': dict(
                (f.name, {'content': f.content}) for f in self.files
            ),
        }
        return json.dumps(data)
class GistFile(object):
    """
    A file attached to a gist

    name    -- file name (defaults to None)
    content -- file content (defaults to None)
    """

    def __init__(self, name=None, content=None):
        self.name = name
        self.content = content

    def __repr__(self):
        # content defaults to None, which has no len(); report that as 0.
        size = len(self.content) if self.content is not None else 0
        return "%s: %d bytes" % (self.name, size)
def getGithubAuthToken():
    """
    Get a Github auth token and return it

    Sends a request for a "gist"-scoped authorization to AUTH_URL, using an
    HTTP Basic Authorization header built from github_user / github_pass.

    Returns the 'token' value of the JSON response when Github answers 201,
    otherwise None.
    Re-raises urllib2.URLError (after printing it) if the request fails.
    """
    gist_data = json.dumps({"scopes": ["gist"], "note": "Accessing gists"})
    req = urllib2.Request(AUTH_URL)
    # b64encode never inserts line breaks, so nothing has to be stripped.
    base64str = base64.b64encode("%s:%s" % (github_user, github_pass))
    req.add_header("Authorization", "Basic %s" % base64str)
    req.add_data(gist_data)
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError as e:
        print("Something broke while getting Github auth token: %s" % e)
        raise  # bare raise keeps the original traceback
    try:
        if response.getcode() == 201:
            # Read the body in one piece instead of re-joining its lines.
            return json.loads(response.read())['token']
        return None
    finally:
        response.close()
def getIdFromSingleGistEmbedURL(url):
    """
    Get and return gist id (either numeric or alphanumeric)

    The id is the run of ASCII letters/digits directly in front of the
    leftmost ".js" found in url.

    Returns the id as a string, or None when url is empty or contains no
    such run.
    """
    if not url:
        return None
    match = re.search(r"([a-zA-Z0-9]+)\.js", url)
    # search() yields None when nothing matches; report that as "no id"
    # instead of failing on the missing match object.
    if match is None:
        return None
    return match.group(1)
def getGistInfoFromGithub(token, gist_id):
    """
    Pull down the gist itself as it appears on Github, get the data we need

    token   -- auth token sent in the Authorization header
    gist_id -- id of the gist to fetch; appended to GISTS_URL

    Returns a GistInfo with user (the 'login' of the response's 'user'),
    file (a GistFile built from the first entry of 'files') and id, or None
    when the download or the parsing fails. Failures are printed, not raised.
    """
    u = GISTS_URL + "/" + str(gist_id)
    try:
        req = urllib2.Request(u)
        req.add_header("Authorization", "token %s" % token)
        resp = urllib2.urlopen(req)
        try:
            gdata = resp.read()
        finally:
            resp.close()
    except Exception as e:
        print("Exception getting gist info from github")
        print(type(e))
        print(e)
        return None
    try:
        # Decode inside the try so a malformed body is reported like any
        # other parsing problem instead of escaping to the caller.
        gobj = json.loads(gdata)
        user = gobj['user']['login']
        files = gobj['files']
        if not files:
            raise ValueError("gist %s has no files" % gist_id)
        # none of these have more than one file and this is just easier.
        fname, fdata = next(iter(files.items()))
        gfile = GistFile(fname, fdata['content'])
        return GistInfo(user=user, file=gfile, id=gist_id)
    except Exception as e:
        print("Problem parsing gist info from github")
        print(type(e))
        print(e)
        return None
def createGist(token, gist):
    """
    Create a github gist and return the URL

    token -- auth token sent in the Authorization header
    gist  -- object whose asJSON attribute is used as the request body

    Returns the 'html_url' value of Github's JSON response on a 201, or None
    when the request fails or a different status code comes back.
    """
    req = urllib2.Request(GISTS_URL, gist.asJSON)
    req.add_header("Authorization", "token %s" % token)
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError as e:
        print(e)
        print(type(e))
        return None
    try:
        status = response.getcode()
        if status != 201:
            print("Weird response code from github: %d" % status)
            return None
        # Read the body in one piece instead of re-joining its lines.
        return json.loads(response.read())['html_url']
    finally:
        response.close()
### yay main event
# Read the old embed URLs, trimming whitespace and skipping blank lines.
with open("gisturls.txt") as f:
    oldurls = [line.strip() for line in f if line.strip()]

newgistmap = {}  # old embed url -> new embed (.js) url
broken = []      # urls that could not be migrated
created = []     # web urls of the gists created during this run

try:
    gauth = getGithubAuthToken()
except Exception as e:
    print("Could not get a github auth token: %s" % e)
    raise SystemExit(1)
if not gauth:
    # getGithubAuthToken returns None when Github does not answer 201.
    print("Github did not return an auth token")
    raise SystemExit(1)
# NOTE(review): this echoes a live credential to stdout.
print("Got github auth token: %s" % gauth)

try:
    for url in oldurls:
        print("Current URL: %s" % url)
        gid = getIdFromSingleGistEmbedURL(url)
        if not gid:
            broken.append(url)
            print("Could not extract ID from gist URL: %s" % url)
            continue
        ginfo = getGistInfoFromGithub(gauth, gid)
        if not ginfo:
            print("Something broke getting the gist info from github")
            broken.append(url)
            continue
        # Compare against the configured account rather than a second
        # hard-coded copy of its name.
        if ginfo.user == github_user:
            print("this gist already belongs to Evernote, skipping")
            continue
        gistObj = Gist('', ginfo.file, True)
        weburl = createGist(gauth, gistObj)
        if not weburl:
            print("Error creating gist")
            broken.append(url)
            continue
        created.append(weburl)
        jsurl = weburl + ".js"
        print("Old URL: %s" % url)
        print("New URL: %s" % jsurl)
        print("###########")
        newgistmap[url] = jsurl
## This next chunk is only here to:
## 1. make sure the finally clause runs.
## 2. alert us as to any uncaught exceptions.
## Mostly #1 though.
except Exception as e:
    print("Something REALLY broke here")
    print(type(e))
    print(e)
## Textbook paranoia follows.
finally:
    ## Save the raw python dict (as JSON) so we can reload it later if needed.
    with open('rawmap.txt', 'w') as f:
        f.write(json.dumps(newgistmap))
    ## write each url pair on its own line so we can Perl it easily.
    with open("gistmap.txt", 'w') as f:
        for k, v in newgistmap.items():
            f.write("%s %s\n" % (k, v))
    ## list any URLs that blew up so we can fix and reprocess them.
    with open("broken.txt", 'w') as f:
        for u in broken:
            f.write("%s\n" % u)
    ## A separate list of all created Gists. Probably don't need this.
    with open("created.txt", 'w') as f:
        for u in created:
            f.write("%s\n" % u)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment