Skip to content

Instantly share code, notes, and snippets.

@atifaziz
Created August 27, 2008 17:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atifaziz/7541 to your computer and use it in GitHub Desktop.
Save atifaziz/7541 to your computer and use it in GitHub Desktop.
import sys, urllib2, httplib, os
# Adapted from "Archiving Twitter data with Python" at:
# http://morethanseven.net/posts/archiving-twitter-data-with-python/
#
# See also "Download entire twitter archive of a user" at:
# http://groups.google.com/group/twitter-development-talk/t/91f8ac9437f1ba72
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that records the status code of a followed redirect.

    The object you get back from a stock opener contains the new address
    and all the headers returned from the second request (retrieved from
    the new address), but the status code of the redirect itself is
    missing, so you have no way of knowing programmatically whether the
    redirect was temporary or permanent.

    An opener built with this handler still follows redirects
    automatically, but the response returned for a 301 or 302 redirect
    additionally carries a ``status`` attribute holding that code.

    For more information, see:
    http://diveintopython.org/http_web_services/redirects.html#d0e28866
    Credit: Mark Pilgrim, http://diveintopython.org
    """

    def _tag_status(self, result, code):
        """Store *code* on *result* as ``status`` and return *result*.

        The base handler returns None when the redirect response names no
        target (no Location/URI header); there is nothing to annotate in
        that case, so None is passed through instead of raising
        AttributeError.
        """
        if result is not None:
            result.status = code
        return result

    def http_error_301(self, req, fp, code, msg, headers):
        """Follow a 301 redirect and expose its code as ``result.status``."""
        # Explicit base-class call (not super()): the urllib2 handler
        # classes are old-style classes under Python 2.
        result = urllib2.HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
        return self._tag_status(result, code)

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a 302 redirect and expose its code as ``result.status``."""
        result = urllib2.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        return self._tag_status(result, code)
def usage():
    """Write the command-line synopsis for this script to standard output."""
    script_name = os.path.basename(sys.argv[0])
    template = (
        "Usage:\n"
        "%s USERNAME PASSWORD ( PAGE ( FORMAT ) )\n"
        "where:\n"
        "PAGE is 1 when omitted\n"
        "FORMAT is xml (default) or json\n"
    )
    # The text already ends with a newline, so it is written as-is with
    # nothing appended.
    sys.stdout.write(template % script_name)
def main(args):
    """Download one page of the account archive and print it.

    args is the list of command-line arguments after the script name:
    USERNAME PASSWORD [PAGE [FORMAT]].  PAGE defaults to 1 and FORMAT to
    'xml' when omitted or empty.  The list is read, not modified.

    When fewer than two arguments are given the usage text is printed and
    nothing is requested.  A 302 redirect on the archive request is
    reported on stderr as "no data" for that page; otherwise the response
    body is written to stdout.

    Raises ValueError if PAGE is not an integer literal.
    """
    # USERNAME and PASSWORD are both mandatory; a lone USERNAME used to
    # slip past the emptiness check and crash with IndexError.
    if len(args) < 2:
        usage()
        return

    username, password = args[0], args[1]
    page = 1
    if len(args) > 2 and args[2]:
        page = int(args[2])
    fmt = 'xml'
    if len(args) > 3 and args[3]:
        fmt = args[3]

    # Class-wide debug level set on HTTPConnection before the request is made.
    httplib.HTTPConnection.debuglevel = 1

    auth = urllib2.HTTPPasswordMgrWithDefaultRealm()
    auth.add_password(None, 'http://twitter.com/account/', username, password)
    auth_handler = urllib2.HTTPBasicAuthHandler(auth)
    # SmartRedirectHandler makes the redirect code visible as
    # response.status, which the check below relies on.
    opener = urllib2.build_opener(auth_handler, SmartRedirectHandler())
    urllib2.install_opener(opener)

    request = urllib2.Request(
        'http://twitter.com/account/archive.%s?page=%d' % (fmt, page))
    response = urllib2.urlopen(request)
    try:
        # Fall back to the final response code when no redirect happened
        # and therefore no status attribute was attached.
        if getattr(response, 'status', response.code) == 302:
            sys.stderr.write('There is no data for page %d.\n' % page)
        else:
            sys.stdout.write(response.read())
            sys.stdout.write('\n')
    finally:
        response.close()
# Script entry point: pass everything after the program name to main().
if __name__ == "__main__":
    _cli_args = sys.argv[1:]
    main(_cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment