Created
August 27, 2008 17:45
-
-
Save atifaziz/7541 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import httplib
import os
import sys
import urllib2
# Adapted from "Archiving Twitter data with Python" at:
# http://morethanseven.net/posts/archiving-twitter-data-with-python/
#
# See also "Download entire twitter archive of a user" at:
# http://groups.google.com/group/twitter-development-talk/t/91f8ac9437f1ba72
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that exposes the redirect status code.

    The object you get back from the opener contains the new permanent
    address and all the headers returned from the second request
    (retrieved from the new permanent address). But the status code is
    missing, so you have no way of knowing programmatically whether
    the redirect was temporary or permanent.

    An opener built with this handler still follows redirects
    automatically, but the response it returns additionally carries a
    ``status`` attribute holding the redirect code (301 or 302).

    For more information, see:
    http://diveintopython.org/http_web_services/redirects.html#d0e28866

    Credit: Mark Pilgrim, http://diveintopython.org
    """

    def _record_status(self, result, code):
        """Store *code* on *result* as ``status`` and return *result*.

        The base handler returns ``None`` when the redirect response has
        neither a ``Location`` nor a ``URI`` header; in that case there is
        no response object to annotate, so ``None`` is passed through
        unchanged instead of raising ``AttributeError``.
        """
        if result is not None:
            result.status = code
        return result

    def http_error_301(self, req, fp, code, msg, headers):
        """Follow a 301 redirect and tag the resulting response with *code*."""
        result = urllib2.HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
        return self._record_status(result, code)

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a 302 redirect and tag the resulting response with *code*."""
        result = urllib2.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        return self._record_status(result, code)
def usage():
    """Write the command-line synopsis for this script to standard output.

    The script name shown in the synopsis is the base name of
    ``sys.argv[0]``.
    """
    script_name = os.path.basename(sys.argv[0])
    template = (
        "Usage:\n"
        "%s USERNAME PASSWORD ( PAGE ( FORMAT ) )\n"
        "where:\n"
        "PAGE is 1 when omitted\n"
        "FORMAT is xml (default) or json\n"
    )
    # The text already ends with a newline, so it is written as-is with
    # nothing appended.
    sys.stdout.write(template % script_name)
def main(args):
    """Download one page of the authenticated user's Twitter archive.

    ``args`` is the command-line argument sequence (without the program
    name): ``USERNAME PASSWORD [PAGE [FORMAT]]``. PAGE defaults to 1 and
    FORMAT to ``'xml'`` when omitted or empty. The sequence is read but
    never modified.

    When fewer than two arguments are supplied the usage text is printed
    and the function returns without contacting the server. Otherwise the
    archive page is requested with HTTP basic authentication and its body
    is written to standard output; if the server answered with a 302
    redirect, a "no data" notice is written to standard error instead.

    Raises:
        ValueError: if PAGE is not an integer.
        urllib2.URLError: (including ``HTTPError``) when the request fails.
    """
    # Both credentials are mandatory; a lone USERNAME used to crash with
    # IndexError instead of showing the usage text.
    if not args or len(args) < 2:
        usage()
        return

    username, password = args[0], args[1]
    page_arg = args[2] if len(args) > 2 else None
    format_arg = args[3] if len(args) > 3 else None
    # An omitted or empty optional argument falls back to its default.
    page = int(page_arg) if page_arg else 1
    fmt = format_arg if format_arg else 'xml'

    # Raise httplib's debug level for every connection made below.
    httplib.HTTPConnection.debuglevel = 1

    auth = urllib2.HTTPPasswordMgrWithDefaultRealm()
    auth.add_password(None, 'http://twitter.com/account/', username, password)
    auth_handler = urllib2.HTTPBasicAuthHandler(auth)
    opener = urllib2.build_opener(auth_handler, SmartRedirectHandler())
    urllib2.install_opener(opener)

    request = urllib2.Request(
        'http://twitter.com/account/archive.%s?page=%d' % (fmt, page))
    response = urllib2.urlopen(request)
    try:
        # SmartRedirectHandler sets ``status`` only on redirected responses;
        # otherwise fall back to the response's own code.
        if getattr(response, 'status', response.code) == 302:
            sys.stderr.write('There is no data for page %d.' % page + '\n')
        else:
            sys.stdout.write(response.read())
            sys.stdout.write('\n')
    finally:
        # Release the connection whether or not the output succeeded.
        response.close()
# Script entry point: when executed directly (not imported), pass the
# command-line arguments, minus the program name, to main().
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment