-
-
Save paranlee/16adb40b5269c218d4e0d70550d1e6e3 to your computer and use it in GitHub Desktop.
Download source code from a repository browser powered by Trac's "Browse Source" view.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import contextlib
import os
import urllib2
from HTMLParser import HTMLParser
def downloadFile(url, filename):
    """Fetch ``url`` with ``format=txt`` requested and save it to ``filename``.

    Args:
        url: Address of the resource; ``format=txt`` is added to its query
            string before the request is made.
        filename: Local path the response body is written to. An existing
            file is overwritten.

    Errors raised by ``urllib2.urlopen`` or by opening/writing the local
    file propagate to the caller.
    """
    # Extend an existing query string instead of starting a second one
    # (a second '?' would produce a malformed URL).
    separator = '&' if '?' in url else '?'
    with contextlib.closing(urllib2.urlopen(url + separator + 'format=txt')) as response:
        # Binary mode stores the bytes exactly as received, with no
        # platform-dependent newline translation.
        with open(filename, 'wb') as localFile:
            localFile.write(response.read())
def createDirectory(dirname):
    """Create ``dirname`` and any missing parents; do nothing if it exists.

    Args:
        dirname: Path of the directory to create.

    Raises:
        OSError: If the directory could not be created and ``dirname`` is
            not an existing directory afterwards (for example a permission
            problem, or a regular file occupying the path).
    """
    try:
        os.makedirs(dirname)
    except OSError:
        # Attempt first, inspect afterwards: this avoids the window between
        # an "exists?" check and the creation in which another process
        # could create the same directory.
        if not os.path.isdir(dirname):
            raise
class Node(object):
    """One link found in a directory listing: either a file or a directory.

    Attributes:
        isFile: Truthy when the link points at a file, 0 for a directory.
        url: Value of the link's ``href`` attribute.
        name: Text of the link, used as the local file/directory name.
    """

    def __init__(self, isFile=0, url='', name=''):
        # Defaults reproduce the original "empty" node, so ``Node()`` keeps
        # working unchanged; the arguments allow building a filled-in node
        # in a single call.
        self.isFile = isFile
        self.url = url
        self.name = name

    def __repr__(self):
        return 'Node(isFile=%r, url=%r, name=%r)' % (
            self.isFile, self.url, self.name)
class MyHTMLParser(HTMLParser):
    """Collect the file and directory links of a source-browser listing page.

    An ``<a>`` element is collected when its ``title`` attribute is exactly
    ``'View Directory'`` or ``'View File'``. For each such element one
    ``Node`` is appended to ``self.data`` carrying the ``href`` value, the
    link text and the file/directory flag.

    Attributes:
        recording: 1 while inside a matching ``<a>`` element, else 0.
        data: List of collected ``Node`` objects, in document order.
        node: The ``Node`` being filled in for the current ``<a>`` element.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) so this also works
        # where HTMLParser is an old-style class.
        HTMLParser.__init__(self)
        self.recording = 0
        self.data = []
        self.node = Node()
        # Text chunks seen inside the current matching anchor.
        self._nameParts = []

    def handle_starttag(self, tag, attrs):
        """Begin a new node when an ``<a>`` element opens."""
        if tag != 'a':
            return
        # Start every anchor from a clean slate so that an unclosed <a>
        # earlier in the document cannot leak its state into this one.
        self.recording = 0
        self.node = Node()
        self._nameParts = []
        for name, value in attrs:
            if name == 'title':
                if value == 'View Directory':
                    print("%s %s" % (name, value))
                    print("Encountered the beginning of a %s tag" % tag)
                    self.recording = 1
                    self.node.isFile = 0
                elif value == 'View File':
                    print("%s %s" % (name, value))
                    print("Encountered the beginning of a %s tag" % tag)
                    self.recording = 1
                    self.node.isFile = 1
            if name == 'href':
                print("Encounter the href attr")
                print("%s %s" % (name, value))
                self.node.url = value

    def handle_endtag(self, tag):
        """Finish the current node when an ``<a>`` element closes."""
        if tag == 'a':
            if self.recording:
                # Emit the node exactly once per anchor, with the complete
                # link text. The parser may deliver the text in several
                # chunks; appending per chunk would duplicate the node and
                # keep only the last fragment as its name.
                linkText = ''.join(self._nameParts).strip()
                if linkText:
                    self.node.name = linkText
                    self.data.append(self.node)
            self.recording = 0
            self.node = Node()
            self._nameParts = []
            print("Encountered the end of a %s tag" % tag)

    def handle_data(self, data):
        """Accumulate the text of a matching ``<a>`` element."""
        if self.recording:
            print('title directory data: ' + data)
            self._nameParts.append(data)
def download(host, url, currDir):
    """Recursively mirror the listing page at ``url`` into ``currDir``.

    The page is parsed with ``MyHTMLParser``. Every file link is saved with
    ``downloadFile``; every directory link gets a local sub-directory and is
    processed by a recursive call.

    Args:
        host: Prefix prepended to each link's ``href`` to form the URL to
            fetch (links are assumed to be host-relative).
        url: Full URL of the listing page to read.
        currDir: Local directory that receives the page's entries. It is
            created if missing; an empty string means the current working
            directory.
    """
    # Ensure the target exists before anything is written into it; this
    # covers both the caller's top-level directory and each sub-directory
    # reached through recursion.
    if currDir:
        createDirectory(currDir)

    p = MyHTMLParser()
    with contextlib.closing(urllib2.urlopen(url)) as listing:
        p.feed(listing.read())
    p.close()

    for node in p.data:
        # The name comes from remote HTML: refuse anything that could place
        # the entry outside currDir.
        if (node.name in ('', '.', '..')
                or '/' in node.name
                or os.sep in node.name):
            print(" skip unsafe entry name %r" % (node.name,))
            continue
        target = os.path.join(currDir, node.name)
        if node.isFile:
            print(" download file  %s  url: %s" % (node.name, node.url))
            downloadFile(host + node.url, target)
        else:
            print(" download directory  %s  url: %s" % (node.name, node.url))
            download(host, host + node.url, target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python2