Created
September 11, 2012 16:34
-
-
Save alexliang1975/3699714 to your computer and use it in GitHub Desktop.
Download source code from a site powered by Trac's Browse Source view.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import urllib2 | |
from HTMLParser import HTMLParser | |
import os | |
def downloadFile(url, filename):
    """Fetch the plain-text rendering of *url* and store it in *filename*.

    ``?format=txt`` is appended to *url* before the request is made.

    Args:
        url: Address of the resource, without the ``?format=txt`` suffix.
        filename: Path of the local file to (over)write.

    Raises:
        Whatever ``urllib2.urlopen`` raises for a failed request, and
        ``IOError`` if the local file cannot be written.
    """
    response = urllib2.urlopen(url + '?format=txt')
    try:
        # Read everything before touching the disk so a failed transfer
        # does not leave an empty file behind.
        payload = response.read()
    finally:
        response.close()

    # Binary mode keeps the downloaded bytes exactly as they were sent.
    with open(filename, 'wb') as localFile:
        localFile.write(payload)
def createDirectory(dirname):
    """Create *dirname*, including missing parents, if it is not there yet.

    Args:
        dirname: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created, or if *dirname*
            already exists but is not a directory.
    """
    try:
        os.makedirs(dirname)
    except OSError:
        # An already-present directory is fine (it may have appeared between
        # any check and the creation attempt); anything else is a real error.
        if not os.path.isdir(dirname):
            raise
class Node:
    """One listing entry: a file/directory flag, a link target and a name."""

    def __init__(self):
        # isFile: 1 for a file entry, 0 for a directory entry.
        # url:    value of the entry's href attribute.
        # name:   text of the entry's link.
        self.isFile, self.url, self.name = 0, '', ''
class MyHTMLParser(HTMLParser):
    """Collect the file and directory links of a listing page.

    Anchors whose ``title`` attribute is ``'View Directory'`` or
    ``'View File'`` are turned into ``Node`` objects and appended to
    ``self.data``; every other anchor is ignored.

    Attributes:
        recording: 1 while inside a matching anchor, 0 otherwise.
        data: List of ``Node`` objects gathered so far.
        node: The ``Node`` being filled for the current anchor.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.recording = 0
        self.data = []
        self.node = Node()
        # Text chunks seen inside the current matching anchor.
        self._text = []

    def handle_starttag(self, tag, attrs):
        """Inspect an opening ``<a>`` tag and note its kind and target."""
        if tag != 'a':
            return
        for name, value in attrs:
            if name == 'title' and value in ('View Directory', 'View File'):
                print("%s %s" % (name, value))
                print("Encountered the beginning of a %s tag" % tag)
                self.recording = 1
                self.node.isFile = 1 if value == 'View File' else 0
            if name == 'href':
                print("Encounter the href attr")
                print("%s %s" % (name, value))
                self.node.url = value

    def handle_endtag(self, tag):
        """Finish the current anchor and emit its node if it matched."""
        if tag != 'a':
            return
        # Emit exactly one node per matching anchor, named after all of its
        # text, instead of once per text chunk.
        if self.recording and self._text:
            self.node.name = ''.join(self._text)
            self.data.append(self.node)
        self.recording = 0
        self.node = Node()
        self._text = []
        print("Encountered the end of a %s tag" % tag)

    def handle_data(self, data):
        """Buffer text that appears inside a matching anchor."""
        if self.recording:
            print('title directory data: ' + data)
            # The text of one anchor may be delivered in several calls.
            self._text.append(data)
def download(host, url, currDir):
    """Mirror the listing page at *url* into *currDir*, recursively.

    The page is parsed with ``MyHTMLParser``. Each file entry is saved via
    ``downloadFile``; each directory entry is mirrored by a recursive call
    into a sub-directory of the same name.

    Args:
        host: Prefix prepended to every entry's href to form its address.
        url: Address of the listing page to read.
        currDir: Local directory that receives the entries of this page.
    """
    # Make sure the destination exists so files listed directly on this
    # page can be written, not only those inside sub-directories.
    if currDir:
        createDirectory(currDir)

    parser = MyHTMLParser()
    listing = urllib2.urlopen(url)
    try:
        parser.feed(listing.read())
    finally:
        listing.close()
    parser.close()

    for node in parser.data:
        name = node.name
        # Names come from remote HTML: refuse anything that could point
        # outside currDir.
        if name in ('', '.', '..') or '/' in name or '\\' in name:
            print(" skip unsafe name %r" % (name,))
            continue
        target = os.path.join(currDir, name)
        if node.isFile:
            print(" download file  %s  url: %s" % (name, node.url))
            downloadFile(host + node.url, target)
        else:
            print(" download directory  %s  url: %s" % (name, node.url))
            download(host, host + node.url, target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script downloads all files and directories from a site powered by Trac's Browse Source page.
Following is the sample:
python
import download
host='http://genshi.edgewall.org'
url='http://genshi.edgewall.org/browser'
curDir='./genshi'
download.download(host,url,curDir)