Skip to content

Instantly share code, notes, and snippets.

@paranlee
Forked from alexliang1975/download.py
Created May 19, 2022 14:46
Show Gist options
  • Save paranlee/16adb40b5269c218d4e0d70550d1e6e3 to your computer and use it in GitHub Desktop.
Save paranlee/16adb40b5269c218d4e0d70550d1e6e3 to your computer and use it in GitHub Desktop.
Download source code from a site powered by Trac's "Browse Source" view
#!/usr/bin/env python
import urllib2
from HTMLParser import HTMLParser
import os
def downloadFile(url, filename):
    """Fetch the ``format=txt`` rendering of *url* and save it to *filename*.

    Args:
        url: Address of the resource; ``format=txt`` is added to its query
            string before the request is made.
        filename: Local path the response body is written to. An existing
            file is overwritten.

    Raises:
        urllib2.URLError: If the request fails.
        IOError: If *filename* cannot be opened for writing.
    """
    # The URL may already carry a query string; appending a second '?'
    # would corrupt it, so extend the existing query with '&' instead.
    separator = '&' if '?' in url else '?'
    response = urllib2.urlopen(url + separator + 'format=txt')
    try:
        # Read everything before touching the disk so that a failed
        # download does not leave an empty or truncated file behind.
        content = response.read()
    finally:
        response.close()
    # Binary mode: the body is raw bytes and must not go through newline
    # translation on platforms that distinguish text from binary files.
    with open(filename, 'wb') as localFile:
        localFile.write(content)
def createDirectory(dirname):
    """Create *dirname*, including missing parents, if it is not there yet.

    Calling this for a directory that already exists is a no-op.

    Args:
        dirname: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created, including the case
            where *dirname* exists but is not a directory.
    """
    # Attempt the creation and inspect the failure rather than checking
    # first: a check-then-create sequence can lose a race with another
    # process creating the same directory in between.
    try:
        os.makedirs(dirname)
    except OSError:
        if not os.path.isdir(dirname):
            raise
class Node(object):
    """One file or directory link found in a directory listing page.

    Attributes:
        isFile: 1 when the link points at a file, 0 for a directory.
        url: Value of the link's ``href`` attribute.
        name: Text of the link; used as the local file/directory name.
    """

    def __init__(self, isFile=0, url='', name=''):
        """Create a node; every field defaults to its "empty" value."""
        self.isFile = isFile
        self.url = url
        self.name = name

    def __repr__(self):
        return 'Node(isFile=%r, url=%r, name=%r)' % (
            self.isFile, self.url, self.name)
class MyHTMLParser(HTMLParser):
    """Collect file and directory links from a directory listing page.

    Every ``<a>`` element whose ``title`` attribute is ``View File`` or
    ``View Directory`` becomes one ``Node`` in ``self.data``, carrying the
    link's ``href`` as ``url`` and its text as ``name``.

    Attributes:
        data: List of ``Node`` objects collected so far, in page order.
        recording: 1 while inside a matching ``<a>`` element, else 0.
        node: The ``Node`` being filled in for the current ``<a>`` element.
        verbose: When true, progress messages are printed to stdout.
    """

    def __init__(self, verbose=True):
        """Set up an empty parser.

        Args:
            verbose: Print trace messages while parsing (default: on).
        """
        HTMLParser.__init__(self)
        self.verbose = verbose
        self.recording = 0
        self.data = []
        self.node = Node()
        # Text pieces seen inside the current matching anchor.
        self._chunks = []

    def _trace(self, message):
        """Print *message* when verbose tracing is enabled."""
        if self.verbose:
            print(message)

    def handle_starttag(self, tag, attrs):
        """Start a fresh node on ``<a>`` and read its title/href attributes."""
        if tag != 'a':
            return
        # Start from a clean slate so that an anchor that was never closed
        # cannot leak its state into this one.
        self.recording = 0
        self.node = Node()
        self._chunks = []
        for name, value in attrs:
            if name == 'title':
                if value == 'View Directory':
                    self._trace('%s %s' % (name, value))
                    self._trace("Encountered the beginning of a %s tag" % tag)
                    self.recording = 1
                    self.node.isFile = 0
                elif value == 'View File':
                    self._trace('%s %s' % (name, value))
                    self._trace("Encountered the beginning of a %s tag" % tag)
                    self.recording = 1
                    self.node.isFile = 1
            if name == 'href':
                self._trace("Encounter the href attr")
                self._trace('%s %s' % (name, value))
                # An attribute without a value is reported as None.
                self.node.url = value or ''

    def handle_endtag(self, tag):
        """On ``</a>``, store the finished node (if any) and reset state."""
        if tag != 'a':
            return
        if self.recording:
            name = ''.join(self._chunks).strip()
            # A link without text or without a target cannot be fetched
            # or stored, so it is dropped instead of being recorded.
            if name and self.node.url:
                self.node.name = name
                self.data.append(self.node)
        self.recording = 0
        self.node = Node()
        self._chunks = []
        self._trace("Encountered the end of a %s tag" % tag)

    def handle_data(self, data):
        """Accumulate the text of the current matching anchor."""
        if self.recording:
            self._trace('title directory data: ' + data)
            # The text of one element may arrive in several calls, so it
            # is only buffered here; the node is completed at the end tag,
            # which guarantees exactly one entry per link.
            self._chunks.append(data)
def _isSafeEntryName(name):
    """Return True when *name* is a single, ordinary path component."""
    if not name or name in ('.', '..'):
        return False
    # Anything containing a path separator would escape the target folder.
    return os.path.basename(name) == name


def download(host, url, currDir):
    """Recursively mirror the listing at *url* into the directory *currDir*.

    The page at *url* is parsed with ``MyHTMLParser``. Each file link is
    saved with ``downloadFile``; each directory link gets a local directory
    and is descended into by calling this function again.

    Args:
        host: Scheme and host prefix that is prepended to every link found
            on the page (the links are expected to be host-relative).
        url: Full address of the listing page to process.
        currDir: Local directory that receives the page's entries. It is
            created if it does not exist.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()

    parser = MyHTMLParser()
    parser.feed(html)
    nodes = parser.data
    parser.close()

    # Files directly under the start URL are written straight into
    # currDir, so it has to exist before the first download.
    if currDir:
        createDirectory(currDir)

    for node in nodes:
        # Entry names come from remote HTML; refuse anything that could
        # place files outside currDir (e.g. '..' or embedded separators).
        if not _isSafeEntryName(node.name):
            print(" skip unsafe name  %r  url: %s" % (node.name, node.url))
            continue
        target = os.path.join(currDir, node.name)
        if node.isFile:
            print(" download file  %s  url: %s" % (node.name, node.url))
            downloadFile(host + node.url, target)
        else:
            print(" download directory  %s  url: %s" % (node.name, node.url))
            createDirectory(target)
            download(host, host + node.url, target)
@paranlee
Copy link
Author

python2

# Example driver (Python 2): mirror the source tree served at the Trac
# browser URL below into the local ./genshi directory.
import download

host = 'https://cforall.uwaterloo.ca'
# Start page of the repository browser on that host.
url = host + '/trac/browser'
curDir = './genshi'

download.download(host, url, curDir)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment