alexliang1975/download.py

## download.py
#!/usr/bin/env python

import urllib2
from HTMLParser import HTMLParser
import os

def downloadFile(url, filename):
    """Fetch ``url + '?format=txt'`` and save the response body to ``filename``.

    Args:
        url: Remote address of the file; ``'?format=txt'`` is appended
            before the request is made.
        filename: Local path to write. An existing file is overwritten.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when the
            request fails.
        IOError: If the local file cannot be opened or written.
    """
    u = urllib2.urlopen(url + '?format=txt')
    try:
        # Binary mode stores the fetched bytes verbatim; text mode would
        # rewrite newlines on platforms that distinguish the two.
        with open(filename, 'wb') as localFile:
            localFile.write(u.read())
    finally:
        # Release the connection even when reading or writing fails.
        u.close()

def createDirectory(dirname):
    """Create ``dirname`` and any missing parents; do nothing if it exists.

    Args:
        dirname: Path of the directory to create.

    Raises:
        OSError: If creation fails and the path still does not exist
            afterwards (for example, permission denied or an empty path).
    """
    try:
        os.makedirs(dirname)
    except OSError:
        # Attempt first and inspect afterwards: a separate exists() check
        # before makedirs() can be invalidated by another process creating
        # the path in between. An already-present path is not an error.
        if not os.path.exists(dirname):
            raise


class Node(object):
    """One entry scraped from a listing page.

    Attributes are plain and mutable; the parser fills them in while it
    walks an anchor tag.
    """

    def __init__(self, isFile=0, url='', name=''):
        """Create an entry; with no arguments every field is empty.

        Args:
            isFile: 1 for a file entry, 0 for a directory entry.
            url: Value of the anchor's ``href`` attribute.
            name: Text content of the anchor.
        """
        self.isFile = isFile
        self.url = url
        self.name = name

    def __repr__(self):
        return "Node(isFile=%r, url=%r, name=%r)" % (
            self.isFile, self.url, self.name)

class MyHTMLParser(HTMLParser):
    """Collect a Node for each ``<a>`` titled 'View Directory' or 'View File'.

    After HTML has been fed in, ``data`` holds the collected nodes in the
    order their text was encountered. Progress is printed to stdout.
    """

    def __init__(self):
        # Explicit base-class call, kept exactly as the original wrote it.
        HTMLParser.__init__(self)
        self.recording = 0   # 1 while inside an anchor with a matching title
        self.data = []       # Node objects collected so far
        self.node = Node()   # entry being filled in for the current anchor

    def handle_starttag(self, tag, attrs):
        """Inspect an opening ``<a>`` tag and record its title and href.

        Args:
            tag: Tag name as reported by the parser.
            attrs: Sequence of ``(name, value)`` attribute pairs.
        """
        if tag != 'a':
            return
        for name, value in attrs:
            if name == 'title':
                if value == 'View Directory':
                    print("%s %s" % (name, value))
                    print("Encountered the beginning of a %s tag" % tag)
                    self.recording = 1
                    self.node.isFile = 0
                elif value == 'View File':
                    print("%s %s" % (name, value))
                    print("Encountered the beginning of a %s tag" % tag)
                    self.recording = 1
                    self.node.isFile = 1
            # The href is stored for every anchor; it only matters when the
            # anchor also carries one of the titles above.
            if name == 'href':
                print("Encounter the href attr")
                print("%s %s" % (name, value))
                self.node.url = value

    def handle_endtag(self, tag):
        """Stop recording at ``</a>`` and start a fresh Node.

        Args:
            tag: Tag name as reported by the parser.
        """
        if tag == 'a':
            self.recording = 0
            self.node = Node()
            print("Encountered the end of a %s tag" % tag)

    def handle_data(self, data):
        """Store anchor text as the current node's name and collect the node.

        Args:
            data: A run of text reported by the parser.
        """
        if not self.recording:
            return
        print('title directory data: ' + data)
        if self.data and self.data[-1] is self.node:
            # The parser may report one anchor's text in several pieces;
            # extend the name instead of collecting the same node twice.
            self.node.name += data
        else:
            self.node.name = data
            self.data.append(self.node)

def download(host, url, currDir):
    """Recursively mirror the listing page at ``url`` into ``currDir``.

    The page is parsed with MyHTMLParser. Each file entry is saved with
    downloadFile; each directory entry gets a local directory and is then
    processed by a recursive call.

    Args:
        host: Prefix prepended to every scraped href to build the URL of
            the entry.
        url: Address of the listing page to fetch.
        currDir: Local directory that receives the entries. It is not
            created here, so it must already exist.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` when a page
            cannot be fetched.
    """
    f = urllib2.urlopen(url)
    try:
        html = f.read()
    finally:
        # Release the connection before recursing into sub-listings.
        f.close()

    p = MyHTMLParser()
    p.feed(html)
    p.close()

    for node in p.data:
        # Entry names come from remote HTML. A '..' component would let a
        # page place files outside currDir, so such entries are skipped.
        if '..' in node.name.replace('\\', '/').split('/'):
            print(" skip unsafe name  %s  url: %s" % (node.name, node.url))
            continue

        # Plain concatenation keeps the result under currDir even when the
        # name starts with a slash.
        target = currDir + "/" + node.name
        if node.isFile:
            print(" download file  %s  url: %s" % (node.name, node.url))
            downloadFile(host + node.url, target)
        else:
            print(" download directory  %s  url: %s" % (node.name, node.url))
            createDirectory(target)
            download(host, host + node.url, target)