# hydrogen18/dl30c3.py

## dl30c3.py
from HTMLParser import HTMLParser

import urllib2

import subprocess

import re
import os

# Local directory that wget writes the downloaded files into.
SAVE_DIR = os.path.expanduser("~/30c3")
# Hrefs that start with "30c3-", four digits and another "-" are treated as
# files to download.  Raw string so that \d reaches the regex engine verbatim
# instead of relying on Python passing an unknown string escape through.
FILE_RE = re.compile(r"30c3-\d{4}-.*")

class DirListingParser(HTMLParser):
	"""Collect directory and file links from an HTML directory listing.

	An ``<img>`` tag whose ``alt`` text is ``[DIR]`` marks the next ``<a>``
	tag as a directory link; its ``href`` is appended to ``dirs``.  Any other
	``<a>`` tag whose ``href`` matches the module-level ``FILE_RE`` is
	appended to ``files``.  Hrefs are stored exactly as found in the markup.
	"""

	def __init__(self):
		# Call the base initialiser directly rather than through super(),
		# which does not work if HTMLParser is an old-style class.
		HTMLParser.__init__(self)
		self.init()

	def init(self):
		"""Reset the collected state; kept public for existing callers."""
		self.anchorIsDir = False
		self.anchorIsFile = False
		self.dirs = []
		self.files = []

	def handle_starttag(self,tag,attrs):
		"""Record directory and file links as start tags are parsed.

		tag   -- name of the tag being opened
		attrs -- sequence of (name, value) attribute pairs
		"""
		attrs = dict(attrs)
		if tag == "img":
			if attrs.get('alt') == "[DIR]":
				self.anchorIsDir = True
			return
		if tag != "a":
			return

		# The directory marker applies to this anchor only, so consume it.
		isDir = self.anchorIsDir
		self.anchorIsDir = False

		href = attrs.get('href')
		if href is None:
			# Anchor without a target (e.g. <a name=...>): nothing to fetch,
			# and passing None on would break the regex match or URL join.
			return

		if isDir:
			print(href)
			self.dirs.append(href)
		elif FILE_RE.match(href) is not None:
			print(href)
			self.files.append(href)

# Directory listing URL that the crawl starts from (passed to download_all).
ROOT_URL = "http://ftp.ccc.de/congress/2013/"

def download_one(filename,fileurl):
	"""Fetch ``fileurl`` with wget and store it as ``filename`` in SAVE_DIR.

	wget is invoked with ``-v -t 3 -c -O <target>``.  The URL and the full
	command line are printed first.  subprocess.CalledProcessError
	propagates if wget exits with a non-zero status.
	"""
	print(fileurl)
	target = os.path.join(SAVE_DIR,filename)
	wgetArgs = ['wget','-v','-t','3','-c','-O',target,fileurl]
	print(' '.join(wgetArgs))

	subprocess.check_call(wgetArgs)

def download_all(dirurl):
	"""Recursively download every matching file reachable from ``dirurl``.

	The listing at ``dirurl`` is fetched and parsed with DirListingParser.
	Each directory href is appended to ``dirurl`` and crawled first; then
	each file href is appended to ``dirurl`` and handed to download_one().
	Errors from urllib2 or wget are not caught and abort the crawl.
	"""
	print(dirurl)
	response = urllib2.urlopen(dirurl)
	try:
		body = response.read()
	finally:
		# Close the connection before recursing so that a deep tree does
		# not keep one open response per directory level.
		response.close()

	parser = DirListingParser()
	parser.init()
	parser.feed(body)

	for dirname in parser.dirs:
		download_all(dirurl + dirname)

	for filename in parser.files:
		download_one(filename,dirurl + filename)

# Create the target directory only when it is missing: os.makedirs raises if
# it already exists, which would stop the script from being re-run to resume
# earlier downloads (wget is invoked with -c for exactly that purpose).
if not os.path.isdir(SAVE_DIR):
	os.makedirs(SAVE_DIR)
download_all(ROOT_URL)
	from HTMLParser import HTMLParser

	import urllib2

	import subprocess

	import re
	import os

	# Local directory that wget writes the downloaded files into.
	SAVE_DIR = os.path.expanduser("~/30c3")
	# Hrefs that start with "30c3-", four digits and another "-" are treated as
	# files to download.  Raw string so that \d reaches the regex engine verbatim
	# instead of relying on Python passing an unknown string escape through.
	FILE_RE = re.compile(r"30c3-\d{4}-.*")

	class DirListingParser(HTMLParser):
		"""Collect directory and file links from an HTML directory listing.

		An ``<img>`` tag whose ``alt`` text is ``[DIR]`` marks the next ``<a>``
		tag as a directory link; its ``href`` is appended to ``dirs``.  Any other
		``<a>`` tag whose ``href`` matches the module-level ``FILE_RE`` is
		appended to ``files``.  Hrefs are stored exactly as found in the markup.
		"""

		def __init__(self):
			# Call the base initialiser directly rather than through super(),
			# which does not work if HTMLParser is an old-style class.
			HTMLParser.__init__(self)
			self.init()

		def init(self):
			"""Reset the collected state; kept public for existing callers."""
			self.anchorIsDir = False
			self.anchorIsFile = False
			self.dirs = []
			self.files = []

		def handle_starttag(self,tag,attrs):
			"""Record directory and file links as start tags are parsed.

			tag   -- name of the tag being opened
			attrs -- sequence of (name, value) attribute pairs
			"""
			attrs = dict(attrs)
			if tag == "img":
				if attrs.get('alt') == "[DIR]":
					self.anchorIsDir = True
				return
			if tag != "a":
				return

			# The directory marker applies to this anchor only, so consume it.
			isDir = self.anchorIsDir
			self.anchorIsDir = False

			href = attrs.get('href')
			if href is None:
				# Anchor without a target (e.g. <a name=...>): nothing to fetch,
				# and passing None on would break the regex match or URL join.
				return

			if isDir:
				print(href)
				self.dirs.append(href)
			elif FILE_RE.match(href) is not None:
				print(href)
				self.files.append(href)

	# Directory listing URL that the crawl starts from (passed to download_all).
	ROOT_URL = "http://ftp.ccc.de/congress/2013/"

	def download_one(filename,fileurl):
		"""Fetch ``fileurl`` with wget and store it as ``filename`` in SAVE_DIR.

		wget is invoked with ``-v -t 3 -c -O <target>``.  The URL and the full
		command line are printed first.  subprocess.CalledProcessError
		propagates if wget exits with a non-zero status.
		"""
		print(fileurl)
		target = os.path.join(SAVE_DIR,filename)
		wgetArgs = ['wget','-v','-t','3','-c','-O',target,fileurl]
		print(' '.join(wgetArgs))

		subprocess.check_call(wgetArgs)

	def download_all(dirurl):
		"""Recursively download every matching file reachable from ``dirurl``.

		The listing at ``dirurl`` is fetched and parsed with DirListingParser.
		Each directory href is appended to ``dirurl`` and crawled first; then
		each file href is appended to ``dirurl`` and handed to download_one().
		Errors from urllib2 or wget are not caught and abort the crawl.
		"""
		print(dirurl)
		response = urllib2.urlopen(dirurl)
		try:
			body = response.read()
		finally:
			# Close the connection before recursing so that a deep tree does
			# not keep one open response per directory level.
			response.close()

		parser = DirListingParser()
		parser.init()
		parser.feed(body)

		for dirname in parser.dirs:
			download_all(dirurl + dirname)

		for filename in parser.files:
			download_one(filename,dirurl + filename)

	# Create the target directory only when it is missing: os.makedirs raises if
	# it already exists, which would stop the script from being re-run to resume
	# earlier downloads (wget is invoked with -c for exactly that purpose).
	if not os.path.isdir(SAVE_DIR):
		os.makedirs(SAVE_DIR)
	download_all(ROOT_URL)