Skip to content

Instantly share code, notes, and snippets.

@paulwinex
Created May 9, 2014 15:34
Show Gist options
  • Save paulwinex/0c8328353db9a2510314 to your computer and use it in GitHub Desktop.
Save paulwinex/0c8328353db9a2510314 to your computer and use it in GitHub Desktop.
Download folder structure from http
import urllib, urllib2
import re
import os
import shutil
import time
class downloaderClass(object):
    """Mirror an HTTP directory listing into a local folder tree.

    The listing page is scanned for link text of the form ``> name.ext<``
    (files) and ``> name/<`` (folders); files are downloaded and folders are
    processed recursively.  Network access goes through ``urllib2`` as
    imported at the top of this file.
    """

    # Raw-string patterns.  ``[^<>]+`` (instead of a greedy ``.+``) keeps a
    # match inside one tag's text, so several links on the same line are
    # reported as separate entries rather than merged into one bogus name.
    _FILE_PATTERN = re.compile(r'> ([^<>]+\.[a-zA-Z0-9]+)<')
    _FOLDER_PATTERN = re.compile(r'> ([^<>]+)/<')
    # Number of bytes requested from the server per read.
    _BLOCK_SIZE = 8192

    def __init__(self):
        # Verbose flag: when true, downloadFile reports size and progress.
        self.v = False

    def verbos(self, i):
        """Enable (truthy ``i``) or disable (falsy ``i``) progress output."""
        self.v = i

    def message(self, msg):
        """Print ``msg`` prefixed with ``>>>``."""
        # Single-argument form prints identically on Python 2 and 3.
        print('>>> %s' % (msg,))

    @staticmethod
    def _content_length(response):
        """Return the response's Content-Length as an int, or None if absent/invalid."""
        values = response.info().getheaders("Content-Length")
        try:
            return int(values[0])
        except (IndexError, ValueError):
            return None

    @staticmethod
    def _is_safe_name(name):
        """Return True if ``name`` cannot point outside the directory it is joined to."""
        if not name or os.path.isabs(name):
            return False
        parts = name.replace('\\', '/').split('/')
        return not any(part in ('.', '..') for part in parts)

    @classmethod
    def _parse_listing(cls, html):
        """Extract ``(files, folders)`` name lists from a listing page.

        Names are stripped of surrounding whitespace.  Entries such as ``..``
        or absolute paths are dropped: following them would recurse into the
        parent URL and delete/write outside the target directory.
        """
        files = [name.strip() for name in cls._FILE_PATTERN.findall(html)]
        folders = [name.strip() for name in cls._FOLDER_PATTERN.findall(html)]
        return ([name for name in files if cls._is_safe_name(name)],
                [name for name in folders if cls._is_safe_name(name)])

    def downloadFile(self, url, local):
        """Download ``url`` to the local path ``local`` in fixed-size chunks.

        In verbose mode the size and a percentage progress line are reported
        through ``message``; the percentage is skipped when the server sends
        no usable Content-Length or the file fits in a single block.

        Returns:
            True once the whole response has been written.

        Raises:
            Whatever ``urllib2.urlopen`` or ``open`` raise; the response and
            the output file are closed in either case.
        """
        response = urllib2.urlopen(url)
        try:
            total = self._content_length(response) if self.v else None
            if self.v:
                shown = total if total is not None else 'unknown'
                self.message("Downloading: %s Bytes: %s" % (local, shown))
            received = 0
            with open(local, 'wb') as out:
                while True:
                    chunk = response.read(self._BLOCK_SIZE)
                    if not chunk:
                        break
                    received += len(chunk)
                    out.write(chunk)
                    if self.v and total is not None and total > self._BLOCK_SIZE:
                        status = '%.2f%%' % (received * 100. / total)
                        # Trailing backspaces let a console-style ``message``
                        # override redraw the status in place.
                        status = status + chr(8) * (len(status) + 1)
                        self.message(status)
        finally:
            response.close()
        return True

    def downloadFolder(self, url, target, ind=''):
        """Recursively download the listing at ``url`` into directory ``target``.

        Existing local files are replaced; existing local sub-folders are
        removed and recreated before being filled.  ``target`` is created if
        it does not exist.  ``ind`` is the indentation prefix printed before
        each entry name and grows by one space per nesting level.

        Raises:
            Whatever ``urllib2.urlopen`` raises for the listing or any file
            (including HTTP error statuses), plus filesystem errors.
        """
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()
        # 'replace' keeps a page with stray non-UTF-8 bytes parseable.
        files, folders = self._parse_listing(data.decode('utf-8', 'replace'))

        # Avoid '//' in child URLs when the caller passes a trailing slash.
        base = url.rstrip('/')
        if not os.path.isdir(target):
            os.makedirs(target)

        # download files
        for name in files:
            local = os.path.normpath(os.path.join(target, name))
            if os.path.exists(local):
                os.remove(local)
            print('%s %s' % (ind, name))
            self.downloadFile('/'.join([base, name]), local)

        # download folders
        for name in folders:
            print('%s %s' % (ind, name))
            local = os.path.normpath(os.path.join(target, name))
            if os.path.exists(local):
                shutil.rmtree(local)
                time.sleep(0.3)  # for fix WindowsError 5
            os.makedirs(local)
            self.downloadFolder('/'.join([base, name]), local, ind + ' ')
# Example:
# url = 'http://site/folder/subfolder'
# targetDir = 'c:/targetFolder'
# d = downloaderClass()
# d.downloadFolder(url, targetDir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment