Skip to content

Instantly share code, notes, and snippets.

Last active December 24, 2015 20:29
Show Gist options
  • Save cclauss/6858439 to your computer and use it in GitHub Desktop.
Save cclauss/6858439 to your computer and use it in GitHub Desktop.
David Beazley is one of the best trainers on Python topics. This script downloads several code bases from David's website to a local directory.
# Copy source code from David Beazley's website
# into the local directory David_Beazley.
import bs4, os, requests
# Sub-paths whose index pages are scanned for links to .py files.
codeBases = (
    'coroutines',
    'generators',
    'pydata',
    'python3io',
    'usenix2009/concurrent',
)

# Template for a code base's index URL; '{}' receives an entry of codeBases.
# NOTE(review): only '{}/' is present here -- the scheme/host prefix appears
# to be missing from this copy of the script; confirm against the original.
baseURLFmt = '{}/'

# Template for the local destination directory of one code base.
destDirFmt = 'David_Beazley/{}/'

# Text template with one '{}' placeholder assigned to theURL.
# NOTE(review): presumably the content of the generated about<CodeBase>.py
# files, and it may be truncated after "import webbrowser;" -- verify.
aboutFileText = (
    "#!/usr/bin/env python\n"
    "theURL = '{}'\n"
    "import webbrowser;"
)
def getEmbeddedPyURLs(inSoup):
    """Yield the URL of every linked ``.py`` file found in a parsed page.

    Args:
        inSoup: Parsed document exposing ``find_all('a')`` (the script passes
            a ``bs4.BeautifulSoup`` object).

    Yields:
        The module-level ``baseURL`` concatenated with the link's ``href``,
        for each anchor whose ``href`` ends with ``.py``.

    Note:
        ``baseURL`` is a global that the main loop sets before calling this
        function; it is read lazily, while the generator is being consumed.
    """
    for anchor in inSoup.find_all('a'):
        # Anchors without an href (e.g. <a name="...">) used to raise
        # KeyError via anchor['href']; skip them instead.
        href = anchor.get('href')
        if href and href.endswith('.py'):
            yield baseURL + href
# Opens inDestFileName for writing (text mode, truncating any existing file).
# The main loop calls this with the URL of a linked .py file and the local
# path it should be stored under.
# NOTE(review): the body of the `with` block is missing from this copy of the
# script, so the function is incomplete as shown. Presumably it wrote the
# content fetched from inSrceURL to destFile -- restore it from the original
# script before running.
def copyWebPageToFile(inSrceURL, inDestFileName):
with open(inDestFileName, 'w') as destFile:
# Opens inDestFileName for writing (text mode, truncating any existing file).
# The main loop calls this with the code base URL (trailing '/' removed) and
# the path of an about<CodeBase>.py file inside the destination directory.
# NOTE(review): the body of the `with` block is missing from this copy of the
# script, so the function is incomplete as shown. Presumably it wrote
# aboutFileText formatted with inSrceURL to destFile -- restore it from the
# original script before running.
def writeAboutFile(inSrceURL, inDestFileName):
with open(inDestFileName, 'w') as destFile:
# Main script: for every code base, fetch its index page, copy each linked
# .py file into David_Beazley/<codeBase>/ and write an about<CodeBase>.py
# file next to the copies.
for codeBase in codeBases:
    # baseURL is deliberately module-level: getEmbeddedPyURLs() reads it.
    baseURL = baseURLFmt.format(codeBase)

    # Drop the 'usenix2009/' path prefix so the local directory is flat.
    # str.lstrip() takes a *set of characters*, not a prefix, and would also
    # eat leading letters of any name starting with u/s/e/n/i/x/2/0/9 or '/'
    # (e.g. 'examples'), so the prefix is removed explicitly instead.
    pathPrefix = 'usenix2009/'
    if codeBase.startswith(pathPrefix):
        codeBase = codeBase[len(pathPrefix):]
    destDir = destDirFmt.format(codeBase)

    try:
        os.makedirs(destDir)
    except OSError:
        # An already existing directory is expected on re-runs; any other
        # failure (permissions, a file in the way, ...) must not be hidden.
        if not os.path.isdir(destDir):
            raise

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and bs4 does not warn).
    theSoup = bs4.BeautifulSoup(requests.get(baseURL).text, 'html.parser')

    print('Copying {}\n to {}...'.format(baseURL, destDir))
    fileCount = 0
    for embededURL in getEmbeddedPyURLs(theSoup):
        # Local file name = last path component of the remote URL.
        fileName = destDir + embededURL.rpartition('/')[2]
        copyWebPageToFile(embededURL, fileName)
        fileCount += 1
    print('{} files copied into {}\n'.format(fileCount, destDir))

    aboutFileName = 'about{}.py'.format(codeBase.title())
    writeAboutFile(baseURL.rstrip('/'), destDir + aboutFileName)

print('Done. ' + '=' * 25)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment