Created
October 10, 2016 23:54
-
-
Save hansdg1/ca228878332ffc3a71c35b51ea8b2af7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Hans Guthrie | |
# 10/10/2016 | |
# | |
# Purpose: | |
# Used to download free ebooks from oreilly.com | |
# http://www.oreilly.com/programming/free/ | |
# Takes an input file of URLs (urls.txt) with one URL per line. | |
# Adjusts each URL to point at the download path, then appends an extension for each file type O'Reilly offers
def fixURL(url):
    """Insert a 'files' path segment right after the first 'free' in *url*.

    Only the first occurrence of 'free' is used as the split point, so a
    later 'free' (for example in the book's file name) is kept intact.

    Returns:
        The text before the first 'free', then 'free/files', then everything
        that followed that first 'free'.

    Raises:
        IndexError: if *url* does not contain 'free' at all.
    """
    # maxsplit=1 keeps everything after the first 'free' in one piece; an
    # unbounded split would silently drop the text after a second 'free'.
    parts = url.split('free', 1)
    # Rebuild the URL with the extra path segment spliced in.
    return parts[0] + 'free/files' + parts[1]
def urlExts(url, exts=('epub', 'mobi', 'pdf')):
    """Return a list of URLs that have the given extensions added to them.

    Args:
        url: Base URL; it is converted with str() before concatenation.
        exts: Iterable of extension strings (without the leading dot).
            The default is an immutable tuple so it cannot be altered
            between calls.

    Returns:
        A new list with one entry per extension, each of the form
        ``str(url) + '.' + extension``, in the order given by *exts*.
    """
    base = str(url)
    return [base + '.' + ext for ext in exts]
# Read in all URLs from the input file, one URL per line.
with open('urls.txt', 'r') as f:
    read_data = f.read().splitlines()

masterlist = []

# Build the master list of download URLs and write them out one per line.
# The output file is opened with `with` so it is flushed and closed even if
# a URL fails to convert part-way through.
with open('URL_output.txt', 'w') as out_file:
    for line in read_data:
        # Skip blank lines (e.g. a trailing empty line in urls.txt); they
        # contain no 'free' marker for fixURL to split on.
        if not line.strip():
            continue
        cur_line = fixURL(line)
        # A distinct name for the inner loop avoids clobbering `line`.
        for download_url in urlExts(cur_line):
            masterlist.append(download_url)
            out_file.write(download_url + '\n')

# The parenthesized form prints the same under Python 2 and is valid Python 3.
print(masterlist)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
my multi-thread edit: https://gist.github.com/Kovrinic/30bc4eca70ea8f062225d8521bc1722f (best if used with Python 3.x)
original: https://www.reddit.com/r/Python/comments/56syaa/7_oreilly_python_books_for_free/d8n6597