-
-
Save tuxskar/8fa64300ee02693b05f295337fff607c to your computer and use it in GitHub Desktop.
Create wget commands for all claimed books on packtpub
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
== Adapted from the code over at https://gist.github.com/graymouser/a33fbb75f94f08af7e36 ==
Log into your account at packtpub.com and save the cookies with the "cookies.txt" Chrome extension or the "Export cookies" Firefox extension into the file cookies.txt.
Then open the console in your browser's dev tools and paste the following code.
You will get a list of wget commands that you can copy and paste as a whole into a terminal to download all books.
Example: wget --load-cookies=cookies.txt --content-disposition "https://packtpub.com//ebook_download/20217/mobi" -O "R Data Visualization Cookbook.mobi"
If you only want some filetypes, edit the "pattern" variable accordingly.
wget for Windows: https://eternallybored.org/misc/wget/
wget for OS-X: http://osxdaily.com/2012/05/22/install-wget-mac-os-x/
*/
// Link-text filter applied to every <a> before its URL is inspected.
// NOTE(review): the trailing empty alternative makes this regex match ANY
// text, so as written it filters nothing — presumably intentional so that
// code-download links (whose text is not a file type) still pass; confirm
// before tightening it. The real filtering is done by the URL patterns below.
var pattern = /(MOBI|EPUB|PDF|)$/i;
// Accumulates the shell commands, one per line.
var downloadCmd = '';
var nodes = document.getElementsByTagName('a');
var titleNodes = document.getElementsByClassName('product-line');
var name, rawName, id, titleById = {},
    // Captures (1) the numeric id and (2) the remainder of the path, which is
    // later used as the file extension.
    ebookPattern = /ebook_download\/(\d+)\/(.*)/i,
    // Captures (1) the numeric id of a code bundle link.
    codePattern = /code_download\/(\d+)$/i;
// Feature switches: cover images, code zips, e-books, and `mkdir` lines.
var downloadImgs = true, downloadZip = true, downloadEbooks = true, createDir = true;
// Declared explicitly; the loop index used to leak as an implicit global.
var i;
// Build the id -> title map from every "product-line" element that carries a
// title attribute; elements without one are skipped.
for (i = 0; i < titleNodes.length; i++) {
    rawName = titleNodes[i].getAttribute('title');
    if (rawName) {
        name = rawName.trim().replace(' [eBook]', '');
        id = titleNodes[i].getAttribute('nid');
        titleById[id] = name;
    }
}
/**
 * Derive the directory name used for a book.
 *
 * Every space becomes an underscore and every apostrophe is removed; all
 * other characters are kept unchanged.
 *
 * @param {string} bookName - Book title.
 * @returns {string} The directory name.
 */
function getDirectoryName(bookName) {
    return bookName.replace(/ /g, '_').replace(/'/g, '');
}
/**
 * Build the shell command that creates the directory for one book.
 *
 * The directory name is wrapped in double quotes so that titles containing
 * shell metacharacters such as `&`, `(` or `;` do not break the command.
 *
 * @param {string} bookName - Book title; passed through getDirectoryName().
 * @returns {string} A `mkdir "<dir>"` command without a trailing newline.
 */
function createDirectories(bookName) {
    return 'mkdir "' + getDirectoryName(bookName) + '"';
}
// Bail out early when every download kind has been switched off.
if (!downloadImgs && !downloadZip && !downloadEbooks) {
    throw new Error("Nothing to do here");
}
// First pass over the known books: emit the mkdir line and, if enabled, the
// two cover-image downloads (thumbnail and main cover).
Object.keys(titleById).forEach(function (bookId) {
    if (createDir) {
        downloadCmd += createDirectories(titleById[bookId]) + "\n";
    }
    if (!downloadImgs) {
        return;
    }
    // Relies on the page's jQuery (`$`) to find the cover image of this book.
    var imageUrl = $('div[nid="' + bookId + '"]').find('.imagecache').prop('src');
    if (!imageUrl) {
        // No cover image found for this book: skip it rather than abort the
        // whole script on `undefined.slice`.
        return;
    }
    // The extension is taken to be the last three characters of the URL.
    var ext = imageUrl.slice(imageUrl.length - 3);
    downloadCmd += download(imageUrl, bookId, 'thumbview.' + ext);
    // The main cover is requested by swapping the first 'thumbview' in the
    // thumbnail URL for 'ppv4_main_book_cover'.
    imageUrl = imageUrl.replace('thumbview', 'ppv4_main_book_cover');
    ext = imageUrl.slice(imageUrl.length - 3);
    downloadCmd += download(imageUrl, bookId, 'main.' + ext);
});
/**
 * Build one wget command line for a single file of a book.
 *
 * The file is written to `<directory>/<title>.<ext>`, where the directory is
 * getDirectoryName(title) and the title is looked up in `titleById`.
 *
 * @param {string} url - URL to fetch.
 * @param {string} id - Book id; key into `titleById`.
 * @param {string} ext - Suffix appended after the title and a dot
 *     (e.g. 'pdf', 'zip', 'thumbview.jpg').
 * @returns {string} The wget command, terminated by a newline.
 */
function download(url, id, ext) {
    var title = titleById[id];
    var target = getDirectoryName(title) + '/' + title + '.' + ext;
    return 'wget --load-cookies=cookies.txt --content-disposition "' + url + '"' +
        ' -O "' + target + '"\n';
}
// Second pass: walk every <a> on the page and emit a wget line for each
// e-book or code-bundle download link.
// An index loop is used instead of for...in, which on an HTMLCollection also
// enumerates non-element properties such as `length` and `item`.
for (var linkIndex = 0; linkIndex < nodes.length; linkIndex++) {
    var a = nodes[linkIndex];
    var href = a && a.getAttribute ? a.getAttribute('href') : null;
    if (!href || href.length <= 1 || !a.text || !pattern.test(a.text.trim())) {
        continue;
    }
    var url = "https://packtpub.com/" + href;
    var codeMeta = codePattern.exec(url);
    var bookId, fileExt;
    if (codeMeta && downloadZip) {
        // The id must come from the code-link match itself; reading it from
        // the previous e-book match used a stale (or undefined) value.
        bookId = codeMeta[1];
        fileExt = 'zip';
    } else {
        var meta = ebookPattern.exec(url);
        if (!meta || !downloadEbooks) {
            continue;
        }
        bookId = meta[1];
        fileExt = meta[2];
    }
    if (!Object.prototype.hasOwnProperty.call(titleById, bookId)) {
        // No title was collected for this id, so no target path can be built.
        console.warn('Skipping ' + url + ': no title found for id ' + bookId);
        continue;
    }
    downloadCmd += download(url, bookId, fileExt);
}
console.log(downloadCmd);
// `copy` is a DevTools console utility, not a standard global; only call it
// where it exists so the script does not throw elsewhere.
if (typeof copy === 'function') {
    copy(downloadCmd);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment