Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Packt Pub Downloader - A quick, hacky script to download all your e-books from packtpub.com. It may not always work, as Packt may change their API calls, etc.
<?php
// Shorthand for the platform directory separator, used when building save paths.
define('DS', DIRECTORY_SEPARATOR);

// ---------------------------------------------------------------------------
// Configuration values
// ---------------------------------------------------------------------------

// Where things are saved: both directories below are relative to $saveParentDir.
$saveParentDir = __DIR__;
$ebooksDir = 'ebooks';
$extrasDir = implode(DS, ['ebooks', 'extras']);

// Seconds to pause between page requests / different book downloads.
$sleepDuration = 4;

// How many book records to request per list call to the PacktPub API (maximum 25).
$booksPerListPage = 25;

// File types (as reported by BOOK_FORMATS_URL) that should be downloaded.
$fileTypesWanted = ['epub', 'mobi', 'pdf', 'code'];

// Whether the front cover should be downloaded too (if available).
$downloadFrontCover = true;

// packt-pub account credentials.
$emailAddress = '<your email goes here>';
$password = '<your password goes here>';

// ---------------------------------------------------------------------------
// Service endpoints (sprintf templates where placeholders are present)
// ---------------------------------------------------------------------------
define('AUTH_URL', 'https://services.packtpub.com/auth-v1/users/tokens');
define('REFRESH_TOKEN_URL', 'https://services.packtpub.com/auth-v1/users/me/tokens');
// Placeholders: page size (limit), then offset.
define(
    'OWNED_BOOKS_URL',
    'https://services.packtpub.com/entitlements-v1/users/me/products?sort=createdAt:DESC&limit=%d&offset=%d'
);
// Placeholder: product id.
define('BOOK_FORMATS_URL', 'https://services.packtpub.com/products-v1/products/%d/types');
// Placeholders: product id, then file type.
define('FILE_DOWNLOAD_DETAILS_URL', 'https://services.packtpub.com/products-v1/products/%d/files/%s');
// Placeholder: product id.
define('BOOK_SUMMARY_URL', 'https://static.packt-cdn.com/products/%d/summary');

// cURL options applied to every request made by this script.
$defaultCurlOptions = [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_USERAGENT => implode(
        ' ',
        [
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko)',
            'Chrome/72.0.3626.121 Safari/537.36',
        ]
    ),
];
/**
 * Print an error message followed by a newline and abort the script.
 *
 * @param string $message Text to show before terminating
 *
 * @return void This function does not return; the process is terminated
 */
function errorAndDie($message)
{
    echo $message, PHP_EOL;
    // A bare `die` ends the process with exit status 0, which makes a fatal
    // failure indistinguishable from success for shells, cron jobs and CI.
    exit(1);
}
/**
 * Read the expiry timestamp (the "exp" claim) from a JSON Web Token.
 *
 * JWT segments are base64url encoded: "-" and "_" stand in for "+" and "/",
 * and padding is stripped. PHP's non-strict base64_decode() silently drops
 * characters outside the standard alphabet, so the payload is translated back
 * to the standard alphabet before it is decoded.
 *
 * @param string $jwt Token in "header.payload.signature" form
 *
 * @return int|null Value of the "exp" claim, or null when the token is
 *                  malformed or carries no such claim
 */
function getJwtExpiry($jwt)
{
    $segments = explode('.', (string) $jwt);
    if (count($segments) < 2) {
        // No payload segment to inspect
        return null;
    }

    $payloadJson = base64_decode(strtr($segments[1], '-_', '+/'));
    if (false === $payloadJson) {
        return null;
    }

    $payload = json_decode($payloadJson);

    // isset() copes with a null / non-object payload without raising warnings
    return isset($payload->exp) ? $payload->exp : null;
}
// Authentication state shared (by reference) between the closures below.
// $getJson starts as null and is assigned further down; $refreshJwt captures
// it by reference so that it sees the closure once it has been assigned.
$jwt = '';
$refreshToken = '';
$getJson = null;

/**
 * Exchange the current refresh token for a new access token (JWT) and a new
 * refresh token, storing both in the shared $jwt / $refreshToken variables.
 *
 * Aborts the script via errorAndDie() when no refresh token is held.
 * NOTE(review): both tokens are echoed in clear text - confirm this is wanted.
 */
$refreshJwt = function () use (&$getJson, &$jwt, &$refreshToken) {
    if (empty($refreshToken)) {
        errorAndDie('Empty refresh token, cannot refresh JWT');
    }

    $requestBody = json_encode(['refresh' => $refreshToken]);
    $requestOptions = [
        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        CURLOPT_POSTFIELDS => $requestBody,
    ];
    $refreshed = $getJson(REFRESH_TOKEN_URL, 'Token Refresh Failed', $requestOptions);

    $jwt = $refreshed->data->access;
    $refreshToken = $refreshed->data->refresh;

    echo 'JWT: ', $jwt, PHP_EOL, 'Refresh Token: ', $refreshToken, PHP_EOL;
};
/**
 * Make sure the shared JWT is still valid, refreshing it once if it is not.
 *
 * Aborts the script via errorAndDie() when the token is still expired after
 * the refresh attempt.
 *
 * @return mixed Expiry timestamp of the JWT that is current on return
 */
$checkJwtExpiry = function () use ($refreshJwt, &$jwt) {
    $expiresAt = getJwtExpiry($jwt);
    if ($expiresAt > time()) {
        // Still valid, nothing to do
        return $expiresAt;
    }

    echo 'Current JWT has expired', PHP_EOL;
    $refreshJwt();

    // $jwt is captured by reference, so this reads the freshly issued token
    $expiresAt = getJwtExpiry($jwt);
    if ($expiresAt <= time()) {
        errorAndDie('JWT expired, and refresh failed');
    }
    echo 'JWT expires at ', date('Y/m/d H:i:s T', $expiresAt), PHP_EOL;

    return $expiresAt;
};
/**
 * Request a URL and return its decoded JSON body.
 *
 * When a JWT is held it is sent as a Bearer token; for every URL other than
 * the two token endpoints the JWT's expiry is checked (and the token refreshed
 * if needed) first. Any failure aborts the script via errorAndDie(), with the
 * cause appended to the caller's message so that transport errors, HTTP errors
 * and malformed responses can be told apart.
 *
 * @param string $url          URL to request
 * @param string $errorMessage Message to print if the request fails
 * @param array  $extraOptions Additional cURL options (CURLOPT_* => value);
 *                             on key clashes the defaults take precedence
 *
 * @return mixed Decoded JSON (objects as stdClass)
 */
$getJson = function ($url, $errorMessage, $extraOptions = []) use ($checkJwtExpiry, $defaultCurlOptions, &$jwt) {
    if (!empty($jwt)) {
        // The token endpoints must not trigger an expiry check, otherwise a
        // refresh request would recurse back into another refresh.
        if (!in_array($url, [AUTH_URL, REFRESH_TOKEN_URL], true)) {
            $checkJwtExpiry();
        }
        if (!isset($extraOptions[CURLOPT_HTTPHEADER])) {
            $extraOptions[CURLOPT_HTTPHEADER] = [];
        }
        $extraOptions[CURLOPT_HTTPHEADER][] = 'Authorization: Bearer ' . $jwt;
    }

    $ch = curl_init($url);
    if (false === $ch) {
        errorAndDie($errorMessage . ' (could not initialise cURL)');
    }
    curl_setopt_array($ch, $defaultCurlOptions + $extraOptions);
    $response = curl_exec($ch);
    $responseCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Must be read before the handle is closed
    $transportError = curl_error($ch);
    curl_close($ch);

    if (false === $response) {
        errorAndDie($errorMessage . ' (cURL error: ' . $transportError . ')');
    }
    if ($responseCode !== 200) {
        errorAndDie($errorMessage . ' (HTTP status ' . $responseCode . ')');
    }

    $decodedJson = json_decode($response);
    if (null === $decodedJson) {
        errorAndDie($errorMessage . ' (response was not valid JSON)');
    }

    return $decodedJson;
};
/**
 * Download a URL straight into a file on disk.
 *
 * Redirects are followed. A download counts as failed when the target file
 * cannot be opened, the transfer itself fails, the final HTTP status is not
 * 200, or nothing was written. On failure the error message is printed and
 * any empty / partial file is removed so it cannot be mistaken for a book.
 *
 * @param string $url          URL to download
 * @param string $savePath     Path of the file to write (truncated if present)
 * @param string $errorMessage Message to print when the download fails
 *
 * @return bool true on success, false on failure
 */
$downloadFile = function ($url, $savePath, $errorMessage) use ($defaultCurlOptions) {
    $fh = fopen($savePath, 'w+');
    if (false === $fh) {
        echo $errorMessage, PHP_EOL;
        return false;
    }

    $transferOk = false;
    $responseCode = 0;
    $ch = curl_init($url);
    if (false !== $ch) {
        // array_replace() lets these values override the defaults (the `+`
        // operator would keep the default CURLOPT_RETURNTRANSFER => true) and
        // keeps CURLOPT_RETURNTRANSFER ahead of CURLOPT_FILE, so the file
        // handle is the last write target configured.
        curl_setopt_array(
            $ch,
            array_replace(
                $defaultCurlOptions,
                [
                    CURLOPT_RETURNTRANSFER => false,
                    CURLOPT_FOLLOWLOCATION => true,
                    CURLOPT_FILE => $fh,
                ]
            )
        );
        // false signals a transport failure, e.g. a connection dropped mid-file
        $transferOk = (false !== curl_exec($ch));
        $responseCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
    }
    fclose($fh);

    // Make sure filesize() reports the size after the write, not a cached value
    clearstatcache(true, $savePath);
    if (!$transferOk || $responseCode !== 200 || !filesize($savePath)) {
        if (is_file($savePath)) {
            unlink($savePath);
        }
        echo $errorMessage, PHP_EOL;
        return false;
    }

    return true;
};
// Log in with the configured credentials; the response carries the access
// token (JWT) and the refresh token used for every later request.
echo 'Logging In', PHP_EOL;
$loginBody = json_encode(
    [
        'username' => $emailAddress,
        'password' => $password,
    ]
);
$loginOptions = [
    CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
    CURLOPT_POSTFIELDS => $loginBody,
];
$tokenInfo = $getJson(AUTH_URL, 'Login Failed', $loginOptions);

$jwt = $tokenInfo->data->access;
$refreshToken = $tokenInfo->data->refresh;

// NOTE(review): both tokens are echoed in clear text - confirm this is wanted.
echo 'JWT: ', $jwt, PHP_EOL, 'Refresh Token: ', $refreshToken, PHP_EOL;
echo 'JWT expires at ', date('Y/m/d H:i:s T', getJwtExpiry($jwt)), PHP_EOL;

// Pause before the next request
echo 'Sleeping for ', $sleepDuration, ' seconds', PHP_EOL;
sleep($sleepDuration);
// Number of books examined so far; also used as the offset for list requests.
$bookCount = 0;
echo 'Getting list of eBooks', PHP_EOL;
$booksInfo = $getJson(
    sprintf(OWNED_BOOKS_URL, $booksPerListPage, $bookCount),
    'Couldn\'t retrieve list of books'
);
$totalNumberOfBooks = $booksInfo->count;
$noOfPages = ceil($totalNumberOfBooks / $booksPerListPage);
echo 'Total number of books: ', $totalNumberOfBooks, ', Total number of pages: ', $noOfPages, PHP_EOL;

for ($pageCount = 1; $pageCount <= $noOfPages; $pageCount++) {
    // The first page was already fetched above to learn the total count
    if ($pageCount > 1) {
        $booksInfo = $getJson(
            sprintf(OWNED_BOOKS_URL, $booksPerListPage, $bookCount),
            'Couldn\'t retrieve list of books'
        );
    }
    $pageBooksCount = count($booksInfo->data);
    echo 'Found ', $pageBooksCount, ' books on page ', $pageCount, PHP_EOL;
    if (0 === $pageBooksCount) {
        continue;
    }

    // Create the target directories, including any missing parents. Stop if
    // that fails, because every download on this page would fail as well.
    foreach ([$ebooksDir, $extrasDir] as $relativeDir) {
        $targetDir = $saveParentDir . DS . $relativeDir;
        if (!file_exists($targetDir) && !mkdir($targetDir, 0777, true) && !is_dir($targetDir)) {
            errorAndDie('Couldn\'t create directory ' . $targetDir);
        }
    }

    foreach ($booksInfo->data as $bookData) {
        $bookCount++;
        $name = $bookData->productName;
        echo $bookCount, '. Examining "', $name, '"', PHP_EOL;
        // Replace characters that are not allowed in file names with "-",
        // and runs of whitespace with "_"
        $fileName = preg_replace(['/[\<\>\:\"\/\\\|\?\*\%]+/', '/\s+/'], ['-', '_'], $name);

        $downloadFormatInfo = $getJson(
            sprintf(BOOK_FORMATS_URL, $bookData->productId),
            'Couldn\'t retrieve available book formats'
        );
        // A product without format information is treated as having no downloads
        $availableFileTypes = !empty($downloadFormatInfo->data[0]->fileTypes)
            ? $downloadFormatInfo->data[0]->fileTypes
            : [];
        $downloadLinks = [];
        foreach ($availableFileTypes as $fileType) {
            if (in_array($fileType, $fileTypesWanted, true)) {
                $downloadLinks[$fileType] = sprintf(FILE_DOWNLOAD_DETAILS_URL, $bookData->productId, $fileType);
            }
        }
        if (0 === count($downloadLinks)) {
            echo 'No Downloadable Books / Code', PHP_EOL;
            continue;
        }

        foreach ($downloadLinks as $format => $downloadHref) {
            $downloadLinkInfo = $getJson(
                $downloadHref,
                'Couldn\'t retrieve book download link'
            );
            if (empty($downloadLinkInfo->data)) {
                // No URL to fetch; skip rather than create an empty file
                echo $format . ' download failed', PHP_EOL;
                continue;
            }
            // Code bundles are saved as .zip under extras, books under ebooks
            $savePath = ('code' === $format)
                ? $saveParentDir . DS . $extrasDir . DS . $fileName . '.zip'
                : $saveParentDir . DS . $ebooksDir . DS . $fileName . '.' . $format;
            echo 'Downloading ', $format, ' to ', $savePath, PHP_EOL;
            $downloadFile($downloadLinkInfo->data, $savePath, $format . ' download failed');
        }

        if ($downloadFrontCover) {
            $frontCoverLinkInfo = $getJson(
                sprintf(BOOK_SUMMARY_URL, $bookData->productId),
                'Couldn\'t retrieve book summary link'
            );
            if (!empty($frontCoverLinkInfo->coverImage)) {
                // Take whatever follows the last "." in the URL as the extension
                $fileExt = preg_replace('/^.+\.([^\.]+)$/', '$1', $frontCoverLinkInfo->coverImage);
                $savePath = $saveParentDir . DS . $extrasDir . DS . $fileName . '.' . $fileExt;
                echo 'Downloading Front Cover to: ', $savePath, PHP_EOL;
                $downloadFile(
                    $frontCoverLinkInfo->coverImage,
                    $savePath,
                    'Front cover download failed'
                );
            }
        }

        echo 'Sleeping for ', $sleepDuration, ' seconds', PHP_EOL;
        sleep($sleepDuration);
    }
}
@nneul

This comment has been minimized.

Copy link

nneul commented Dec 19, 2018

This no longer works due to changes in the site. I put together a quick hack that shows the new method, which uses their new REST endpoints and is much simpler than site parsing: https://gist.github.com/nneul/6eda98fd87a58a623b857523247f3471

@chappy84

This comment has been minimized.

Copy link
Owner Author

chappy84 commented Mar 9, 2019

This is now once again working after PacktPub's major site changes

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.