Skip to content

Instantly share code, notes, and snippets.

@yulioaj290
Last active November 6, 2018 20:17
Show Gist options
  • Save yulioaj290/20b4dccf19a262880497b66539f6bd87 to your computer and use it in GitHub Desktop.
Save yulioaj290/20b4dccf19a262880497b66539f6bd87 to your computer and use it in GitHub Desktop.

Laracasts.com video downloader

This script only produces a file, video.sh, containing all the commands needed to download every video of a series.

Requirements

  • NodeJS 8.4.0 or higher.
  • NPM 5.3.0 or higher.
  • Bzip2 1.0.6 or higher.

To prepare the environment for the script, follow these steps (based on Ubuntu 14.04):

  1. (Optional) Update your linux repositories and upgrade all packages.
$ sudo apt-get update
$ sudo apt-get upgrade
  1. Install bzip2 on your system; it is necessary to unzip node package modules.
$ sudo apt-get install bzip2
  1. Install PhantomJS dependencies on your Debian/Ubuntu (or Linux) system.
$ sudo apt-get install libfontconfig1 fontconfig libfontconfig1-dev libfreetype6-dev
  1. Install the Node dependencies listed in package.json.
$ npm install
  1. Verify that the "phantomjs" and "casperjs" packages have been installed and linked into the system executables folder '/usr/bin/'.
$ phantomjs -v
$ casperjs -v

or

$ phantomjs --version
$ casperjs --version

Note: If either of these packages cannot be found, you must create a symbolic link to its executable.

$ ln -s /absolute/path/to/project/node_modules/phantomjs/bin/phantomjs /usr/bin/phantomjs
$ ln -s /absolute/path/to/project/node_modules/casperjs/bin/casperjs /usr/bin/casperjs
  • Then run the verification commands above (phantomjs/casperjs version check) again.
  1. Configure the values inside the "scraper.js" file.
* var prefixFile = '_[Vue.JS]';
* var sufixFile = '.mp4';
* const domain = 'https://laracasts.com';
* const initURL = 'https://laracasts.com/series/learn-vue-2-step-by-step/episodes/1';
  1. Execute the script.
$ casperjs --ignore-ssl-errors=true scraper.js
{
"version": "0.0.1",
"private": true,
"name": "Laracasts.com video downloader",
"description": "Laracasts.com video downloader",
"repository": "https://gist.github.com/yulioaj290/20b4dccf19a262880497b66539f6bd87",
"license": "MIT",
"devDependencies": {
"casperjs": "^1.1.1",
"http-server": "^0.6.1",
"karma": "^0.12.16",
"karma-chrome-launcher": "^0.1.4",
"karma-jasmine": "^0.1.5",
"phantomjs": "^1.9.19",
"protractor": "~1.0.0",
"shelljs": "^0.2.6",
"tmp": "0.0.23"
},
"scripts": {
"prestart": "npm install",
"start": "http-server -a 0.0.0.0 -p 8000",
"pretest": "npm install",
"test": "node node_modules/karma/bin/karma start test/karma.conf.js",
"test-single-run": "node node_modules/karma/bin/karma start test/karma.conf.js --single-run",
"preupdate-webdriver": "npm install",
"update-webdriver": "webdriver-manager update",
"preprotractor": "npm run update-webdriver",
"protractor": "protractor test/protractor-conf.js",
"update-index-async": "node -e \"require('shelljs/global'); sed('-i', /\\/\\/@@NG_LOADER_START@@[\\s\\S]*\\/\\/@@NG_LOADER_END@@/, '//@@NG_LOADER_START@@\\n' + '\\n//@@NG_LOADER_END@@', 'app/index-async.html');\""
}
}
/**
* Copyright 2017 Yulio Aleman Jimenez
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
* NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
var fs = require('fs');
var args = require('system').args;
// CasperJS instance shared by the whole script. The page is configured to
// skip images and plugins and to run with web security disabled.
var casper = require('casper').create({
    verbose: true,
    pageSettings: {
        webSecurityEnabled: false,
        loadImages: false,
        loadPlugins: false
    }
});

// --- User configuration -----------------------------------------------------
// prefixFile / sufixFile are appended (in that order) to every generated file
// name; initURL is the episode page whose sidebar lists the whole series.
var prefixFile = '_[Vue.JS]';
var sufixFile = '.mp4';
const domain = 'https://laracasts.com';
const initURL = 'https://laracasts.com/series/learn-vue-2-step-by-step/episodes/1';

// NOTE(review): the script itself reads no command-line arguments; the
// threshold of 4 presumably accounts for arguments injected by the casperjs
// launcher - confirm before changing it.
if (args.length < 4) {
    casper.echo("Usage: casperjs --ignore-ssl-errors=true script.js");
    casper.exit(0);
}

// Effectively disable the wait timeout: 10,000,000 ms is roughly 2.8 hours.
casper.options.waitTimeout = 10000000;

// Log resources that time out. The handler is registered through the event
// emitter: assigning a plain `casper.onResourceTimeout` property is never
// invoked by CasperJS.
casper.on('resource.timeout', function (request) {
    console.log('Response (#' + request.id + '): ' + JSON.stringify(request));
});

casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X); Safari');

// Send a permissive Accept header with every request once the run has started.
casper.on('started', function () {
    this.page.customHeaders = { 'Accept': '*/*' };
});
// casper.on('resource.error',function (request) {
// this.echo('RESOURCE: [' + request.id + '] IN: [' + request.url + '] SAID: [' + request.errorString + ']' );
// });
/**
 * Converts the raw innerHTML of an episode link into a file-name fragment.
 *
 * The title is normally found on the second line of the link markup; when the
 * markup has no line break the first (only) line is used instead of failing.
 * Spaces become '_', ':' is removed, '#' becomes 'No' and '/' becomes '_' so
 * the result can never contain a path separator.
 *
 * @param {string} rawTitle innerHTML of an `a.episode-title` element.
 * @returns {string} File-name-safe title fragment.
 */
function toTitleSlug(rawTitle) {
    var lines = String(rawTitle).split('\n');
    var title = (lines.length > 1) ? lines[1] : lines[0];
    return title
        .split(' ').join('_')
        .split(':').join('')
        .split('#').join('No')
        .split('/').join('_');
}

/**
 * Builds the URL handed to wget from a <source> element's `src` attribute.
 *
 * @param {string} src Value of the `src` attribute.
 * @returns {string} Protocol-relative values get an `https:` scheme,
 *     root-relative values are prefixed with `domain`, anything else is
 *     returned unchanged (so an already absolute URL is not corrupted).
 */
function toDownloadUrl(src) {
    if (src.indexOf('//') === 0) {
        return 'https:' + src;
    }
    if (src.charAt(0) === '/') {
        return domain + src;
    }
    return src;
}

/**
 * Wraps a value in single quotes for safe use as one bash argument.
 * Titles and URLs come from the scraped page, so they must not be able to
 * inject commands or break quoting in the generated script.
 *
 * @param {string} value Text to quote.
 * @returns {string} Single-quoted text with embedded single quotes escaped.
 */
function shellQuote(value) {
    return "'" + String(value).split("'").join("'\\''") + "'";
}

// Open the first episode, read the series sidebar, visit every episode page
// and collect one wget command per video into video.sh.
casper
    .start()
    .open(initURL)
    .then(function () {
        // One pass over the sidebar: link target and raw title travel together,
        // so they cannot get out of step. Only plain data is returned because
        // the callback runs inside the page.
        var episodes = this.evaluate(function () {
            var anchors = document.querySelectorAll('div.sidebar-section ul.condensed-episode-list li a.episode-title');
            return [].map.call(anchors, function (anchor) {
                return { href: anchor.href, rawTitle: anchor.innerHTML };
            });
        }) || [];

        if (episodes.length === 0) {
            this.echo('No episode links found at ' + initURL + '; video.sh was not written.');
            this.exit(1);
            return;
        }

        var output = "#!/bin/bash \n";

        episodes.forEach(function (episode, index) {
            casper.thenOpen(episode.href, function () {
                var position = index + 1;
                var fileName = ((position < 10) ? '0' + position : position) + '_' +
                    toTitleSlug(episode.rawTitle) + prefixFile + sufixFile;

                // Returns null instead of throwing when the page has no default source.
                var video = this.evaluate(function () {
                    var source = document.querySelector('source[data-default="true"]');
                    return source ? source.getAttribute('src') : null;
                });

                if (!video) {
                    console.log('SKIPPED [' + fileName + ']: no default video source found');
                    return;
                }

                output += 'wget -O ' + shellQuote(fileName) + ' ' + shellQuote(toDownloadUrl(video)) + ' \n';
                console.log('ADDED [' + fileName + ']');
            });
        });

        // Queued after all episode steps, so `output` is complete when written.
        this.then(function () {
            fs.write("video.sh", output, 'w');
        });
    });
casper.run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment