Skip to content

Instantly share code, notes, and snippets.

@jamlfy
Last active January 3, 2017 21:20
Show Gist options
  • Save jamlfy/2c1b14f64a75f59a0a0b62ede0180adb to your computer and use it in GitHub Desktop.
Save jamlfy/2c1b14f64a75f59a0a0b62ede0180adb to your computer and use it in GitHub Desktop.
/*** HOME ***/
// Base URL of the site to scrape; page paths are appended to it.
var HOME = 'http://www.plotandesign.net/';

/*** WHERE ***/
// jQuery selectors handed to the in-page extractor functions.
var HOME_CONTENT = '#content .row2 > .main > .wrapper > .col1.cols';
var POST_MENU = 'ul.left a';
var TOP_MENU = '#header .menu li > a';
var CONTENT_POST = '#content .row2 .main > .wrapper';

/*** DATA ***/
// Scraped results, keyed by the URL they were read from.
var PAGES = {};
// URLs of the individual posts, filled in from the POST_MENU links.
var POST = [];
// Cursor into the menu / post list that is currently being walked.
var stepIndex = 0;
/**
 * Handler for messages the page writes to its console: echoes the
 * message on the script's own console, prefixed with '>'.
 * @param {String} msg    Message text
 * @param {*}      line   Not used here
 * @param {*}      source Not used here
 */
function posibleJS (msg, line, source) {
	var marker = '>';
	console.log(marker, msg);
}
/**
 * Handler for `alert()` calls made by the page: echoes the alert text
 * on the script's own console, prefixed with '<'.
 * @param {String} msg Alert text
 */
function posibleAlert (msg) {
	var marker = '<';
	console.log(marker, msg);
}
/**
 * Handler for resources the page failed to load: writes the error's
 * description to stderr.
 * @param {Object} resourceError Error descriptor; only `errorString` is read
 */
function posibleError (resourceError) {
	var reason = resourceError.errorString;
	console.error(reason);
}
/**
 * Extracts one post or static page from the current document.
 *
 * This function is handed to `page.evaluate`, so it has to stay
 * self-contained: the `clean` helper lives inside it and the only outside
 * name it uses is the page's own jQuery `$`.
 *
 * @param  {String} name Selector of the element whose children hold the content
 * @return {Object} Post/Page: `text` is an array of outerHTML strings;
 *                  `title` (from an H1) and `autor` (from the first I
 *                  element) are only present when such an element is found
 */
function getPostOrPages (name) {
	// Tags that are kept as content.
	// NOTE(review): the pattern is not anchored, so it also matches any tag
	// name that merely contains one of these letters (SPAN, SCRIPT, PRE, ...).
	// Left as-is because tightening it would change which elements are kept.
	var KEEP = /P|H[1-6]|UL|I|TABLE|LI|IMG/i;

	/**
	 * Flattens a list of elements: DIVs are replaced by their (recursively
	 * cleaned) contents, kept tags are passed through, the rest is dropped.
	 */
	function clean (node) {
		var newNode = [];
		for (var i = 0; i < node.length; i++) {
			var el = node[i];
			if (!el || el.body || !el.tagName) {
				continue;
			}
			if (el.tagName === 'DIV') {
				// Collapse whitespace and turn <br> into paragraph breaks
				// before re-parsing the DIV's markup with jQuery.
				var html = el.innerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/<br>/gim, '</p><p>');
				var z = clean($(html));
				for (var w = 0; w < z.length; w++) {
					newNode.push(z[w]);
				}
			} else if (KEEP.test(el.tagName)) {
				newNode.push(el);
			}
		}
		return newNode;
	}

	var child = clean($(name).children());
	var post = { text : [] };
	for (var i = 0; i < child.length; i++) {
		var el = child[i];
		// NOTE(review): elements with empty innerHTML are skipped, which
		// also drops every IMG even though `clean` lets IMG through.
		if (!el || !el.tagName || el.body || !el.innerHTML.length) {
			continue;
		}
		if (el.tagName === 'H1') {
			post.title = el.innerText;
		} else if (el.tagName === 'I' && !post.autor) {
			// The first I element is the author line. This test must come
			// before the generic text branch, otherwise it is never reached.
			post.autor = el.innerText;
		} else if (el.tagName !== 'BR') {
			post.text.push(el.outerHTML);
		}
	}
	return post;
}
/**
 * Splits the home page content into a list of post summaries.
 *
 * Handed to `page.evaluate`, so it only relies on the page's jQuery `$`.
 * Every H2 child starts a new entry; the siblings that follow it belong to
 * that entry until the next H2. Children that come before the first H2 are
 * ignored, and an entry whose H2 has no text is not emitted.
 *
 * @param  {String} name Selector of the element whose children are scanned
 * @return {Array}  Entries `{ title, text, autor }`: `text` is an array of
 *                  cleaned outerHTML strings, `autor` is only present when
 *                  an I element follows the heading
 */
function getHome(name) {
	var data = [],
		post = {},
		child = $(name).children();
	for (var i = 0; i < child.length; i++) {
		var el = child[i];
		if (el.tagName === 'H2') {
			// A heading closes the entry collected so far and opens a new one.
			if (post.title) {
				data.push(post);
			}
			post = { title : el.innerText, text : [] };
		} else if (post.text && el.tagName !== 'BR') {
			if (el.tagName === 'I') {
				post.autor = el.innerText;
			} else {
				// Collapse whitespace and turn <br> into paragraph breaks.
				post.text.push(el.outerHTML.replace(/\t|\n|\s{2,}/gim, '').replace(/<br>/gim, '</p><p>'));
			}
		}
	}
	// Flush the last entry, which no following heading closed.
	if (post.title) {
		data.push(post);
	}
	return data;
}
/**
 * Collects the link targets of every element matched by a selector.
 * Handed to `page.evaluate`, so it only relies on the page's jQuery `$`.
 * @param  {String} name Selector of the anchors to read
 * @return {Array}  The `href` of each matched element, in match order
 */
function getPostMenu (name) {
	var anchors = $(name);
	return Array.prototype.map.call(anchors, function (anchor) {
		return anchor.href;
	});
}
/**
 * Returns the target of the i-th link matched by a selector.
 * Handed to `page.evaluate`, so it only relies on the page's jQuery `$`.
 * @param  {Number} i   Position of the link in the matched set
 * @param  {String} top Selector of the menu links
 * @return {String|undefined} The link's `href`, or `undefined` when there
 *                            is no link at that position
 */
function getMenu (i, top) {
	var link = $(top)[i];
	// The caller walks past the last menu entry to detect the end of the
	// menu, so a missing link must yield a falsy value instead of throwing.
	return link ? link.href : undefined;
}
/**
 * Loads one URL, stores what was scraped from it in PAGES and then moves on
 * to the next URL by calling itself.
 *
 * The walk first follows the TOP_MENU links, using `stepIndex` as the menu
 * position; once the menu yields no further link it switches to the URLs
 * collected in POST. When it is called without a URL, the collected PAGES
 * are printed as JSON and PhantomJS exits.
 *
 * @param {String}  url    Go to; a falsy value ends the crawl
 * @param {Boolean} isPost True when `url` comes from the POST list rather
 *                         than from the top menu
 */
function startPage(url, isPost) {
	stepIndex++;

	// Nothing left to visit: dump the result and quit.
	if (!url) {
		console.log(JSON.stringify(PAGES, null, '\t'));
		phantom.exit();
		return;
	}

	var page = require('webpage').create(),
		newUrl;
	page.onConsoleMessage = posibleJS;
	page.onAlert = posibleAlert;
	page.onResourceError = posibleError;

	page.open(url, function(status){
		if (status !== 'success') {
			console.log('mierda');
			// Without an explicit exit the process would stay alive forever
			// after a failed load; leave with a non-zero code instead.
			page.close();
			phantom.exit(1);
			return;
		}

		console.log('Start :', url);
		page.injectJs('jquery.min.js');

		// NOTE(review): `phantom.state` is not set anywhere in this file and
		// is not a documented PhantomJS property; kept in case it is
		// assigned elsewhere. When present it replaces the scraping step.
		if (phantom.state) {
			phantom.state();
			return;
		}

		var isHome = url.indexOf('index.html') >= 0;
		if (isPost || !isHome) {
			PAGES[ url ] = page.evaluate(getPostOrPages, CONTENT_POST);
		}
		if (isPost) {
			PAGES[ url ].post = true;
			newUrl = POST[stepIndex];
		} else {
			if (isHome) {
				POST = page.evaluate(getPostMenu, POST_MENU);
				PAGES[ url ] = page.evaluate(getHome, HOME_CONTENT);
				// NOTE(review): getHome returns an array, and JSON.stringify
				// does not emit non-index properties of arrays, so this
				// flag does not show up in the final output.
				PAGES[ url ].list = true;
			} else {
				PAGES[ url ].page = true;
			}
			newUrl = page.evaluate(getMenu, stepIndex, TOP_MENU);
		}
		// Top menu exhausted: restart the cursor and continue with the posts.
		if (!newUrl && !isPost) {
			stepIndex = 0;
			newUrl = POST[stepIndex];
			isPost = true;
		}

		// Everything needed from this page has been read; release it before
		// the next page object is created.
		page.close();
		startPage(newUrl, isPost);
	});
}
// Entry point: announce the target site and start the crawl at its index page.
(function bootstrap (entryPage) {
	console.log('Start :', HOME);
	startPage(HOME + entryPage);
}('index.html'));
2017-01-03T16:17:16 [DEBUG] CookieJar - Created but will not store cookies (use option '--cookies-file=<filename>' to enable persistent cookie storage)
2017-01-03T16:17:22 [DEBUG] Set "http" proxy to: "" : 1080
2017-01-03T16:17:22 [DEBUG] Phantom - execute: Configuration
2017-01-03T16:17:22 [DEBUG] 0 objectName : ""
2017-01-03T16:17:22 [DEBUG] 1 cookiesFile : ""
2017-01-03T16:17:22 [DEBUG] 2 diskCacheEnabled : "false"
2017-01-03T16:17:22 [DEBUG] 3 maxDiskCacheSize : "-1"
2017-01-03T16:17:22 [DEBUG] 4 diskCachePath : ""
2017-01-03T16:17:22 [DEBUG] 5 ignoreSslErrors : "false"
2017-01-03T16:17:22 [DEBUG] 6 localUrlAccessEnabled : "true"
2017-01-03T16:17:22 [DEBUG] 7 localToRemoteUrlAccessEnabled : "false"
2017-01-03T16:17:22 [DEBUG] 8 outputEncoding : "UTF-8"
2017-01-03T16:17:22 [DEBUG] 9 proxyType : "http"
2017-01-03T16:17:22 [DEBUG] 10 proxy : ":1080"
2017-01-03T16:17:22 [DEBUG] 11 proxyAuth : ":"
2017-01-03T16:17:22 [DEBUG] 12 scriptEncoding : "UTF-8"
2017-01-03T16:17:22 [DEBUG] 13 webSecurityEnabled : "true"
2017-01-03T16:17:22 [DEBUG] 14 offlineStoragePath : ""
2017-01-03T16:17:22 [DEBUG] 15 localStoragePath : ""
2017-01-03T16:17:22 [DEBUG] 16 localStorageDefaultQuota : "-1"
2017-01-03T16:17:22 [DEBUG] 17 offlineStorageDefaultQuota : "-1"
2017-01-03T16:17:22 [DEBUG] 18 printDebugMessages : "true"
2017-01-03T16:17:22 [DEBUG] 19 javascriptCanOpenWindows : "true"
2017-01-03T16:17:22 [DEBUG] 20 javascriptCanCloseWindows : "true"
2017-01-03T16:17:22 [DEBUG] 21 sslProtocol : "default"
2017-01-03T16:17:22 [DEBUG] 22 sslCiphers : "ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES128-SHA:ECDHE-RSA-AES256-SHA:ECDHE-ECDSA-RC4-SHA:ECDHE-RSA-RC4-SHA:DHE-RSA-AES128-SHA:DHE-DSS-AES128-SHA:DHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA:RC4-SHA:RC4-MD5"
2017-01-03T16:17:22 [DEBUG] 23 sslCertificatesPath : ""
2017-01-03T16:17:22 [DEBUG] 24 sslClientCertificateFile : ""
2017-01-03T16:17:22 [DEBUG] 25 sslClientKeyFile : ""
2017-01-03T16:17:22 [DEBUG] 26 sslClientKeyPassphrase : ""
2017-01-03T16:17:22 [DEBUG] 27 webdriver : ":"
2017-01-03T16:17:22 [DEBUG] 28 webdriverLogFile : ""
2017-01-03T16:17:22 [DEBUG] 29 webdriverLogLevel : "INFO"
2017-01-03T16:17:22 [DEBUG] 30 webdriverSeleniumGridHub : ""
2017-01-03T16:17:22 [DEBUG] Phantom - execute: Script & Arguments
2017-01-03T16:17:22 [DEBUG] script: "import/getText.js"
2017-01-03T16:17:22 [DEBUG] Phantom - execute: Starting normal mode
2017-01-03T16:17:22 [DEBUG] WebPage - setupFrame ""
2017-01-03T16:17:22 [DEBUG] FileSystem - _open: ":/modules/fs.js" QMap(("mode", QVariant(QString, "r")))
2017-01-03T16:17:23 [DEBUG] FileSystem - _open: ":/modules/system.js" QMap(("mode", QVariant(QString, "r")))
2017-01-03T16:17:23 [DEBUG] FileSystem - _open: ":/modules/webpage.js" QMap(("mode", QVariant(QString, "r")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment