Skip to content

Instantly share code, notes, and snippets.

@t-ashula
Last active August 29, 2015 14:06
Show Gist options
  • Save t-ashula/b3813c6b98315cc474e7 to your computer and use it in GitHub Desktop.
Save t-ashula/b3813c6b98315cc474e7 to your computer and use it in GitHub Desktop.
// Scrapes a single post page, e.g. https://www.rumor.xyz/post/1000001
'use strict';

// PhantomJS modules: the page that gets loaded, and access to CLI args / stderr.
var page = require('webpage').create();
var system = require('system');

// The post number comes from the first command-line argument.
var num = parseInt(system.args[1], 10); // 1000001

// Anything that is not a number >= 1000001 (including NaN, for which the
// comparison is false) falls back to 1000001.
if (!(num >= 1000001)) {
  num = 1000001;
}

// Full address of the post to load.
var url = 'https://www.rumor.xyz/post/' + num;
// Route console.error to the real stderr stream; background in
// https://github.com/ariya/phantomjs/issues/10150#issuecomment-28707859
// All arguments are joined with a single space and terminated by a newline.
console.error = function () {
  var parts = Array.prototype.slice.call(arguments);
  system.stderr.write(parts.join(' ') + '\n');
};
// Identify as a desktop Chrome browser when requesting the page.
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2143.0 Safari/537.36';

/**
 * Reports an error raised for the page on stderr.
 *
 * @param {string} msg - The error message.
 * @param {Array} trace - Stack frames; each frame's `file`, `line` and
 *     optional `function` fields are printed on their own line.
 */
page.onError = function (msg, trace) {
  var report = ['ERROR: ' + msg];
  var hasTrace = Boolean(trace && trace.length);

  if (hasTrace) {
    report.push('TRACE:');
    trace.forEach(function (frame) {
      var entry = ' -> ' + frame.file + ': ' + frame.line;
      if (frame.function) {
        // Only frames that carry a function name get the suffix.
        entry += ' (in function "' + frame.function + '")';
      }
      report.push(entry);
    });
  }

  console.error(report.join('\n'));
};
/**
 * Registers a DOMContentLoaded listener inside the page that signals the
 * PhantomJS side through window.callPhantom.
 */
page.onInitialized = function () {
  page.evaluate(function () {
    // Declared inside the evaluated function so it exists in the page context.
    function notifyPhantom() {
      window.callPhantom('DOMContentLoaded');
    }

    document.addEventListener('DOMContentLoaded', notifyPhantom, false);
  });
};
/**
 * Handles the page's window.callPhantom notification: extracts the post data
 * from the DOM, prints it to stdout as one JSON object, then exits PhantomJS.
 *
 * Emitted keys:
 *   '0.number'   - last segment of location.pathname
 *   '1.post'     - trimmed text of #post_caption ('' when absent)
 *   '2.like'     - text of the second '.count_label' inside
 *                  #post_menu_bar_inner (0 when absent)
 *   '3.comments' - trimmed text of each comment's '.comment_content' inside
 *                  #comment_section ([] when the section is absent)
 *
 * @param {*} data - Value passed to window.callPhantom; not used.
 */
page.onCallback = function (data) {
  var rumor = page.evaluate(function () {
    var segments = location.pathname.split('/');
    var caption = document.querySelector('#post_caption');
    var counters = document.querySelectorAll('#post_menu_bar_inner .count_label');
    var section = document.querySelector('#comment_section');
    var comments = [];

    if (section) {
      comments = Array.prototype.map.call(section.querySelectorAll('.comment'), function (c) {
        var content = c.querySelector('.comment_content');
        // A comment without a '.comment_content' child used to throw here,
        // which discarded the whole record; emit an empty string instead.
        return content ? content.textContent.trim() : '';
      });
    }

    return {
      '0.number': segments[segments.length - 1],
      '1.post': caption ? caption.textContent.trim() : '',
      '2.like': counters && counters[1] ? counters[1].textContent : 0,
      '3.comments': comments
    };
  }) || {}; // fall back to an empty record when evaluate yields nothing
  console.log(JSON.stringify(rumor));
  phantom.exit();
};
/**
 * Exits PhantomJS when the response for the post URL itself carries a
 * status other than 200. Responses for any other URL are ignored.
 *
 * @param {Object} res - Response metadata; `url` and `status` are read.
 */
page.onResourceReceived = function (res) {
  if (res.url !== url) {
    return;
  }
  if (res.status !== 200) {
    phantom.exit();
  }
};
// Load the post page. On failure, report and exit right away; on success,
// schedule an exit after 5 seconds (the onCallback handler calls
// phantom.exit() itself when it runs).
page.open(url, function (status) {
  console.error(url, ' opened');

  if (status !== 'success') {
    console.error('Unable to access network');
    phantom.exit();
    return;
  }

  setTimeout(function exitAfterTimeout() {
    phantom.exit();
  }, 5000);
});
#!/usr/bin/env bash
# Runs rumor.js for every post number from 1000001 to 1010001, saves each
# result as <number>.json, echoes it, and downloads the photo it references
# (if any) into a file named after the photo URL.
for i in $(seq 1000001 1010001); do
  phantomjs rumor.js "$i" > "$i.json"
  # The redirection above always creates the file, so check that it is
  # non-empty (-s) rather than merely present (-e).
  if [ -s "$i.json" ]; then
    cat "$i.json"
    # -r prints the raw string, so no quote stripping is needed afterwards.
    # NOTE(review): the rumor.js shown with this script emits no "4_photo"
    # key, in which case jq prints "null" and nothing is downloaded -- confirm
    # which rumor.js revision this loop is meant to pair with.
    p=$(jq -r '.["4_photo"]' "$i.json")
    if [[ $p = http* ]]; then
      # Output name: the URL with every '/' turned into '.' and ':' removed.
      # NOTE(review): --no-check-certificate disables TLS verification.
      wget --quiet --no-check-certificate "$p" -O "$(echo "$p" | sed -e 's!/!.!g' -e 's!:!!g')"
    fi
  fi
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment