Last active
August 29, 2015 14:06
-
-
Save t-ashula/b3813c6b98315cc474e7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scrapes a single post page such as https://www.rumor.xyz/post/1000001
'use strict';

var page = require('webpage').create();
var system = require('system');

// Post number comes from system.args[1]; a missing, non-numeric or
// too-small value falls back to 1000001 (NaN fails the >= comparison too).
var num = parseInt(system.args[1], 10);
if (!(num >= 1000001)) {
  num = 1000001;
}

// Address of the post that the rest of the script opens and watches.
var url = 'https://www.rumor.xyz/post/' + num;
// Replace console.error with a writer that targets system.stderr directly,
// so diagnostics stay out of the JSON printed with console.log. Background:
// https://github.com/ariya/phantomjs/issues/10150#issuecomment-28707859
console.error = function () {
  var parts = Array.prototype.slice.call(arguments);
  system.stderr.write(parts.join(' ') + '\n');
};

// User-agent string sent with the page's requests (a desktop Chrome build).
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2143.0 Safari/537.36';
// Page error hook: formats the message and, when a non-empty trace is
// supplied, one line per frame, then writes the report via console.error.
//   msg   - error message text
//   trace - optional array of frames carrying file, line and function
page.onError = function (msg, trace) {
  var hasTrace = Boolean(trace && trace.length);
  var report = ['ERROR: ' + msg];

  if (hasTrace) {
    var frames = trace.map(function (frame) {
      // The function name is only appended when the frame carries one.
      var where = frame.function ? ' (in function "' + frame.function + '")' : '';
      return ' -> ' + frame.file + ': ' + frame.line + where;
    });
    report = report.concat(['TRACE:'], frames);
  }

  console.error(report.join('\n'));
};
// Installs a DOMContentLoaded listener inside the page; when it fires, the
// page notifies this script through window.callPhantom, which arrives at
// page.onCallback.
page.onInitialized = function () {
  // Runs in the page context via page.evaluate, so it must be
  // self-contained and cannot reference variables from this script.
  var hookDomReady = function () {
    var notify = function () {
      window.callPhantom('DOMContentLoaded');
    };
    document.addEventListener('DOMContentLoaded', notify, false);
  };

  page.evaluate(hookDomReady);
};
// Receives window.callPhantom() notifications from the page. On the
// 'DOMContentLoaded' ping it scrapes the post inside the page, prints the
// result as one JSON line on stdout and exits.
//   data - payload passed to window.callPhantom by the page
//
// Output keys: '0.number' (last path segment), '1.post' (caption text),
// '2.like' (text of the second count label, or 0), '3.comments' (array of
// comment texts).
// NOTE(review): the companion shell loop reads a "4_photo" key that this
// object never sets -- confirm which side is out of date.
page.onCallback = function (data) {
  // Ignore any other callPhantom traffic; only the ping installed by
  // page.onInitialized means the DOM is ready to be scraped.
  if (data !== 'DOMContentLoaded') {
    return;
  }

  // The function below runs inside the page, so it must be self-contained.
  // evaluate's result is falsy when nothing usable comes back; fall back to {}.
  var rumor = page.evaluate(function () {
    // Trimmed text of an element, or '' when the element is missing.
    var textOf = function (el) {
      return el ? el.textContent.trim() : '';
    };

    var pathParts = location.pathname.split('/');
    var countLabels = document.querySelectorAll('#post_menu_bar_inner .count_label');
    var commentSection = document.querySelector('#comment_section');
    var commentNodes = commentSection ? commentSection.querySelectorAll('.comment') : [];

    return {
      '0.number': pathParts[pathParts.length - 1],
      '1.post': textOf(document.querySelector('#post_caption')),
      '2.like': countLabels[1] ? countLabels[1].textContent : 0,
      // A comment without a .comment_content child yields '' instead of
      // throwing and discarding the whole result.
      '3.comments': Array.prototype.map.call(commentNodes, function (c) {
        return textOf(c.querySelector('.comment_content'));
      })
    };
  }) || {};

  console.log(JSON.stringify(rumor));
  phantom.exit();
};
// Watches responses for the post URL itself and aborts the run when the
// server answers with anything other than HTTP 200. Responses for other
// resources are ignored.
//   res - response metadata; only res.url and res.status are read
page.onResourceReceived = function (res) {
  if (res.url === url && res.status !== 200) {
    // Say why nothing was scraped, and exit non-zero so a caller can tell
    // a failed fetch from a successful one.
    console.error(url, 'responded with HTTP status', res.status);
    phantom.exit(1);
  }
};
// Start loading the post. The scrape itself happens in page.onCallback;
// this callback only reports the load result and arms a fallback exit.
page.open(url, function (status) {
  if (status !== 'success') {
    console.error('Unable to access network');
    // Non-zero exit status marks the run as failed.
    phantom.exit(1);
    return;
  }

  // Only report the page as opened once the load actually succeeded.
  console.error(url, ' opened');

  // Fallback: end the process after 5 seconds in case page.onCallback
  // never fires and so never calls phantom.exit().
  setTimeout(function () {
    phantom.exit();
  }, 5000);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/bash
# Runs rumor.js for post numbers 1000001..1010001, saving each result as
# <number>.json, echoing it, and downloading the photo when the JSON holds
# a photo URL.
for i in $(seq 1000001 1010001); do
    json="$i.json"
    phantomjs rumor.js "$i" > "$json"

    # The redirection above always creates the file, so test for non-empty
    # output (-s) rather than mere existence (-e).
    if [ -s "$json" ]; then
        cat "$json"

        # -r prints the raw string, so no quote-stripping is needed.
        # NOTE(review): rumor.js as shown emits keys "0.number".."3.comments"
        # and never "4_photo", so this evaluates to "null" -- confirm the key.
        p=$(jq -r '.["4_photo"]' "$json")

        if [[ $p = http* ]]; then
            # Flatten the URL into a file name: "/" becomes ".", ":" is dropped.
            out=${p//\//.}
            out=${out//:/}

            # $p is scraped data: keep it quoted so it cannot be word-split
            # or glob-expanded.
            # NOTE: --no-check-certificate disables TLS verification; drop
            # it if the photo host serves a valid certificate.
            wget --quiet --no-check-certificate -O "$out" "$p"
        fi
    fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment