Last active
October 10, 2015 19:48
-
-
Save soomtong/3741997 to your computer and use it in GitHub Desktop.
get domainlist from rankey.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// http://faultnote.tistory.com/337
//
var util = require('util');
var request = require('request');
var fs = require('fs');
// Case-insensitive pattern tested against each ranked site's URL; rows whose
// URL matches are additionally written to dataList.txt by getAllList.
// NOTE(review): the keywords look insurance-related (e.g. "boheom"/"bohum") — confirm intent.
var token = /asset|boheom|bohum|inus|life|ins|direct|inr|inz|youngm|jongsin/i;
// AJAX endpoint that receives the form POST.
// ref. http://www.rankey.com/rank/rank_site_cate.php
var url = 'http://www.rankey.com/rank/ajax/ajax_rank_site_cate.php';
// Starting category number (1-based; getAllList uses cate - 1 as an index into `category`).
var cate = 1;
// Starting page number within the category.
var page = 1;
// Category table: getAllList sends the 1-based category number as `sel_cate1`
// and reads the display title from category[cate - 1].
// NOTE(review): ids 4 and 5 carry the same title ('온라인교육') — verify against
// the site's category list; one of them may be a copy/paste slip.
var category = [
    {id:1, title:'건강/의학'},
    {id:2, title:'게임'},
    {id:3, title:'교육/학원'},
    {id:4, title:'온라인교육'},
    {id:5, title:'온라인교육'},
    {id:6, title:'뉴스/미디어'},
    {id:7, title:'문학/예술'},
    {id:8, title:'비즈니스/경제'},
    {id:9, title:'사회/문화/종교'},
    {id:10, title:'생활/가정/취미'},
    {id:11, title:'쇼핑'},
    {id:12, title:'스포츠/레저'},
    {id:13, title:'엔터테인먼트'},
    {id:14, title:'여행'},
    {id:15, title:'유통/판매/운송'},
    {id:16, title:'정치/행정'},
    {id:17, title:'제조'},
    {id:18, title:'커뮤니티'},
    {id:19, title:'학문'},
    {id:20, title:'서비스'},
    {id:21, title:'정보통신/IT'},
    {id:22, title:'인터넷/컴퓨터'}
];
/**
 * Crawl the category ranking one page at a time.
 *
 * Posts a form request for (cate, page), appends every returned row to
 * allList.txt, additionally appends rows whose URL matches the module-level
 * `token` pattern to dataList.txt, and then schedules the follow-up request:
 * the next page of the same category after 2 s, or page 1 of the next
 * category after 4 s. The crawl ends after the last entry of `category`, or
 * as soon as a request fails or answers with a non-200 status.
 *
 * The response body is parsed as JSON and is expected to carry `page` (used
 * as the total page count) and `rdata` (the array of rows). A body that
 * cannot be processed is logged and treated as "no more pages", so the crawl
 * moves on to the next category.
 *
 * @param {string} url - Endpoint that receives the form POST.
 * @param {number} cate - 1-based category number; cate - 1 indexes `category`.
 * @param {number} page - 1-based page number within the category.
 * @param {Array<{id: number, title: string}>} category - Category table.
 */
var getAllList = function (url, cate, page, category) {
    request.post(url, {
        form: {
            sel_rank: 'M3',
            sel_period: '20130301',
            sel_cate1: cate,
            page: page
        }
    }, function (error, response, data) {
        // On a transport failure there is no response object at all, so it
        // must be checked before statusCode is read.
        if (error || !response) {
            console.error("http connection has errors.");
            if (error) {
                console.error(error);
            }
            return;
        }
        if (response.statusCode !== 200) {
            console.error("http connection has errors.");
            console.error("response code : " + response.statusCode);
            return;
        }

        // Stays 0 when the body cannot be parsed, which sends the crawl on to
        // the next category below.
        var totalPage = 0;
        try {
            var parsed = JSON.parse(data);
            var rows = parsed.rdata;
            totalPage = parsed.page;
            console.log('\n============ cate : ' + category[cate - 1].title + '(' + cate + ')' + ' \t page : ' + page + '\n');
            rows.forEach(function (item, idx) {
                var line = item.sv_rank + '\t\t' + item.url + '\t\t' + item.grp_name + '\t\t' + category[cate - 1].title + '\n';
                if (token.test(item.url)) {
                    fs.appendFileSync('dataList.txt', line);
                    console.log((idx + 1) + '\t: ' + line);
                }
                fs.appendFileSync('allList.txt', line);
            });
        } catch (e) {
            console.error(e);
        }

        // Next page of the same category, if there is one.
        var nextPage = page + 1;
        if (totalPage >= nextPage) {
            setTimeout(function () {
                getAllList(url, cate, nextPage, category);
            }, 2000);
            return;
        }

        // Otherwise start the next category from its first page.
        var nextCate = cate + 1;
        if (category.length >= nextCate) {
            setTimeout(function () {
                getAllList(url, nextCate, 1, category);
            }, 4000);
            return;
        }

        // Every category has been visited.
        console.warn('======================================');
        console.warn('render complete...');
    });
};
// Kick off the crawl with the configured starting category and page.
getAllList(url, cate, page, category);
// Start-up banner; it is logged right after the first request has been issued.
console.log("Get all domain list from rankey.com ... thanks.");
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment