Skip to content

Instantly share code, notes, and snippets.

@soomtong
Last active October 10, 2015 19:48
Show Gist options
  • Save soomtong/3741997 to your computer and use it in GitHub Desktop.
Save soomtong/3741997 to your computer and use it in GitHub Desktop.
Get the domain list from rankey.com
//
// http://faultnote.tistory.com/337
//
var util = require('util');
var request = require('request');
var fs = require('fs');
// Endpoint that receives the ranking form POST.
// ref. http://www.rankey.com/rank/rank_site_cate.php
var url = 'http://www.rankey.com/rank/ajax/ajax_rank_site_cate.php';

// Case-insensitive keyword filter applied to each site URL; matching rows are
// additionally written to dataList.txt.
// NOTE(review): the keywords look insurance/finance related - confirm intent.
var token = /asset|boheom|bohum|inus|life|ins|direct|inr|inz|youngm|jongsin/i;

// Starting position of the crawl: first page of the first category (both 1-based).
var page = 1;
var cate = 1;
// Category table: each entry is {id, title}, where id is the 1-based position
// of the title in the list below (ids run 1..22 without gaps).
// NOTE(review): the 4th and 5th titles are identical - confirm against the
// site's category list whether one of them should be a different category.
var category = [
    '건강/의학',
    '게임',
    '교육/학원',
    '온라인교육',
    '온라인교육',
    '뉴스/미디어',
    '문학/예술',
    '비즈니스/경제',
    '사회/문화/종교',
    '생활/가정/취미',
    '쇼핑',
    '스포츠/레저',
    '엔터테인먼트',
    '여행',
    '유통/판매/운송',
    '정치/행정',
    '제조',
    '커뮤니티',
    '학문',
    '서비스',
    '정보통신/IT',
    '인터넷/컴퓨터'
].map(function (title, index) {
    return {id: index + 1, title: title};
});
/**
 * Requests one page of the site ranking for one category, stores the rows,
 * and then schedules itself for the next page or, once the pages of a
 * category are exhausted, for page 1 of the next category.
 *
 * Every row is appended to `allList.txt`. Rows whose URL matches the
 * module-level `token` pattern are also appended to `dataList.txt` and echoed
 * to the console.
 *
 * The crawl stops (without scheduling anything further) when the request
 * fails at the transport level, when the server answers with a status other
 * than 200, or when the last page of the last category has been processed.
 *
 * @param {string} url - Endpoint that receives the form POST.
 * @param {number} cate - 1-based category id; `category[cate - 1]` supplies the title.
 * @param {number} page - 1-based page number to request.
 * @param {Array<{id: number, title: string}>} category - Category table; its
 *     length determines when the crawl is complete.
 */
var getAllList = function (url, cate, page, category) {
    request.post(url, {
        form: {
            sel_rank: 'M3',
            sel_period: '20130301',
            sel_cate1: cate,
            page: page
        }
    }, function (error, response, data) {
        // A transport failure (DNS, refused connection, timeout, ...) delivers
        // no response object, so it must be handled before reading statusCode.
        if (error || !response) {
            console.error("http connection has errors.");
            console.error(error);
            return;
        }

        if (response.statusCode !== 200) {
            console.error("http connection has errors.");
            console.error("response code : " + response.statusCode);
            return;
        }

        // Stays 0 when the payload cannot be parsed, which makes the crawl
        // move on to the next category instead of retrying this one.
        var totalPage = 0;

        try {
            var parsed = JSON.parse(data);
            var title = category[cate - 1].title;
            totalPage = parsed.page;

            console.log('\n============ cate : ' + title + '(' + cate + ')' + ' \t page : ' + page + '\n');

            parsed.rdata.forEach(function (item, idx) {
                var line = item.sv_rank + '\t\t' + item.url + '\t\t' + item.grp_name + '\t\t' + title + '\n';

                if (token.test(item.url)) {
                    fs.appendFileSync('dataList.txt', line);
                    console.log((idx + 1) + '\t: ' + line);
                }

                fs.appendFileSync('allList.txt', line);
            });
        } catch (e) {
            // Malformed JSON or an unexpected payload shape: report it and
            // keep crawling with whatever totalPage was obtained.
            console.error(e);
        }

        // More pages left in this category: fetch the next one after a short pause.
        var nextPage = Number(page) + 1;
        if (totalPage >= nextPage) {
            setTimeout(function () {
                getAllList(url, cate, nextPage, category);
            }, 2000);
            return;
        }

        // Category finished: start the next category at page 1 after a longer pause.
        var nextCate = Number(cate) + 1;
        if (category.length >= nextCate) {
            setTimeout(function () {
                getAllList(url, nextCate, 1, category);
            }, 4000);
            return;
        }

        // Nothing left to fetch.
        console.warn('======================================');
        console.warn('render complete...');
    });
};
// Start the crawl at the initial category and page; getAllList schedules the
// following pages and categories itself.
getAllList(url, cate, page, category);
// Startup banner. It runs as soon as getAllList returns, so it is presumably
// printed before any result (assumes request.post is asynchronous - TODO confirm).
console.log("Get all domain list from rankey.com ... thanks.");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment