Skip to content

Instantly share code, notes, and snippets.

@isqad
Created May 24, 2012 11:11
Show Gist options
  • Save isqad/2780859 to your computer and use it in GitHub Desktop.
Save isqad/2780859 to your computer and use it in GitHub Desktop.
crawler
var http = require('http');
var events = require('events');
var jsdom = require('jsdom');
var mongoose = require('mongoose');
// Open the default mongoose connection to the local "crawler" database.
mongoose.connect('mongodb://localhost/crawler');

// Shortcuts to mongoose's schema constructor and its ObjectId type.
// NOTE(review): ObjectId is not referenced anywhere else in the visible source.
var Schema = mongoose.Schema;
var ObjectId = mongoose.Schema.ObjectId;

// Schemas: each one describes a document holding a link string and a date.
var links = new mongoose.Schema({
    link: String,
    date: Date
});
// NOTE(review): Rsses is declared but no model is registered for it in the
// visible source.
var Rsses = new mongoose.Schema({
    link: String,
    date: Date
});

// Model registered under the name 'LinkModel', built from the `links` schema.
var LinkModel = mongoose.model('LinkModel', links);

// URL the crawl starts from.
var startHost = 'http://e1.ru';

// Event emitter used to trigger the crawl via its 'ready' event.
var spider = new events.EventEmitter();
// 'ready' handler: crawl the start page if it has not been recorded yet.
//
// Looks up `startHost` in the links collection. When no document exists for it,
// a new one is saved with the current date, the page is loaded through jsdom
// (with jQuery injected), and every http(s) link found on the page is printed
// as an absolute URL. When the link is already stored, nothing happens.
spider.on('ready', function () {
    // Node's built-in URL helper; required here so the handler does not rely
    // on a file-level import.
    var url = require('url');

    // Check whether this link is already in the database.
    LinkModel.findOne({link: startHost}, function (err, doc) {
        if (err) {
            console.error('Link lookup failed for ' + startHost + ': ' + err);
            return;
        }
        if (doc) {
            // Already recorded, so there is nothing to crawl.
            return;
        }

        // Add the link to the database. The fields must be set on a document
        // instance: assigning them to the LinkModel constructor stores nothing.
        var entry = new LinkModel({
            link: startHost,
            date: new Date()
        });

        entry.save(function (saveErr) {
            if (saveErr) {
                console.error('Could not save ' + startHost + ': ' + saveErr);
                return;
            }

            jsdom.env(startHost,
                ['http://code.jquery.com/jquery-1.5.min.js'],
                function (errors, window) {
                    // Without a window, or without the injected jQuery, the
                    // page cannot be queried.
                    if (!window || typeof window.$ !== 'function') {
                        console.error('Could not load ' + startHost + ': ' + errors);
                        return;
                    }

                    // TODO: detect the page's RSS feed via
                    // link[type="application/rss+xml"] and record it.

                    window.$('a').each(function () {
                        var linkHref = window.$(this).attr('href');
                        if (typeof linkHref === 'undefined') {
                            // Anchor without an href; returning undefined from
                            // a jQuery .each callback continues the loop.
                            return;
                        }

                        // Turn relative links into absolute ones; links that
                        // are already absolute come back unchanged.
                        linkHref = url.resolve(startHost, linkHref);

                        // Skip mailto:, javascript: and other non-web schemes.
                        if (!/^https?:\/\//i.test(linkHref)) {
                            return;
                        }

                        // TODO: store the link and schedule it for crawling.
                        console.log(linkHref);
                    });
                });
        });
    });
});
// Start the spider when the mongoose connection emits 'open'.
function onDatabaseOpen() {
    spider.emit('ready');
}

mongoose.connection.on('open', onDatabaseOpen);
@isqad
Copy link
Author

isqad commented May 24, 2012

Недоделанный

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment