Skip to content

Instantly share code, notes, and snippets.

@pudo
Created April 11, 2013 09:59
Show Gist options
  • Save pudo/5362165 to your computer and use it in GitHub Desktop.
Save pudo/5362165 to your computer and use it in GitHub Desktop.
events = require 'events'
request = require 'request'
cheerio = require 'cheerio'
# Turn a site-relative path into an absolute spiegel.de URL by prefixing the
# site origin. The path is appended as-is; no normalisation is performed.
externalize = (url) ->
  origin = 'http://spiegel.de'
  origin + url
# Fetch the spiegel.de front page and report its channel navigation links.
#
# Returns an EventEmitter that fires 'channel' with (url, name) once for every
# matching header link. Links whose href contains 'http' and the entry named
# 'NACHRICHTEN' are skipped.
#
# If the HTTP request fails, 'error' is emitted when at least one 'error'
# listener is registered; otherwise the failure is written to stderr, because
# emitting 'error' without a listener would throw.
getChannels = () ->
  ee = new events.EventEmitter()
  request.get 'http://spiegel.de/', (err, res, body) ->
    if err
      if ee.listeners('error').length > 0
        ee.emit 'error', err
      else
        console.error 'getChannels: request failed: ' + err
      return
    $ = cheerio.load body
    links = $ '#spHeader #spChannel li:first-child a'
    for link in links
      link = $ link
      url = link.attr 'href'
      # An anchor without an href has nothing to follow (and would crash the
      # substring test below).
      continue unless url
      # Hrefs containing 'http' are treated as absolute links and ignored.
      continue if url.indexOf('http') isnt -1
      name = link.text()
      continue if name is 'NACHRICHTEN'
      ee.emit 'channel', url, name
    return
  return ee
# Fetch the seven-day headline index and report every headline link on it.
#
# Returns an EventEmitter that fires 'headline' with (url, title) for each
# matching anchor, where both values are read from the anchor's `href` and
# `title` attributes.
#
# If the HTTP request fails, 'error' is emitted when at least one 'error'
# listener is registered; otherwise the failure is written to stderr, because
# emitting 'error' without a listener would throw.
getHeadlines = () ->
  ee = new events.EventEmitter()
  request.get 'http://www.spiegel.de/schlagzeilen/index-siebentage.html', (err, res, body) ->
    if err
      if ee.listeners('error').length > 0
        ee.emit 'error', err
      else
        console.error 'getHeadlines: request failed: ' + err
      return
    $ = cheerio.load body
    anchors = $ '#spMainContent .spMarkVisited a'
    for anchor in anchors
      anchor = $ anchor
      url = anchor.attr 'href'
      title = anchor.attr 'title'
      ee.emit 'headline', url, title
    return
  return ee
# Example usage of getHeadlines (disabled):
#feed = getHeadlines()
#feed.on 'headline', (url, title) ->
#  console.log title + ' -- ' + url
# For every channel reported by getChannels, fetch the channel page and log
# one record per top topic ('.spTopThema'), followed by one record for each
# link in that topic's '.spDottedLinkList'.
#
# Every record carries the channel url/name, the topic's zero-based position
# on the page (`rank`), the number of '/' characters in the channel url
# (`level`) and a single timestamp taken when getRankings was called.
# Records are written with console.log; failures go to stderr.
getRankings = () ->
  sample_time = new Date()
  feed = getChannels()
  # Without a listener an emitted 'error' would throw; report it instead.
  feed.on 'error', (err) ->
    console.error 'getRankings: could not load channels: ' + err
  feed.on 'channel', (url, name) ->
    # Debug filter (disabled): restrict the run to the front page only.
    #if url isnt '/'
    #  return
    extUrl = externalize url
    request.get extUrl, (err, res, body) ->
      if err
        console.error 'getRankings: request for ' + extUrl + ' failed: ' + err
        return
      $ = cheerio.load body
      topics = $ '.spTopThema'
      rank = 0
      for topic in topics
        topic = $ topic
        title = $ topic.find 'h2 a'
        obj =
          title: title.text()
          url: title.attr 'href'
          first: topic.hasClass 'spFirstTeaser'
          listitem: false
          channel_url: url
          channel_name: name
          time: sample_time
          rank: rank
          level: (url.split '/').length - 1
        console.log obj
        # The same record is reused for the topic's list links: only the
        # title, url and listitem flag change, and it is logged again each time.
        for listitem in topic.find '.spDottedLinkList a'
          listitem = $ listitem
          obj.listitem = true
          obj.title = listitem.attr 'title'
          obj.url = listitem.attr 'href'
          console.log obj
        rank++
      return
# Entry point: start the ranking scrape as soon as this file is loaded.
getRankings()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment