Skip to content

Instantly share code, notes, and snippets.

@toto
Created June 22, 2009 19:50
Show Gist options
  • Save toto/134154 to your computer and use it in GitHub Desktop.
Save toto/134154 to your computer and use it in GitHub Desktop.
Produces Fulltext Feeds for Heise Online
#!/opt/local/bin/ruby
require 'rubygems'
require 'scrubyt'
require 'open-uri'
require 'builder'
# Upper bound on how many entries of the index page are walked further below.
LIMIT = 5

# Collects one hash per successfully scraped article; filled in later.
res = []

# Scrape the classic newsticker index page. Each record of `data.to_hash`
# is later read through its :link_title and :link_url keys.
# NOTE(review): the nested `link_url` pattern presumably resolves to the
# anchor's href -- confirm against the scrubyt documentation.
data = Scrubyt::Extractor.define do
  fetch('http://www.heise.de/newsticker/classic/',
        :user_agent => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; de-de) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Safari/530.17')

  link_title('//h3/a', :write_text => true) do
    link_url
  end
end
# Walk the first LIMIT entries of the index, fetch each article page and
# append a hash with :title, :link, :content and :created_at to `res`.
#
# LIMIT bounds the number of index entries examined (skipped entries count
# too), not the number of articles that end up in `res`.
article_agent = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; de-de) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Safari/530.17'

data.to_hash.each_with_index do |entry, index|
  break if index >= LIMIT

  # Skip entries without a link and entries whose link is already an absolute
  # http: URL. This check has to run here, outside the extractor definition:
  # inside that block `next` would merely end the block, so the loop iteration
  # would carry on with an extractor that never fetched anything.
  relative_url = entry[:link_url]
  next if relative_url.nil? || relative_url =~ /\Ahttp\:/

  article_url = 'http://heise.de' + relative_url

  content = Scrubyt::Extractor.define do
    fetch article_url, :user_agent => article_agent
    item '//div[@id="mitte_news"]' do
      item_text '//div[@class="meldung_wrapper"]'
      created_at '//p[@class="news_datum"]'
    end
  end

  # Only the first extracted record is used; skip pages where the body or
  # the timestamp could not be extracted.
  article = content.to_hash.first
  next unless article && article[:item_text] && article[:created_at]

  # The timestamp text is split on '.', ':' and ' '; the first three fields
  # are read as day, month, year and the last two as hour, minute.
  # NOTE(review): the result is built with Time.utc although the page time is
  # presumably local German time -- confirm and convert if needed.
  parts = article[:created_at].split(/[\.: ]/).map { |part| part.to_i }
  published_at = Time.utc(parts[2], parts[1], parts[0], parts[-2], parts[-1])

  # Collapse runs of spaces, drop the literal 'Anzeige' marker and squeeze
  # blank lines.
  body = article[:item_text].gsub(/ +/, ' ').gsub('Anzeige', '').gsub(/\n\n+/, "\n")

  res << {:title => entry[:link_title],
          :link => article_url,
          :content => body,
          :created_at => published_at}
end
# Serialise the collected articles in `res` as an Atom feed on STDOUT.
#
# Each element of `res` is read for :title, :link, :content and :created_at
# (a Time). The feed-level <updated> is the timestamp of the first article;
# when nothing was scraped the current time is used instead, so an empty but
# well-formed feed is still written rather than failing on a nil lookup.
timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
newest = res.first
feed_updated = newest ? newest[:created_at] : Time.now.utc

builder = Builder::XmlMarkup.new(:target => STDOUT)
# RFC 4287 requires the Atom namespace on the root element and a feed-level id.
builder.feed(:xmlns => 'http://www.w3.org/2005/Atom') do |feed|
  feed.title("Heise Online Newsticker")
  feed.id('http://www.heise.de/newsticker/classic/')
  feed.updated(feed_updated.strftime(timestamp_format))

  res.each do |article|
    feed.entry do |entry|
      entry.title(article[:title])
      entry.link(:href => article[:link])
      # The article URL doubles as the entry's id.
      entry.id article[:link]
      entry.content(article[:content], :type => 'text')
      entry.updated(article[:created_at].strftime(timestamp_format))
      entry.author do |author|
        author.name('Heise Online')
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment