Skip to content

Instantly share code, notes, and snippets.

@rafapolo
Created February 15, 2011 18:28
Show Gist options
  • Save rafapolo/827963 to your computer and use it in GitHub Desktop.
Save rafapolo/827963 to your computer and use it in GitHub Desktop.
Pega as top stories do Google News e lista as notícias paralelas, aplicando outra cor às palavras já ditas.
#encoding: utf-8
class FrontController < ApplicationController
def list
coder = HTMLEntities.new
@entries = []
url = "http://news.google.com/?output=rss"
link = "http://ajax.googleapis.com/ajax/services/feed/load?v=1.0&num=10&q="+url
result = ActiveSupport::JSON.decode(open(link).read)
max = 0
count = 0
# pega as noticias do google news
result["responseData"]["feed"]["entries"].each do |entry|
count +=1
e = entry["title"].split(" - ")
title = e[0]
source = e[1]
related = entry["content"].scan(/<b>all\s(.+)\snews/i)[0][0].to_s || nil
more_url = nil
# pega a url das related news
if related
more = entry["content"].scan(/(http:\/\/news.google.com\/news\/more(.+)topic=h)/i)
more_url = more[0][0].gsub("more", "story").gsub(/&amp;/i, "&")
end
related_page = coder.decode(URI.parse(more_url).read)
# extrai snippets, heads, sources e datas usando expressões regulares
snippets = related_page.scan(/<div class="snippet">(.*?)(\s<b>...<\/b>\s)?<\/div>/)
heads = related_page.scan(/<span class="titletext">(.*?)(<b>...<\/b>)?<\/span>/)
sources = related_page.scan(/<span class="source source-pref .+?">(.*?)<\/span>/)
dates = related_page.scan(/<span class="date ">(.*?)<\/span><\/div>/)
max = (snippets.size>max) ? snippets.size : max
clean_snippets = []
count = 0
# decodifica html, limpa e junta dados num conjunto de clean_snippets
snippets.each do |snippet|
source = sources[count] ? sources[count][0].to_s : ""
text = snippet[0].to_s + (snippet[1] ? " ..." : "")
clean_snippets << {:text=>text, :title=>heads[count][0].to_s, :source=>source, :date=>dates[count][0].to_s, :count=>count+=1}
end
# aplica cor as palavras já ditas no texto do snippet
lighted_snippets= lowlight(clean_snippets, :text)
# e nos títulos
lighted_snippets= lowlight(lighted_snippets, :title)
@entries << {:max=>(max+3), :snippets=>lighted_snippets}
end
end
def lowlight(clean_snippets, text)
parallel_snippets = []
snippets = clean_snippets.dup
snippets.each do |snippet|
# quebra texto em conjunto de palavras
break_text = snippet[text].split(" ")
# para cada palavra
break_text.each do |word|
# nos outros snippets
snippets.each do |other|
# se no outro snippets tiver essa palavra
if (other[text].split(" ").index(word) && snippet[:count]!=other[:count])
# aplica esse estilo html: said => cinza.
word.gsub!(word, '<span class="said">'+word+'</span>')
break
end
end
end
# junta as palavras novamente
snippet[text] = break_text.join(" ")
parallel_snippets << snippet
end
parallel_snippets
end
end
@rafapolo
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment