Skip to content

Instantly share code, notes, and snippets.

@rafapolo
Created March 2, 2010 16:51
Show Gist options
  • Save rafapolo/319666 to your computer and use it in GitHub Desktop.
Save rafapolo/319666 to your computer and use it in GitHub Desktop.
Pega umquetenha.org
#!ruby19
# encoding: utf8
# author: rafael polo
# created_at: 02.mar.2010
require 'open-uri'
require 'timeout'
# Crawler for the umquetenha.org blog: walks the month archives linked from
# the index page, opens every post found in each month and prints the artist,
# album title, year, download link and cover image URL of each album post.
class PegaUmquetenha
  # Root of the blog; month archives are fetched as ?m=<id>, posts as ?p=<id>.
  BASE_URL = "http://umquetenha.org/uqt"
  # Upper bound, in seconds, for downloading a single page.
  TIMEOUT_SEGUNDOS = 8

  REGEX_CALENDARIO = /umquetenha.org\/uqt\/\?m=([\d]+)/
  REGEX_POST = /<a href="http:\/\/umquetenha.org\/uqt\/\?p=([\d]+)"/
  REGEX_CAPA = /width: 170px; height: 170px;" src="(.*)" border="0"/
  REGEX_ALBUM = /(.*) - (.*) \(([\d]{4})\)/
  REGEX_DOWNLOAD = /<a href="(.*)" target="blank"><span style=".*"><strong>download<\/strong>/
  # Heading of a month archive page.
  REGEX_QUANDO = /<h2 class="pagetitle">(.*)<\/h2>/
  # Post heading of the form "... (YYYY)".
  REGEX_TITULO = /<h2>(.*\([\d]{4}\))<\/h2>/

  # Crawls the whole site and prints every album that could be parsed.
  # Pages that fail to download are skipped (load_url already reports them).
  #
  # @return [Array<String>] the month ids that were visited; empty when the
  #   index page could not be loaded
  def self.pega
    indice = load_url(BASE_URL)
    return [] unless indice

    # String#scan yields one array per match when the pattern has a group, so
    # flatten to plain ids before interpolating them into URLs; uniq avoids
    # downloading the same archive more than once.
    meses = indice.scan(REGEX_CALENDARIO).flatten.uniq
    meses.each do |mes|
      page_mes = load_url("#{BASE_URL}/?m=#{mes}")
      next unless page_mes

      puts "\n#{page_mes[REGEX_QUANDO, 1]}"
      posts = page_mes.scan(REGEX_POST).flatten.uniq
      puts "#{posts.size} álbuns"

      posts.each do |post|
        page_album = load_url("#{BASE_URL}/?p=#{post}")
        next unless page_album

        info = extrai_album(page_album)
        next unless info

        puts "Artista: #{info[:artista]}"
        puts "Album: #{info[:album]}"
        puts "Ano: #{info[:ano]}"
        puts "Download: #{info[:download]}"
        puts "Capa: #{info[:capa]}"
        puts
      end
    end
  end

  # Downloads +url+, giving up after TIMEOUT_SEGUNDOS seconds.
  #
  # @param url [String] absolute URL to fetch
  # @return [String, false] the response body, or false when the download
  #   failed or timed out (the error is printed to standard output)
  def self.load_url(url)
    Timeout.timeout(TIMEOUT_SEGUNDOS) { URI.parse(url).read }
  rescue StandardError => error
    # StandardError covers network, HTTP, URI and timeout failures while still
    # letting Interrupt/SystemExit through so the crawl can be stopped.
    puts "Erro: #{error}\n\n"
    false
  end

  # Extracts the album fields from the HTML of a post page.
  #
  # @param html [String] body of a post page
  # @return [Hash, nil] :artista, :album, :ano, :download and :capa, or nil
  #   when the heading is not "Artist - Album (YYYY)" or when artist, album,
  #   year or cover is missing; :download may be nil
  def self.extrai_album(html)
    titulo = html[REGEX_TITULO, 1]
    return unless titulo

    # The blog emits the dash and ampersand as numeric HTML entities.
    info = titulo.gsub("&#8211;", "-").gsub("&#038;", "&").match(REGEX_ALBUM)
    return unless info

    campos = {
      artista: info[1],
      album: info[2],
      ano: info[3],
      download: html[REGEX_DOWNLOAD, 1],
      capa: html[REGEX_CAPA, 1]
    }
    obrigatorios = campos.values_at(:artista, :album, :ano, :capa)
    return if obrigatorios.any? { |valor| valor.nil? || valor.empty? }

    campos
  end
  private_class_method :extrai_album
end
# Start the crawl as soon as this file is loaded.
PegaUmquetenha.pega
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment