Created
April 21, 2013 08:57
-
-
Save arathunku/5428976 to your computer and use it in GitHub Desktop.
Downloads movie premiere information for a friend's profile, @wkinie
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source 'https://rubygems.org'

# HTML parser used to scrape the Filmweb premieres page.
gem 'nokogiri'
# rss stopped being a default gem in Ruby 3.0, so `require 'rss/maker'`
# only works under Bundler when the gem is declared here.
gem 'rss'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri' | |
require 'open-uri' | |
require 'date' | |
require 'rss/maker' | |
# Longest plot excerpt (in characters) kept for a feed entry.
PLOT_LIMIT = 114

# Shortens +text+ to at most +limit+ characters, cutting at the last
# whitespace found within the first limit + 1 characters so that no word is
# split. Text that already fits is returned unchanged; when there is no usable
# whitespace to cut at, the text is cut hard at +limit+ characters.
def shorten_plot(text, limit = PLOT_LIMIT)
  return text if text.length <= limit

  cut = text[0, limit + 1].rindex(/\s/)
  cut.nil? || cut.zero? ? text[0, limit] : text[0, cut]
end

# Kernel#open no longer opens URLs since Ruby 3.0; open-uri exposes URI.open.
site = Nokogiri::HTML(URI.open('http://www.filmweb.pl/premiere'))
films = []

# Filmweb identifies each day's div by the day of the month, so tomorrow's day
# number selects tomorrow's premieres.
# Be aware: if the page changes (mostly the CSS selectors), the selectors below
# have to be corrected manually.
premiere_date = Date.today.next
site.css("#day-#{premiere_date.day}").css('.filmPreview').each do |preview|
  information = {}
  information[:title] = preview.css('.filmTitle').first.content
  information[:img] = preview.css('.entityPoster img')[0]['src']

  # Not every preview has a plot; fall back to a placeholder ("no description").
  plot = preview.css('.filmPlot').first
  information[:info] = plot ? shorten_plot(plot.content) : 'Brak opisu'

  information[:link] = "http://www.filmweb.pl#{preview.css('.entityTitle a')[0]['href']}"
  # Every film in this div premieres on the selected day; the feed code reads
  # :date as a parseable date string.
  information[:date] = premiere_date.to_s
  films << information
end
# I do not think the code below is needed: Filmweb lists every new film under
# the premieres entry for its day, which is why Date.today.next.day is used at
# the beginning.
#films.each { |p|
# site = Nokogiri::HTML(open(p[:link]))
# p[:date] = site.css('#filmPremierePoland').first.content
# p[:tomorrow] = 1 if Date.today+1 == Date.parse(p[:date])
# p[:tomorrow] ||= 0
#}
# Builds an RSS 1.0 feed with one item per scraped film and prints it.
content = RSS::Maker.make("1.0") do |m|
  # Not very useful information, but RSS::Maker will complain without it
  # because the RSS standard requires these channel fields.
  m.channel.title = "Informacje o nowych filmach!"
  m.channel.link = "http://www.twitter.pl/wkinie"
  m.channel.author = "autor"
  m.channel.description = "description"
  m.channel.about = "new films"

  # IFTTT presumably uses this element to detect that the feed was updated;
  # it is unclear when that actually triggers.
  m.channel.updated = Time.now

  films.each do |f|
    # Entries scraped from tomorrow's premieres div may carry no :date, and
    # Date.parse(nil) raises TypeError, so fall back to tomorrow's date.
    premiere = f[:date] ? Date.parse(f[:date].to_s) : Date.today.next

    m.items.new_item do |i|
      # strftime zero-pads day and month, e.g. "05.04".
      i.title = "#{premiere.strftime('%d.%m')} | #{f[:title]}"
      i.link = f[:link]
      i.pubDate = Time.now
      i.description = "#{f[:info]}..."
      i.date = Time.now
    end
  end
end

# Prints the XML. To store it in a file, redirect the output:
#   ruby wkinie.rb > film.xml
puts content
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment