Created
January 10, 2013 12:12
-
-
Save anonymous/4501578 to your computer and use it in GitHub Desktop.
dailyprogrammer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
require 'mechanize' | |
# Walks the /r/dailyprogrammer listing pages with Mechanize, collects the
# challenge posts as a Hash of title => href, and prints them ordered either
# by challenge number or grouped by difficulty.
class DailyProgrammerCrawler
  # First listing page fetched by #init.
  START_URL = 'http://www.reddit.com/r/dailyprogrammer/'.freeze
  # Highest div position index probed by the XPath query on each page.
  MAX_DIV_INDEX = 49
  # Challenge number as it appears in a title, e.g. "#115".
  CHALLENGE_NUMBER = /#\d+/
  # Bracketed tag without digits, e.g. "[Easy]" (skips date tags like "[1/2/2013]").
  DIFFICULTY_TAG = /\[\D+\]/
  # Bracketed single-word tag; a title must contain one to be collected.
  WORD_TAG = /\[\w+\]/

  def initialize
    @agent = Mechanize.new
    @page = ''
    @links = {}
    @difficulty = nil
  end

  # Fetches the first listing page and makes it the current page.
  def init
    @page = @agent.get(START_URL)
  end

  # Scans the current page and records every anchor whose text looks like a
  # challenge title (see #challenge_title?) into @links as title => href.
  def extractLinks
    1.upto(MAX_DIV_INDEX) do |i|
      anchor = @page.parser.xpath("//div[#{i}]/div[2]/p[1]/a").first
      next unless anchor && challenge_title?(anchor.text)

      @links[anchor.text] = anchor['href']
    end
  end
  # snake_case alias; the camelCase name is kept for existing callers.
  alias_method :extract_links, :extractLinks

  # Follows the "next" pagination link of the current page, if there is one.
  #
  # @return [Boolean] true when a next page was loaded, false otherwise
  def next_page
    # The link text is "next" followed by two trailing characters, so those
    # two characters are dropped before comparing.
    link = @page.links.find { |l| l.text[0..-3] == 'next' }
    return false unless link

    @page = link.click
    true
  end

  # Prints the challenges grouped easy / intermediate / hard, each group
  # ordered by challenge number. Builds the groups on demand when
  # #sort_by_difficulty has not been called yet.
  def print_by_difficulty
    sort_by_difficulty if @difficulty.nil?
    @difficulty.each do |group|
      group.each { |title, href| puts format_entry(title, href) }
    end
  end

  # Prints all challenges ordered by challenge number. @links is left
  # untouched (it stays a Hash), so crawling and saving keep working afterwards.
  def print_by_num
    ordered = @links.sort_by { |title, _href| challenge_number(title) }
    ordered.each { |title, href| puts format_entry(title, href) }
  end

  # Drops entries with an empty title and writes @links to
  # dailyProgrammerChallenges.txt in the current directory.
  def save
    @links.reject! { |title, _href| title.empty? }
    File.open('dailyProgrammerChallenges.txt', 'w+') { |f| f.puts(@links) }
  end

  # Buckets the collected links by difficulty tag and stores
  # [easy, intermediate, hard] in @difficulty, each bucket ordered by
  # challenge number. "[difficult]" counts as hard; titles with any other
  # tag (or none) are left out.
  #
  # @return [Array<Array>] the three sorted buckets
  def sort_by_difficulty
    easy = []
    intermediate = []
    hard = []

    @links.each do |entry|
      # to_s guards against titles that have no digit-free bracket tag.
      case entry.first[DIFFICULTY_TAG].to_s.downcase
      when '[easy]' then easy << entry
      when '[intermediate]' then intermediate << entry
      when '[hard]', '[difficult]' then hard << entry
      end
    end

    @difficulty = [easy, intermediate, hard].map do |group|
      group.sort_by { |title, _href| challenge_number(title) }
    end
  end

  # Loads the first page and collects links from it and from every page
  # reachable through the "next" link.
  def crawl
    init
    extractLinks
    extractLinks while next_page
  end

  private

  # True when +text+ starts with "[" and contains both a "#<digits>" challenge
  # number and a bracketed single-word tag.
  def challenge_title?(text)
    text.start_with?('[') && [CHALLENGE_NUMBER, WORD_TAG].all? { |pattern| text =~ pattern }
  end

  # Numeric challenge number parsed from +title+ (0 when there is none).
  def challenge_number(title)
    title[CHALLENGE_NUMBER].to_s.delete('#').to_i
  end

  # One output line: "<tag> <#number>: <text after the last ']'> : www.reddit.com<href>".
  def format_entry(title, href)
    start = (title.rindex(']') || -1) + 1
    name = title[start..-1]
    "#{title[DIFFICULTY_TAG]} #{title[CHALLENGE_NUMBER]}: #{name} : www.reddit.com#{href}"
  end
end
# Command-line entry point: asks how the challenges should be ordered, crawls
# the subreddit, and prints the listing in the requested order.
print "Sort by (N)umber or (D)ifficulty:\n> "
# gets returns nil at end of input; to_s keeps that from raising so the
# "invalid" branch handles it. strip also tolerates stray whitespace.
sort_type = gets.to_s.strip.downcase

if %w[n d].include?(sort_type)
  crawler = DailyProgrammerCrawler.new
  crawler.crawl
  if sort_type == 'n'
    crawler.print_by_num
  else
    crawler.sort_by_difficulty
    crawler.print_by_difficulty
  end
else
  puts "Invalid sorting method"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment