dailyprogrammer
#!/usr/bin/env ruby
require 'mechanize'

# Crawls /r/dailyprogrammer and collects challenge posts as title => href pairs.
class DailyProgrammerCrawler
  def initialize
    @agent = Mechanize.new
    @page  = nil
    @links = {}
  end

  # Fetch the first listing page of the subreddit.
  def init
    @page = @agent.get('http://www.reddit.com/r/dailyprogrammer/')
  end
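
  # Note: reddit tends to throttle clients that present a generic library
  # User-Agent. If requests start failing, identifying the script explicitly
  # in initialize usually helps (the UA string here is only an example):
  #
  #   @agent.user_agent = 'dailyprogrammer-crawler/1.0'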

  # Pull challenge links out of the current listing page. Challenge titles
  # start with a bracketed tag and contain a challenge number, e.g.
  # "[easy] challenge #12".
  def extract_links
    1.upto(49) do |i|
      item = @page.parser.xpath("//div[#{i}]/div[2]/p[1]/a").first
      next if item.nil?
      title = item.text
      if title.start_with?('[') && title =~ /#\d+/ && title =~ /\[\w+\]/
        @links[title] = item['href']
      end
    end
  end
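
  # The positional XPath above is brittle: it assumes at most 49 listing rows
  # and a fixed div/div/p nesting. A less position-dependent sketch, assuming
  # old-reddit still marks post-title links with the "title" CSS class:
  #
  #   def extract_links
  #     @page.parser.css('a.title').each do |a|
  #       @links[a.text] = a['href'] if a.text =~ /\A\[\w+\].*#\d+/
  #     end
  #   end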

  # Advance to the next listing page. Returns true on success, false once the
  # last page is reached. Reddit's pager link reads "next ›", so a prefix
  # match on "next" is enough.
  def next_page
    @page.links.each do |l|
      if l.text.start_with?('next')
        @page = l.click
        return true
      end
    end
    false
  end

  # Print the challenges grouped by difficulty; call sort_by_difficulty first.
  def print_by_difficulty
    @difficulty.each do |group|
      group.each do |name, href|
        title = name[name.rindex(']') + 1..-1]
        puts "#{name.scan(/\[\D+\]/).first} #{name.scan(/(#\d+)/).flatten.first}: #{title} : www.reddit.com#{href}"
      end
    end
  end

  # Print every challenge in ascending challenge-number order.
  def print_by_num
    @links = @links.sort_by { |name, _| name[/#(\d+)/, 1].to_i }
    @links.each do |name, href|
      title = name[name.rindex(']') + 1..-1]
      puts "#{name.scan(/\[\D+\]/).first} #{name.scan(/(#\d+)/).flatten.first}: #{title} : www.reddit.com#{href}"
    end
  end

  # Drop entries whose title is empty, then dump the collected links to disk.
  def save
    @links.reject! { |title, _| title.empty? }
    File.open('dailyProgrammerChallenges.txt', 'w+') { |f| f.puts(@links) }
  end
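
  # While @links is still a Hash, f.puts(@links) writes its inspect form on a
  # single line. A one-entry-per-line dump (same data, plainer layout) would be:
  #
  #   @links.each { |title, href| f.puts "#{title} www.reddit.com#{href}" }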

  # Bucket the collected links into easy / intermediate / hard (challenges
  # have been tagged both [hard] and [difficult]), then sort each bucket by
  # challenge number.
  def sort_by_difficulty
    easy, inter, hard = [], [], []
    @links.each do |l|
      case l[0].scan(/\[\D+\]/).first.downcase
      when '[easy]'                then easy  << l
      when '[intermediate]'        then inter << l
      when '[hard]', '[difficult]' then hard  << l
      end
    end
    by_number = ->(l) { l[0][/#(\d+)/, 1].to_i }
    @difficulty = [easy, inter, hard].map { |bucket| bucket.sort_by(&by_number) }
  end

  # Walk every listing page, harvesting links as we go.
  def crawl
    init
    extract_links
    extract_links while next_page
  end
end
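
# Usage from another script (a sketch of the public API above):
#
#   crawler = DailyProgrammerCrawler.new
#   crawler.crawl            # fetch every listing page
#   crawler.print_by_num     # list challenges in ascending #number order
#   crawler.save             # write the title => href map to a text file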
print "Sort by (N)umber or (D)ifficulty:\n> "
sortType = gets.chomp.downcase
case sortType
when 'n'
crawler = DailyProgrammerCrawler.new
crawler.crawl
crawler.print_by_num
when 'd'
crawler = DailyProgrammerCrawler.new
crawler.crawl
crawler.sort_by_difficulty
crawler.print_by_difficulty
else
puts "Invalid sorting method"
end
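
# Each printed line follows the pattern
#   [tag] #N: title : www.reddit.com<href>
# where tag, N, title, and href all come from the scraped post.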