dailyprogrammer

#!/usr/bin/env ruby
 
require 'mechanize'
 
# Crawls /r/dailyprogrammer and collects challenge posts as title => URL pairs.
class DailyProgrammerCrawler

  def initialize
    @agent = Mechanize.new
    @page  = ''
    @links = {}
  end

  # Load the subreddit's front page as the starting point for the crawl.
  def init
    @page = @agent.get('http://www.reddit.com/r/dailyprogrammer/')
  end
 
  # Walk the first 49 listing entries on the current page and keep every link
  # whose title looks like a challenge, e.g. "[Easy] Challenge #12 ...".
  def extract_links
    1.upto(49) do |i|
      item = @page.parser.xpath("//div[#{i}]/div[2]/p[1]/a")
      next if item[0].nil?

      title = item[0].text
      if title[0] == '[' && !title.scan(/(#\d+)/).empty? && !title.scan(/(\[\w+\])/).empty?
        @links[title] = item[0]['href']
      end
    end
  end
 
  # Follow the pagination link if one exists; returns true when a new page
  # was loaded, false when the last page has been reached.
  def next_page
    @page.links.each do |l|
      # The link text is "next ›", so compare everything before the arrow.
      if l.text[0..l.text.size - 3] == 'next'
        @page = l.click
        return true
      end
    end
    false
  end
 
  # Print the challenges grouped by difficulty; each group is already sorted
  # by challenge number in sort_by_difficulty.
  def print_by_difficulty
    @difficulty.each do |group|
      group.each do |title, href|
        text = title[(title.rindex(']') + 1)..-1]
        puts "#{title.scan(/\[\D+\]/).first} #{title.scan(/(#\d+)/).flatten.first}: #{text} : www.reddit.com#{href}"
      end
    end
  end
 
  # Print every challenge in ascending order of its challenge number.
  def print_by_num
    sorted = @links.sort_by { |title, _href| title.scan(/(#\d+)/).flatten.first.delete('#').to_i }
    sorted.each do |title, href|
      text = title[(title.rindex(']') + 1)..-1]
      puts "#{title.scan(/\[\D+\]/).first} #{title.scan(/(#\d+)/).flatten.first}: #{text} : www.reddit.com#{href}"
    end
  end
 
 
  # Dump the collected title => URL pairs to a text file.
  def save
    @links.reject! { |title, _href| title.empty? }
    File.open('dailyProgrammerChallenges.txt', 'w+') { |f| f.puts(@links) }
  end
 
  # Bucket the collected links by the difficulty tag in their title and sort
  # each bucket by challenge number.
  def sort_by_difficulty
    easy  = []
    inter = []
    hard  = []
    @difficulty = []

    @links.each do |l|
      tag = l[0].scan(/\[\D+\]/).first.downcase
      case tag
      when '[easy]'
        easy << l
      when '[intermediate]'
        inter << l
      when '[hard]', '[difficult]'
        hard << l
      end
    end

    by_number = ->(v) { v[0].scan(/(#\d+)/).flatten.first.delete('#').to_i }
    @difficulty << easy.sort_by(&by_number)
    @difficulty << inter.sort_by(&by_number)
    @difficulty << hard.sort_by(&by_number)
  end
 
  # Run the full crawl: fetch the first page, then keep extracting links and
  # following the "next" link until there are no more pages.
  def crawl
    init
    extract_links
    extract_links while next_page
  end
end
 
print "Sort by (N)umber or (D)ifficulty:\n> "
sortType = gets.chomp.downcase
 
case sortType
 
when 'n'
crawler = DailyProgrammerCrawler.new
crawler.crawl
crawler.print_by_num
 
when 'd'
crawler = DailyProgrammerCrawler.new
crawler.crawl
crawler.sort_by_difficulty
crawler.print_by_difficulty
 
else
puts "Invalid sorting method"
end
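 
# A minimal example invocation, assuming the mechanize gem is installed and
# that reddit still serves the old markup this XPath targets (the filename
# below is hypothetical):
#
#   $ gem install mechanize
#   $ ruby daily_programmer_crawler.rb
#   Sort by (N)umber or (D)ifficulty:
#   > n
#   ... sorted list of challenge titles and URLs ...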