Skip to content

Instantly share code, notes, and snippets.

Created February 25, 2014 22:42
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/9219546 to your computer and use it in GitHub Desktop.
Save anonymous/9219546 to your computer and use it in GitHub Desktop.
require 'cgi'
require 'nokogiri'
# Mixin adding a `try` method that calls a method only when the receiver
# responds to it.
module Try
  # Calls +method+ on the receiver if the receiver responds to it.
  #
  # method - name of the method to invoke.
  # args   - arguments for the call; splatted into the invocation.
  #
  # Returns the result of the call, or nil when the receiver does not
  # respond to +method+.
  def try(method, args = [])
    return nil unless respond_to?(method)

    send(method, *args)
  end
end
# Give parsed elements the `try` helper from the Try mixin.
Nokogiri::XML::Element.send(:include, Try)
# Give nil the `try` helper from the Try mixin, so `try` can be called on the
# result of a lookup that found nothing.
NilClass.send(:include, Try)
# Removes page clutter from a reddit page (header, footer, sidebar boxes, ads,
# per-link buttons) and, when the page lists more than one link, replaces the
# link listing entries with rows of a compact table.
#
# html - String containing the page's HTML.
#
# Returns the modified document serialized to a String.
def prettify_reddit(html)
  doc = Nokogiri::HTML(html)

  # Everything matching these selectors is removed outright, in this order.
  clutter = [
    '#sr-header-area',              # header
    '.footer-parent',               # footer
    '#search',                      # right sidebar
    '.side .sponsor',
    '.side .submit-link',
    '.side .submit-text',
    '.side .create',
    '#ad_main',
    '.side .goldvertisement',
    '.side .sidecontentbox',
    '.side .account-activity-box',
    '.listing-chooser',             # left sidebar
    '.organic-listing',             # ads
    '#sharelink_',                  # annoying form at top of link listing
    '.expando',                     # random crap
    '.midcol',
    '.share'
  ]
  clutter.each { |selector| doc.css(selector).remove }

  # For these buttons the enclosing parent element is removed as well.
  ['.hide-button', '.report-button'].each do |selector|
    doc.css(selector).each { |button| button.parent.remove }
  end

  links = doc.css('.linklisting .thing')
  if links.length > 1
    table = Nokogiri::XML::Node.new('table', doc)

    links.each do |link|
      # HTML of the first match under this link, or nil when there is none.
      piece = lambda { |selector| link.at_css(selector).try(:to_html) }

      # The empty strings at both ends give the joined markup its leading and
      # trailing newline.
      rows = [
        '',
        '<tr>',
        '<td>',
        piece.call('.title a'),
        '</td>',
        '<td>',
        piece.call('.domain'),
        '</td>',
        '</tr>',
        '<tr>',
        '<td>',
        'On',
        piece.call('.tagline .subreddit'),
        'by',
        piece.call('.tagline .author'),
        'and there are',
        piece.call('.first .comments'),
        '</td>',
        '</tr>',
        '<tr><td>&nbsp;</td></tr>',
        ''
      ]
      table.add_child(rows.join("\n"))
      link.remove
    end

    # Put the table ahead of whatever is left inside the listing.
    doc.at_css('.linklisting').children.before(table)

    # Move the subreddit infos at the bottom
    titlebox = doc.at_css('.side .titlebox')
    table.parent.children.after(titlebox) if titlebox
  else
    doc.at_css('.side .titlebox').try(:remove)
  end

  doc.to_s
end
# Rewrites a reddit post page into a linear layout: the post title, body,
# tagline, score and votes, followed by every comment with ">>> " markers
# showing reply depth. The generated markup is inserted as the first child of
# the `.linklisting` element; the original post and comment nodes are removed
# from the document.
#
# html - String containing the page's HTML.
#
# Returns the rewritten document serialized to a String, or +html+ itself
# when the page contains no `.linklisting .thing` element to work on.
def nice_reddit_post(html)
  doc = Nokogiri::HTML(html)

  post_thing = doc.at_css('.linklisting .thing')
  # Without a post entry there is nothing to rebuild; hand the page back.
  return html if post_thing.nil?

  listing = doc.at_css('.linklisting')

  # Either vote counter may be missing from the page, so join only the ones
  # that were found instead of concatenating possibly-nil values.
  votes = ['.side .upvotes', '.side .downvotes'].map { |selector|
    doc.at_css(selector).try(:to_html)
  }.compact.join(' ')

  post = {
    :title => doc.at_css('.linklisting .thing:first-child p.title a.title').try(:to_html),
    :score => doc.at_css('.side .score').try(:to_html),
    :votes => votes,
    :tagline => doc.at_css('.linklisting .thing:first-child p.tagline').try(:to_html),
    :thing => {},
    :comments => []
  }

  # Depth assigned to the most recently parsed comment.
  level = 0

  # Extracts the displayable pieces of a post/comment node and detaches the
  # node from the document.
  parse_thing = lambda { |thing|
    body = thing.at_css('.usertext-body')
    info = {
      :author => thing.at_css('a.author').try(:to_html),
      :score => thing.at_css('.score.unvoted').try(:to_html),
      # Paragraphs of the first `.usertext-body` only.
      :text => body ? body.css('.md p') : [],
      :posted_at => thing.at_css('.tagline time').try(:to_html),
      # A thing nested inside a `.listing` is shown one level deeper than
      # the comment parsed just before it; anything else is top level.
      # NOTE(review): this is a running counter, not the true nesting depth,
      # so sibling replies are drawn progressively deeper.
      :level => thing.ancestors('.listing').empty? ? 0 : level + 1
    }
    thing.remove
    info
  }

  doc.css('.commentarea .thing').each do |thing|
    info = parse_thing.call(thing)
    # Remember the depth just used: a top-level comment resets it to 0 and a
    # nested one advances it by one.
    level = info[:level]
    post[:comments] << info
  end

  post[:thing] = parse_thing.call(post_thing)

  # Paragraph text is plain text taken from the page, so it is escaped
  # before being placed back into markup.
  render = lambda { |comment|
    indent = '>>> ' * comment[:level]
    heading = "\n<h3>#{indent} #{comment[:author]} - #{comment[:score]} #{comment[:posted_at]} ago</h3>\n"
    paragraphs = comment[:text].map { |par|
      "\n<p>#{indent} #{CGI.escapeHTML(par.content)}</p>\n"
    }
    heading + paragraphs.join
  }

  post_body = post[:thing][:text].map { |par|
    "<p>#{CGI.escapeHTML(par.content)}</p>"
  }.join

  rows = [
    '<div>',
    "<h2>#{post[:title]}</h2>",
    '<hr/>',
    post_body,
    "<p>#{post[:tagline]}</p>",
    "<p>#{post[:score]}</p>",
    "<p>#{post[:votes]}</p>",
    '<hr/>',
    '<br/>',
    post[:comments].map(&render).join,
    '</div>'
  ]
  template = "\n#{rows.join("\n")}\n"

  # prepend_child also copes with a listing that has no children left after
  # the post entry was detached.
  listing.prepend_child(template)
  doc.to_s
end
# Hook that rewrites the HTML of selected sites: reddit pages are simplified,
# and the relative "news2" link on Hacker News is made absolute.
#
# url  - String URL of the page.
# html - String containing the page's HTML.
#
# Returns the replacement HTML as a String, or nil when nothing was rewritten
# (presumably ELinks then keeps the original page; confirm against the ELinks
# scripting documentation).
def ELinks::pre_format_html_hook(url, html)
  case url
  when /reddit\.com/
    # A path with a segment after the subreddit name should be a post page.
    html = nice_reddit_post(html) if url =~ /\/r\/\w+\/\w+/
    prettify_reddit(html)
  when /news\.ycombinator\.com/
    doc = Nokogiri::HTML(html)
    # Fix the "news2" link, elinks thinks it's a local file link.
    # Only links whose href is exactly "news2" are touched.
    relative_links = doc.css('a').select { |link| link['href'] == 'news2' }
    return nil if relative_links.empty?

    relative_links.each { |link| link['href'] = '/news2' }
    # Hand the patched document back so the fix actually takes effect.
    doc.to_s
  end
  # A URL matching neither site falls out of the case with nil.
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment