public
Last active

  • Download Gist
substitutions.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
#!/usr/bin/ruby
 
# dependencies:
# gem install sinatra
# gem install mechanize
 
#
# to run:
# from command line type: ruby substitutions.rb &
# and in your browser, go to: localhost:4567/http://some.random.site.com/
#
 
require 'uri'
require 'sinatra'
require 'mechanize'
 
# Debug tracing is switched on by a literal `debug` command-line argument;
# the argument is removed from ARGV as it is read.
$debug = !ARGV.delete('debug').nil?

# Writes +message+ to standard error, but only while debug tracing is on.
#
# @param message [Object] whatever should be printed
# @return [nil]
def debug(message)
  return unless $debug

  $stderr.puts(message)
end
 
# Phrase => replacement table. Keys are lowercase; the route matches them
# case-insensitively and re-cases the replacement to fit each occurrence.
# Frozen so the shared table cannot be modified while requests are served.
SUBSTITUTIONS = {
  'witnesses'                       => 'these dudes I know',
  'allegedly'                       => 'kinda probably',
  'new study'                       => 'tumblr post',
  'rebuild'                         => 'avenge',
  'space'                           => 'spaaace',
  'google glass'                    => 'virtual boy',
  'smartphone'                      => 'pokedex',
  'electric'                        => 'atomic',
  'senator'                         => 'elf-lord',
  'car'                             => 'cat',
  'election'                        => 'eating contest',
  'congressional leaders'           => 'river spirits',
  'homeland security'               => 'homestar runner',
  # "reached", not "reach": the misspelled phrase never occurs in real text.
  'could not be reached for comment' => 'is guilty and everyone knows it'
}.freeze
 
# HTTP client used by the route to fetch pages; it identifies itself with a
# bot user-agent string that links back to this script's gist.
agent = Mechanize.new do |client|
  client.user_agent = '1288-Bot/1.0 (+https://gist.github.com/rabbitt/7457688)'
end
 
# Elements whose direct text is never rewritten.
UNTOUCHED_ELEMENTS = %w(style script iframe object noscript).freeze

# Classes of the spans inserted by this script; their text is left alone so
# one substitution can never be applied on top of another.
MARKER_CLASSES = %w(xkcd-replacement xkcd-original).freeze

# Turns the path captured by the route into the absolute URI to fetch.
#
# @param raw [String] everything after the leading "/" of the request path
# @return [URI::Generic]
# @raise [URI::InvalidURIError] when the result is not a parseable URI
def target_uri(raw)
  # The server may hand us "http:/host" instead of "http://host". Only the
  # slashes after the scheme are consumed, so the host is kept intact.
  candidate = raw.sub(%r{\A(https?):/*}i, '\1://')
  # Default to http only when no scheme is present at the very start; an
  # "http" appearing later in the path must not count as a scheme.
  candidate = "http://#{candidate}" unless candidate =~ %r{\Ahttps?://}i
  URI.parse(candidate)
end

# Creates an element owned by +doc+.
#
# @param doc [Nokogiri::XML::Document] owning document
# @param name [String] tag name
# @param attributes [Hash{String=>String}] attributes to set
# @param text [String, nil] text content, if any
# @return [Nokogiri::XML::Node]
def build_element(doc, name, attributes = {}, text = nil)
  Nokogiri::XML::Node.new(name, doc).tap do |node|
    attributes.each { |key, value| node[key] = value }
    node.content = text if text
  end
end

# Adds the highlight stylesheet, jQuery and (when the page has none) a <base>
# element to the document's <head>. Does nothing if there is no <head>.
#
# @param doc [Nokogiri::XML::Document]
# @param page_uri [URI::Generic] address the page was fetched from
# @return [void]
def decorate_head(doc, page_uri)
  head = doc.at_xpath('//head')
  return unless head

  head.add_child(build_element(doc, 'style', {}, ".xkcd-replacement { background-color: pink; }\n"))
  head.add_child(build_element(doc, 'script', 'src' => 'http://code.jquery.com/jquery-2.0.3.min.js'))

  # A page that already declares its own base keeps it.
  return if head.at_xpath('.//base[@href]')

  base_uri = page_uri.dup
  base_uri.query = nil
  base_uri.fragment = nil
  # The base is the page's own address so document-relative links keep
  # working, and it goes first so it precedes the links that depend on it.
  head.prepend_child(build_element(doc, 'base', 'href' => base_uri.to_s))
end

# Appends the script that swaps a replacement for its original on hover.
# Does nothing if the document has no <body>.
#
# @param doc [Nokogiri::XML::Document]
# @return [void]
def append_hover_script(doc)
  body = doc.at_xpath('//body')
  return unless body

  # Each replacement span is immediately followed by its original, so
  # next()/prev() toggles exactly one pair instead of every pair in the parent.
  script = <<-'EOF'.gsub(/^[ \t]+/, '')
    $('.xkcd-replacement').hover(function() { $(this).next('.xkcd-original').show(); $(this).hide(); });
    $('.xkcd-original').hover(function() {}, function() { $(this).prev('.xkcd-replacement').show(); $(this).hide(); });
  EOF
  body.add_child(build_element(doc, 'script', {}, script))
end

# Re-cases +replace+ to follow the capitalisation of +match+.
#
# @param match [String] the phrase exactly as it appears in the page
# @param replace [String] the lowercase replacement from SUBSTITUTIONS
# @return [String]
def cased_replacement(match, replace)
  case match
  # Title-style match: capitalise every word of the replacement.
  when /\A[A-Z][a-z]+/ then replace.split(/\s+/).map(&:capitalize).join(' ')
  # Fully uppercase first word: uppercase the whole replacement.
  when /\A[A-Z]+\b/ then replace.upcase
  else replace
  end
end

# Tells whether a text node may be rewritten.
#
# @param text_node [Nokogiri::XML::Node]
# @return [Boolean]
def rewritable?(text_node)
  owner = text_node.parent
  return false unless owner && owner.element?
  return false if UNTOUCHED_ELEMENTS.include?(owner.name)

  !MARKER_CLASSES.include?(owner['class'])
end

# Replaces every occurrence of +match+ in the document's text (outside
# <head>) with a highlighted replacement span followed by a hidden span that
# holds the original wording. Only the text node containing the match is
# replaced, so sibling elements such as links are left untouched.
#
# @param doc [Nokogiri::XML::Document]
# @param match [String] exact, case-sensitive phrase to look for
# @param replacement [String] text to show in its place
# @return [void]
def substitute_text(doc, match, replacement)
  debug " --> Searching for text containing #{match.inspect}"
  splitter = /(#{Regexp.escape(match)})/
  # The predicate is applied to each text node, so matches that come after a
  # child element are found as well.
  query = %Q{//text()[contains(., "#{match}")][not(ancestor::head)]}

  doc.xpath(query).each do |text_node|
    next unless rewritable?(text_node)

    owner = text_node.parent
    debug " --> Found Element #{owner.name} with match #{match} - path: #{owner.path}"

    pieces = text_node.content.split(splitter).reject(&:empty?).flat_map do |piece|
      if piece == match
        [
          build_element(doc, 'span', { 'class' => 'xkcd-replacement' }, replacement),
          build_element(doc, 'span', { 'class' => 'xkcd-original', 'style' => 'display:none;' }, match)
        ]
      else
        [Nokogiri::XML::Text.new(piece, doc)]
      end
    end

    text_node.replace(Nokogiri::XML::NodeSet.new(doc, pieces))
    debug " --> Found #{match.inspect} and replaced with #{replacement.inspect}\n --> Modified Element (#{owner.name}): #{owner.to_html.inspect}"
  end
end

# Fetches the page named by the request path, applies every entry of
# SUBSTITUTIONS to its text and returns the rewritten HTML.
get '/*' do
  target = target_uri(params[:splat].first)

  fetched = agent.get(target.to_s)
  # Only responses that expose a parsed document can be rewritten.
  halt 415, 'Only HTML pages can be rewritten' unless fetched.respond_to?(:root)

  doc = fetched.root
  decorate_head(doc, target)
  append_hover_script(doc)

  markup = doc.to_html
  SUBSTITUTIONS.each do |search, replace|
    # Collect each distinct spelling (Google Glass, GOOGLE GLASS, ...) once.
    markup.scan(/#{Regexp.escape(search)}/i).uniq.each do |match|
      substitute_text(doc, match, cased_replacement(match, replace))
    end
  end

  doc.to_html
end

Dependencies:

gem install mechanize
gem install sinatra

Run from command line:
ruby substitutions.rb [debug]

Then open your browser to:

http://localhost:4567/<url>

Examples:

http://localhost:4567/http://www.foxnews.com/politics/2013/11/11/white-texas-republican-wins-election-with-campaign-that-implied-black/
http://localhost:4567/http://www.foxnews.com/politics/2013/11/13/senator-says-us-ending-buys-russian-helicopters/

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.