Skip to content

Instantly share code, notes, and snippets.

@rabbitt
Last active December 28, 2015 06:29
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rabbitt/7457688 to your computer and use it in GitHub Desktop.
Save rabbitt/7457688 to your computer and use it in GitHub Desktop.
#!/usr/bin/ruby
# dependencies:
# gem install sinatra
# gem install mechanize
#
# to run:
# from command line type: ruby substitutions.rb [debug] &
# and in your browser, go to: localhost:4567/http://some.random.site.com/
#
require 'uri'
require 'sinatra'
require 'mechanize'
# Debug mode is enabled by passing the literal argument "debug" on the
# command line; the argument is removed from ARGV while it is checked.
$debug = !ARGV.delete('debug').nil?

# Writes +message+ to standard error, but only while debug mode is enabled.
# Always returns nil.
def debug(message)
  return unless $debug

  $stderr.puts(message)
end
# Phrase => replacement pairs applied to fetched pages. Keys are written in
# lowercase; the route matches them case-insensitively and re-cases the
# replacement to follow the matched text.
#
# Frozen so the table cannot be altered by accident at runtime.
SUBSTITUTIONS = {
  'witnesses' => 'these dudes I know',
  'allegedly' => 'kinda probably',
  'new study' => 'tumblr post',
  'rebuild' => 'avenge',
  'space' => 'spaaace',
  'google glass' => 'virtual boy',
  'smartphone' => 'pokedex',
  'electric' => 'atomic',
  'senator' => 'elf-lord',
  'car' => 'cat',
  'election' => 'eating contest',
  'congressional leaders' => 'river spirits',
  'homeland security' => 'homestar runner',
  # Was spelled "could not be reach for comment", which never matches the
  # phrase as it is actually written in articles.
  'could not be reached for comment' => 'is guilty and everyone knows it'
}.freeze
# HTTP client used by the route below to fetch pages; it identifies itself
# with a custom user agent string pointing back at this gist.
agent = Mechanize.new do |client|
  client.user_agent = '1288-Bot/1.0 (+https://gist.github.com/rabbitt/7457688)'
end
# Proxy route: everything after the leading "/" in the request path is taken
# as the URL of the page to fetch (e.g. /http://example.com/story). The page
# is fetched with +agent+, helper markup is injected (base href, highlight
# style, jQuery, hover script), every SUBSTITUTIONS phrase found in it is
# swapped for its replacement, and the modified document is returned as HTML.
#
# NOTE(review): this fetches whatever URL the client supplies. Keep the
# server bound to localhost or restrict the allowed targets before exposing it.
#
# NOTE(review): known limitation, unchanged here: a matched element has ALL
# of its children replaced by text and span nodes built from its text content,
# so nested markup (links, emphasis, ...) inside such an element is flattened.
get '/*' do
  # Normalise whatever follows the scheme's colon (for instance a single "/")
  # back to "://".
  url = params[:splat].first.gsub(%r{^(https?):[^a-z0-9-]*}, "\\1://")
  # Only a leading scheme counts; a bare substring check would also accept
  # something like "example.com/http-guide" and leave it without a scheme.
  url = URI.parse(url =~ %r{\Ahttps?://} ? url : "http://#{url}")
  root = agent.get(url.to_s).root
  # New nodes are created against the owning document rather than against an
  # arbitrary node.
  doc = root.document

  # Pages without a <head> are served without the helper markup instead of
  # failing the whole request.
  if (head = root.at_xpath('//head'))
    # Resolve relative links against the fetched page itself, so that
    # document-relative references ("img/a.png", "?page=2") keep working.
    base = Nokogiri::XML::Node.new('base', doc)
    base['href'] = url.to_s
    head.add_child(base)

    styles = Nokogiri::XML::Node.new('style', doc)
    # Strip leading whitespace only, so the result does not depend on how
    # deeply this heredoc happens to be indented.
    styles.content = <<-'EOS'.gsub(/^\s+/, '')
      .xkcd-replacement { background-color: pink; }
    EOS
    head.add_child(styles)

    jquery = Nokogiri::XML::Node.new('script', doc)
    jquery['src'] = 'http://code.jquery.com/jquery-2.0.3.min.js'
    head.add_child(jquery)
  end

  if (body = root.at_xpath('//body'))
    # Hovering a replacement reveals the original text; leaving the original
    # switches back to the replacement.
    js = Nokogiri::XML::Node.new('script', doc)
    js.content = <<-'EOF'.gsub(/^\s+/, '')
      $('.xkcd-replacement').hover(function() { $(this).siblings('.xkcd-original').show(); $(this).hide(); });
      $('.xkcd-original').hover(function() {}, function() { $(this).siblings('.xkcd-replacement').show(); $(this).hide(); });
    EOF
    body.add_child(js)
  end

  # Snapshot of the markup, used only to discover which spellings occur.
  page = root.to_html
  skipped_tags = %w(style script iframe object noscript)

  SUBSTITUTIONS.each do |search, replace|
    # find all uniq matches (ie: Google Glass, GOOGLE GLASS, google glass, etc)
    page.scan(/#{Regexp.escape(search)}/i).uniq.each do |match|
      replacement = case match
                    # if it looks like a title style capitalized match, then capitalize all replacement words
                    when /^[A-Z][a-z]+/ then replace.split(/\s+/).map(&:capitalize).join(' ')
                    # if it looks like it's fully uppercase, then fully uppercase the replacement
                    when /^[A-Z]+\b/ then replace.upcase
                    # otherwise just replace it as is
                    else replace
                    end

      debug " --> Searching for elements containing #{match.inspect}"

      candidates = root.xpath(%Q{//*[contains(text(), "#{match}")]}).reject do |element|
        debug " --> Found Element #{element.name} with match #{match} - path: #{element.path}"
        # Leave the document head and non-text containers untouched.
        element.path.include?('/head/') || skipped_tags.include?(element.name)
      end

      candidates.each do |element|
        repl_node = Nokogiri::XML::Node.new('span', doc)
        repl_node['class'] = 'xkcd-replacement'
        repl_node.content = replacement

        orig_node = Nokogiri::XML::Node.new('span', doc)
        orig_node['class'] = 'xkcd-original'
        orig_node['style'] = 'display:none;'
        orig_node.content = match

        # The capture group keeps the matched text as its own array entry, so
        # each occurrence can be swapped for a replacement/original span pair.
        parts = element.content.split(/(#{Regexp.escape(match)})/).reject(&:empty?)
        children = parts.map do |part|
          part == match ? [repl_node.dup, orig_node.dup] : Nokogiri::XML::Text.new(part, doc)
        end.flatten

        element.children = Nokogiri::XML::NodeSet.new(element.document, children)
        debug " --> Found #{match.inspect} and replaced with #{replacement.inspect}\n --> Modified Element (#{element.name}): #{element.to_html.inspect}"
      end
    end
  end

  root.to_html
end
@rabbitt
Copy link
Author

rabbitt commented Nov 13, 2013

Dependencies:

gem install mechanize
gem install sinatra

Run from command line:
ruby substitutions.rb [debug]

Then open your browser to:

http://localhost:4567/<url>

Examples:

http://localhost:4567/http://www.foxnews.com/politics/2013/11/11/white-texas-republican-wins-election-with-campaign-that-implied-black/
http://localhost:4567/http://www.foxnews.com/politics/2013/11/13/senator-says-us-ending-buys-russian-helicopters/

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment