Skip to content

Instantly share code, notes, and snippets.

@whym
Last active May 12, 2016 03:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save whym/0767e87cbb1423b18f75 to your computer and use it in GitHub Desktop.
Save whym/0767e87cbb1423b18f75 to your computer and use it in GitHub Desktop.
#! /usr/bin/env ruby
require 'open-uri'
require 'uri'
require 'readline'
require 'simpleidn'
require 'addressable/uri'
require 'mechanize'
# Returns +uri+ as a string whose host has been passed through
# SimpleIDN.to_ascii (the gem's conversion of internationalized domain names
# to their ASCII form).
#
# The input is parsed with Addressable::URI.parse, only the host component is
# replaced, and the URI is serialized again with +to_s+.
#
# @param uri [String] the URI text to convert
# @return [String] the serialized URI with the converted host
def to_ascii_uri(uri)
  parsed = Addressable::URI.parse(uri)
  ascii_host = SimpleIDN.to_ascii(parsed.host)
  parsed.host = ascii_host
  parsed.to_s
end
# Runs the given block on a new thread, handing it +uri+ as its argument.
#
# A StandardError raised by the block is rescued inside the thread and
# reported on standard output: the exception's +inspect+ string followed by
# its backtrace, one entry per line.
#
# @param uri [Object] value passed through to the block
# @yieldparam target [Object] the same value as +uri+
# @return [Thread] the started thread
def launch(uri, &block)
  Thread.new(uri) do |target|
    begin
      yield target
    rescue => error
      report = [error.inspect].concat(error.backtrace)
      puts report.join("\n")
    end
  end
end
# Normalizes a line of user input into something that looks like a URI.
#
# Surrounding whitespace is removed. "http://" is prepended when the text
# looks like a bare host name (a dot followed by 2-63 letters that either ends
# the text or is followed by "/") and it carries neither "://" nor a leading
# "http:"/"https:" scheme. Anything else is returned trimmed but otherwise
# unchanged.
#
# The argument itself is never modified, so frozen strings are accepted.
#
# @param uri [String] raw text typed by the user
# @return [String] the trimmed, possibly "http://"-prefixed text
def fix_uri(uri)
  text = uri.strip
  # Only an explicit scheme counts; a host that merely starts with "http"
  # (for example "httpbin.org") still needs the prefix.
  has_scheme = text.include?('://') || text =~ /\Ahttps?:/i
  # \z anchors at the end of the whole string, not just the end of a line.
  looks_like_host = text =~ %r{\.[a-z]{2,63}(/|\z)}i
  return text if has_scheme || !looks_like_host

  "http://#{text}"
end
# Interactive archiving loop: reads URIs from a Readline prompt and submits
# each one to archive.is, megalodon.jp and web.archive.org on background
# threads started through launch.

# Seconds to pause after starting each submission.
wait_secs = 1
Thread.abort_on_exception = true

# The pattern the obsolete URI.regexp helper returns, built once up front.
uri_pattern = URI::DEFAULT_PARSER.make_regexp

# One submitter per archiving service. Each receives the target URI string
# and prints the resulting location wrapped in angle brackets.
archivers = [
  # archive.is
  lambda do |target|
    agent = Mechanize.new
    agent.follow_meta_refresh = true
    page = agent.get('https://archive.is/')
    form = page.form_with(id: 'submiturl')
    form['anyway'] = '1'
    form.field_with(name: 'url').value = target
    res = agent.submit(form)
    puts "<#{res.uri}>"
  end,
  # megalodon.jp
  lambda do |target|
    agent = Mechanize.new
    # URI.escape no longer exists on Ruby 3.0+; the target is a query
    # parameter value, so it is form-encoded as a whole.
    page = agent.get('http://megalodon.jp/pc/?url=' + URI.encode_www_form_component(target))
    res = agent.submit(page.forms[0])
    puts "<#{res.uri}>"
  end,
  # web.archive.org
  lambda do |target|
    # Kernel#open stopped accepting URLs in Ruby 3.0; URI#open (provided by
    # open-uri) is available on both older and newer versions.
    URI.parse("https://web.archive.org/save/#{target}").open do |f|
      location = f.meta['content-location']
      if location
        puts "<https://web.archive.org#{location}>"
      else
        puts f.meta.inspect
      end
    end
  end
]

# Threads with submissions still in flight; joined once input ends so the
# process does not exit underneath them.
workers = []
while (line = Readline.readline('> ', true))
  next if line.strip.empty?

  begin
    uri = to_ascii_uri(fix_uri(line))
  rescue StandardError => e
    # A malformed entry must not terminate the whole prompt loop.
    puts "invalid; skipping '#{line}' (#{e.message})"
    next
  end
  if uri !~ uri_pattern
    puts "invalid; skipping '#{uri}'"
    next
  end

  # TODO: create a queue for each archiving service
  workers.select!(&:alive?) # forget threads that have already finished
  archivers.each do |archiver|
    workers << launch(uri, &archiver)
    sleep wait_secs
  end
end
workers.each(&:join)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment