Skip to content

Instantly share code, notes, and snippets.

@fairchild
Forked from eshao/etherpad-batch-import.rb
Created October 23, 2010 11:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fairchild/642083 to your computer and use it in GitHub Desktop.
Save fairchild/642083 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
# lib/etherpad.rb
#
# Requires mechanize >= 1.0.0
%w{rubygems nokogiri mechanize json logger}.each {|l| require l }
# Batch export/import client for an Etherpad-style site, driven through a
# Mechanize browser session.
#
# The connection settings and the Mechanize agent live in class variables so
# that Etherpad::Pad (a subclass) can read them. As a consequence only one
# site/account is active per process, and Etherpad.new must be called before
# any Pad is created.
class Etherpad
  # Stores the connection settings and builds the shared Mechanize agent.
  #
  # base_url - root URL of the site, with trailing slash.
  # username - login e-mail.
  # password - login password.
  #
  # The agent logs to ../log/etherpad.log (relative to the current working
  # directory); the log is started afresh on every run.
  def initialize(base_url, username, password)
    @@base_url = base_url
    @@username = username
    @@password = password
    @@robot = Mechanize.new do |agent|
      log_path = '../log/etherpad.log'
      # Remove the previous log without shelling out to `rm`, which printed an
      # error on stderr whenever the file did not exist yet.
      File.delete(log_path) if File.exist?(log_path)
      log = Logger.new(log_path)
      log.level = Logger::INFO
      log.datetime_format = ""
      agent.log = log
      agent.user_agent_alias = 'Mac Safari'
    end
  end

  # slug2txt: returns the full text of the pad named +slug+.
  def fetch(slug)
    Pad.new(slug).to_s
  end

  # Fetches everything reachable in the Etherpad namespace.
  #
  # Starting from +toc_slug+, every pad is fetched, written to
  # ../.cache/<slug>.mkd when non-empty, and scanned for links to further
  # pads. For each fetched pad the "<slug>-scratch" and "<slug>-sb" variants
  # are probed as well. Every slug is queued at most once, so pads that link
  # to each other (or to themselves) cannot cause an endless crawl.
  #
  # Prints a progress line per pad and a final count; returns nil.
  def fetchall(toc_slug)
    todo = [toc_slug]           # Stack of slugs still to fetch.
    seen = { toc_slug => true } # Every slug that has ever been queued.
    done = []                   # Slugs fetched successfully.

    until todo.empty?
      slug = todo.pop

      begin
        pad = Pad.new(slug)
        mkd = pad.to_s
      rescue StandardError
        # Any fetch failure is reported as a missing pad and skipped.
        puts "404 on #{slug}."
        next
      end

      # Save contents to a file; the block form closes the handle even if the
      # write raises.
      unless mkd.empty?
        File.open("../.cache/#{slug}.mkd", "w") { |cache| cache.write(mkd) }
        puts "Wrote #{slug}."
      end
      done << slug

      # Queue linked pads plus the conventional side pads, skipping anything
      # already queued or processed.
      candidates = pad.slugs + ["#{slug}-scratch", "#{slug}-sb"]
      candidates.each do |candidate|
        next if seen[candidate]
        seen[candidate] = true
        todo << candidate
      end
    end

    puts "#{done.length} files processed."
  end

  # Imports +file+ into the pad named +slug+ and reports the outcome.
  #
  # Returns true when Pad#import reports success, false otherwise.
  def import(slug, file)
    if Pad.new(slug).import(file)
      puts "Successfully imported #{file} into #{slug}"
      true
    else
      puts "Failed to import #{file} into #{slug}"
      false
    end
  end

  # Imports every "<slug>.html" file found directly inside +folder+ into the
  # pad named <slug>.
  #
  # The working directory is changed only for the duration of the import and
  # restored afterwards. Files are processed in sorted order.
  def importzipfolder(folder)
    Dir.chdir(folder) do
      Dir.entries('.').sort.each do |file|
        next unless file.end_with?('.html')
        slug = file.chomp('.html')
        next if slug.empty? # A bare ".html" file has no pad name.
        import(slug, file)
      end
    end
    nil
  end
end
# A single pad on the site configured through Etherpad.new.
#
# Pad subclasses Etherpad so that it can read the class variables set by
# Etherpad#initialize (@@base_url, @@username, @@password, @@robot); it does
# not call super. Etherpad.new must therefore have run before a Pad is built.
#
# Note that #import here takes one argument and overrides the two-argument
# Etherpad#import.
class Etherpad::Pad < Etherpad
  # slug - name of the pad (path component under the base URL).
  # rev  - revision to export; "latest" (default) or a revision identifier,
  #        which is sent as "rev.<rev>".
  def initialize(slug, rev = "latest")
    @base_url = @@base_url
    @username = @@username
    @password = @@password
    rev = "rev.#{rev}" unless rev == "latest"
    @slug = slug
    @url = {
      :base => URI.join(@base_url, slug),
      :txt  => URI.join(@base_url, "ep/pad/export/#{slug}/#{rev}?format=txt"),
      :imp  => URI.join(@base_url, "ep/pad/impexp/import"),
      :imp2 => URI.join(@base_url, "ep/pad/impexp/import2"),
    }
  end

  # Returns the unique, non-empty slugs of pads linked from this pad's text.
  #
  # Recognises links of the form <base_url><slug>, optionally followed by a
  # brace list that expands to several slugs: <base_url>foo{a,b} yields
  # "fooa" and "foob". A link ends at ">", whitespace, or the end of the text.
  # Calling this method fetches the pad text again via #to_s.
  def slugs
    # The base URL is escaped so that its dots match literally.
    link = /#{Regexp.escape(@base_url.to_s)}(.*?)(\{.*?\})?(?:>|\s|\z)/
    found = []
    to_s.each_line do |line|
      line.scan(link) do |slug, suffixes|
        if suffixes.nil?
          found << slug
        else
          suffixes.delete("{}").split(',').each do |suffix|
            found << "#{slug}#{suffix}"
          end
        end
      end
    end
    # A link to the bare base URL yields an empty slug, which names no pad.
    found.reject { |slug| slug.empty? }.uniq
  end

  # Returns the full text of the pad (performs an HTTP request on each call).
  def to_s
    page = @@robot.get(@url[:txt])
    # Treat any response that exposes forms as the login page. respond_to?
    # is used because Object#methods returns Symbols on Ruby >= 1.9, so the
    # former `methods.include? 'forms'` check could never match there.
    page = sign_in(page) if page.respond_to?(:forms)
    page.body
  end

  # Imports +file+ into this pad.
  #
  # Logs in and creates the pad first when the site asks for it. Returns true
  # when the final import request answers "ok"; returns false otherwise,
  # including when the import form cannot be found on the pad page.
  def import(file)
    page = @@robot.get(@url[:base])

    # Check if we need to login. title may be nil on pages without <title>.
    page = sign_in(page) if page.title.to_s.include?("Sign In")

    # Check if we need to create the stub.
    create_form = page.form_with(:action => '/ep/pad/create')
    page = create_form.submit unless create_form.nil?

    # Fill out the upload form.
    upload_form = page.form_with(:action => '/ep/pad/impexp/import')
    return false if upload_form.nil?
    upload = upload_form.file_uploads.first
    return false if upload.nil?
    upload.file_name = file
    page = upload_form.click_button

    # The token is the fourth single-quote-delimited piece of the response
    # body. NOTE(review): presumably the response embeds it in a quoted
    # script string - confirm against the server's actual output.
    token = page.body.split("'")[3]
    page = @@robot.post(@url[:imp2], {'padId' => @slug, 'token' => token})
    page.body == 'ok'
  end

  private

  # Submits the first form on +page+ with the stored credentials and returns
  # the resulting page. Returns +page+ unchanged when it has no form.
  def sign_in(page)
    form = page.forms.first
    return page if form.nil?
    form.email = @username
    form.password = @password
    form.submit
  end
end
# Command-line entry point: imports every *.html file in the folder given as
# the first argument. Replace the placeholder URL and credentials before use.
if __FILE__ == $0
  # Without this guard a missing argument reaches Dir.chdir(nil) and fails
  # with an unhelpful TypeError.
  abort "Usage: #{$0} FOLDER" if ARGV.empty?

  ep = Etherpad.new('http://YOUR_SUBDOMAIN.titanpad.com/',
                    'YOUR_EMAIL', 'YOUR_PASSWORD')
  ep.importzipfolder(ARGV[0])
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment